### Experimental notebook to validate if it even works in the first place

In [2]:
# from sentence_transformers import SentenceTransformer
import torch
import dotenv
import os
from app.services.news_api import FinancialModelingPrepNews
from huggingface_hub import InferenceClient

torch.set_default_device("mps")
dotenv.load_dotenv()
api_key = os.getenv("FMP_API_KEY")
hf_token = os.getenv("HF_TOKEN")

client = InferenceClient(
    provider="auto",
    api_key=hf_token,
)
embedding_model = "Qwen/Qwen3-Embedding-8B"


In [3]:
import os

result = client.feature_extraction(
    "Today is a sunny day and I will get some ice cream.",
    model=embedding_model,
    normalize=True
)
result

array([[ 0.01031494,  0.00448608,  0.00204468, ..., -0.00167847,
        -0.00396729, -0.00288391]], shape=(1, 4096), dtype=float32)

In [4]:
with FinancialModelingPrepNews() as news_api:
    print(f"\n📰 Fetching latest general news...")
    articles = news_api.get_articles_for_anomaly_detection(limit=100, news_type="general")
    print(f"Found {len(articles)} articles for anomaly detection:")

articles_text = []
for article in articles:
    articles_text.append(f"title: {article['title']}\ncontent: {article['snippet']}")

print(articles_text)


📰 Fetching latest general news...
Found 99 articles for anomaly detection:
['title: A chief White House economic adviser said that Trump wants allies placed in the Bureau of Labor Statistics after the agency published a dismal jobs report and the president fired its commissioner\ncontent: Kevin Hassett said the president wants his own people at the agency to provide “more transparent and reliable” jobs data.', 'title: Stock Market Correction and July 6-Month Calendar Range\ncontent: On July 13th\xa0I wrote a market update on how the July 6-month Calendar Range Resets. To refresh your memory.', 'title: Top Fed officials unswayed by poor July jobs report, take wait-and-see approach to rate cuts\ncontent: Several senior Federal Reserve officials on Friday characterized the labor market as “solid” even after a poor July jobs report and appeared in no rush to lower U.S. interest rates.', 'title: Wall Street Brunch: The BLS Also Does The CPI\ncontent: Wall Street faces uncertainty after Pre

In [None]:
result = client.feature_extraction(
    articles_text,
    model=embedding_model,
    normalize=True
)

result

array([[ 0.00726318,  0.00518799, -0.02453613, ..., -0.00017929,
         0.00035858, -0.0145874 ],
       [-0.01672363, -0.01062012, -0.01757812, ...,  0.00921631,
         0.01818848,  0.0027771 ],
       [ 0.00735474, -0.01098633, -0.02783203, ...,  0.00390625,
         0.00714111,  0.04785156],
       ...,
       [ 0.0133667 , -0.00016022, -0.03515625, ..., -0.00854492,
         0.0168457 ,  0.02941895],
       [ 0.01379395,  0.02722168, -0.01513672, ...,  0.01647949,
         0.02172852,  0.03417969],
       [-0.00167084,  0.00040627, -0.03149414, ..., -0.00239563,
         0.02185059,  0.00622559]], shape=(94, 4096), dtype=float32)

In [6]:
# calculate similarity between articles
# use numpy to calculate similarity
import numpy as np
similarity = np.dot(result, result.T)
print(similarity)

[[1.0011082  0.5423698  0.30085957 ... 0.35115767 0.25027305 0.39292222]
 [0.5423698  1.0024085  0.3797459  ... 0.5442909  0.43137676 0.5703728 ]
 [0.30085957 0.3797459  1.0012469  ... 0.3724013  0.23221067 0.3578353 ]
 ...
 [0.35115767 0.5442909  0.3724013  ... 1.0000752  0.54015934 0.49364024]
 [0.25027305 0.43137676 0.23221067 ... 0.54015934 0.9985377  0.43457985]
 [0.39292222 0.5703728  0.3578353  ... 0.49364024 0.43457985 1.0058978 ]]
