In [None]:
import torch
import pandas as pd
import numpy as np
from collections import defaultdict
from sentence_transformers import SentenceTransformer


import importlib
import Database as db
import Config as c
importlib.reload(db)
importlib.reload(c)

<module 'NewConfig' from 'c:\\Users\\Kristo\\Documents\\GitHub\\testtradingbot\\Groundup4\\NewConfig.py'>

In [95]:

model = SentenceTransformer('all-MiniLM-L6-v2')


In [None]:
def build_affiliation_state(df):
    # Ensure symbols list
    df['symbols'] = df['symbols'].apply(lambda x: x if isinstance(x, list) else [])

    texts = (df['headline'].fillna('') + '. ' + df['summary'].fillna('')).tolist()
    article_embeddings = model.encode(texts, convert_to_tensor=True)

    # Build ticker index map
    all_tickers = sorted({ticker for symbols in df['symbols'] for ticker in symbols})
    ticker_to_idx = {ticker: i for i, ticker in enumerate(all_tickers)}

    embed_dim = article_embeddings.shape[1]
    ticker_sums = torch.zeros(len(all_tickers), embed_dim, device=article_embeddings.device)
    ticker_counts = torch.zeros(len(all_tickers), device=article_embeddings.device)

    for i, tickers in enumerate(df['symbols']):
        emb = article_embeddings[i]
        for ticker in tickers:
            idx = ticker_to_idx[ticker]
            ticker_sums[idx] += emb
            ticker_counts[idx] += 1
        print(f"Processed {i+1}/{len(df)} articles", end='\r', flush=True)

    ticker_embeddings = {t: ticker_sums[i] / ticker_counts[i] for t, i in ticker_to_idx.items()}

    state = {
        "ticker_sums": {t: ticker_sums[i] for t, i in ticker_to_idx.items()},
        "ticker_counts": {t: int(ticker_counts[i].item()) for t, i in ticker_to_idx.items()},
        "ticker_embeddings": ticker_embeddings,
        "all_tickers": all_tickers
    }
    return state

In [97]:
def add_article(article, state):
    """
    Add a single article to existing scores_df and update state.
    
    article: dict with 'headline', 'summary', 'symbols' (list)
    state: dict returned from build_affiliation_scores
    """
    ticker_sums = state["ticker_sums"]
    ticker_counts = state["ticker_counts"]
    ticker_embeddings = state["ticker_embeddings"]
    all_tickers = state["all_tickers"]

    # Embed new article
    text = article['headline'] + '. ' + article['summary']
    embed = model.encode(text, convert_to_tensor=True)

    # Handle new tickers
    for ticker in article['symbols']:
        if ticker not in ticker_sums:
            ticker_sums[ticker] = torch.zeros(embed.shape)
            ticker_counts[ticker] = 0
            ticker_embeddings[ticker] = torch.zeros(embed.shape)
            all_tickers.append(ticker)

        ticker_sums[ticker] += embed
        ticker_counts[ticker] += 1
        ticker_embeddings[ticker] = ticker_sums[ticker] / ticker_counts[ticker]

    # Compute new similarity row
    row_scores = []
    for ticker in all_tickers:
        if ticker_counts[ticker] > 0:
            score = torch.nn.functional.cosine_similarity(
                embed.unsqueeze(0),
                ticker_embeddings[ticker].unsqueeze(0)
            ).item()
        else:
            score = 0.0
        row_scores.append(score)

    # Append new row
    new_row = pd.DataFrame([row_scores], columns=all_tickers)

    # Save updated state
    state["ticker_sums"] = ticker_sums
    state["ticker_counts"] = ticker_counts
    state["ticker_embeddings"] = ticker_embeddings
    state["all_tickers"] = all_tickers

    return new_row, state


In [98]:
raw_ticker_data = db.load_cached_news()
print(f"Loaded {len(raw_ticker_data)} tickers from cache.")
raw_ticker_data = pd.DataFrame(raw_ticker_data)
last_article = raw_ticker_data.iloc[-1] 
raw_ticker_data = raw_ticker_data.iloc[:-1] 
print(f"Last article: {last_article["summary"]}")
print(type(last_article))

Loaded 129347 tickers from cache.
Last article: Benzinga examined the prospects for many investors&#39; favorite stocks over the last week — here&#39;s a look at some of our top stories. 
<class 'pandas.core.series.Series'>


In [99]:


state = build_affiliation_state(raw_ticker_data)


TypeError: build_affiliation_state() missing 1 required positional argument: 'model'

In [None]:

print(f"state shape: {state.shape}")

Scores DataFrame shape: (129346, 8409)


In [None]:
db.save_affiliation_data(state)

TypeError: Sparse pandas data (column A) not supported.

In [None]:
new_score_df, state = add_article(last_article, state)

In [None]:
print(new_score_df)
# Get the row as a Series
row = new_score_df.iloc[0]
# Max value
max_val = row.max()
# Column name of max
max_col = row.idxmax()
print(max_col , max_val)

         AA      AADI       AAL      AAPL      ABCM       ACB      ACCD  \
0 -0.028447 -0.011939 -0.057079  0.000126  0.049731  0.206803 -0.007461   

       ACGL      ACLX       ACM  ...      XXII      YMAB        YY         Z  \
0  0.075077 -0.012231  0.094351  ...  0.090893 -0.031706 -0.008624  0.037699   

        ZEV        ZG        ZH      ZLAB       ZM        ZS  
0  0.001134  0.037699 -0.028476  0.005944  0.03895  0.001426  

[1 rows x 913 columns]
BNTX 0.34557077288627625
