In [1]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from tqdm import tqdm

# Config: column and file names shared by the cells below.
TEXT_COL = "sentiment_ready_text"  # column whose text is passed to every sentiment model
DATA_PATH_IN = "preprocessed_data.csv"        # input CSV read with pd.read_csv (alternative source: an Oracle load)
DATA_PATH_OUT = "sentiment_multi_model.csv"  # CSV the scored frame is written to in the last cell

# Register tqdm's pandas integration (progress_apply and friends).
# NOTE(review): no visible cell calls progress_apply; the loops below wrap
# range() with tqdm directly — confirm this call is still needed.
tqdm.pandas()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the preprocessed dataset that every model below will score.
df = pd.read_csv(DATA_PATH_IN)

# Fail fast: loading the models and running inference takes a long time, so
# make sure the text column is really there before any expensive work starts.
assert TEXT_COL in df.columns, f"Column {TEXT_COL!r} not found in {DATA_PATH_IN}"

print("Loaded:", df.shape)
df.head()


Loaded: (13663, 13)


Unnamed: 0,id,sentiment_ready_text,type,subreddit,created_utc,normalized_score,mentioned_tickers,n_tickers,text_length,word_count,date,hour,day_of_week
0,1hqr72t,What is a good ROE? (Return on Equity) - Quick...,post,investing,2024-12-31 23:56:46,0.019045,TOP,1,1072,171,2024-12-31,23,1
1,1hqqmq2,Soundhound ($SOUN) now has a market cap 75x it...,post,stocks,2024-12-31 23:24:29,0.039443,SOUN,1,476,82,2024-12-31,23,1
2,1hqqgv5,Is $SMCI beginning to be a play?. P/E is looki...,post,stocks,2024-12-31 23:15:10,0.011074,SMCI,1,419,75,2024-12-31,23,1
3,1hqpxkb,Any thoughts on RDDT feedback appreciated.,post,investing,2024-12-31 22:45:36,0.018076,RDDT,1,42,6,2024-12-31,22,1
4,1hqpw3r,Is D.R.Horton (DHI) good value ?. The stock is...,post,ValueInvesting,2024-12-31 22:43:25,0.051121,DHI,1,391,75,2024-12-31,22,1


In [5]:
# Short alias -> model identifier passed to from_pretrained() for each model.
# The alias becomes the column prefix of that model's results.
MODEL_NAMES = {
    "finbert": "ProsusAI/finbert",
    "finbert_tone": "yiyanghkust/finbert-tone",
    "roberta_financial": "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
    "twitter_roberta": "cardiffnlp/twitter-roberta-base-sentiment-latest",
}

# Pick the device index handed to pipeline(): 0 when CUDA is available, -1 otherwise.
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print("Using device:", "CPU" if device != 0 else "GPU")

Using device: CPU


In [6]:
# Build one text-classification pipeline per entry of MODEL_NAMES,
# keyed by the same short alias.
pipelines = {}

for name, model_name in MODEL_NAMES.items():
    print(f"Loading model: {name}  ({model_name})")

    tok = AutoTokenizer.from_pretrained(model_name)
    mod = AutoModelForSequenceClassification.from_pretrained(model_name)

    pipelines[name] = pipeline(
        "text-classification",
        model=mod,
        tokenizer=tok,
        device=device,
        # top_k=None returns the score of every class for each input. It
        # replaces the deprecated return_all_scores=True flag; the per-class
        # dicts may come back in a different order, which is fine because
        # classify_batch looks scores up by label.
        top_k=None,
        # Tokenizer settings: cut inputs longer than max_length tokens.
        truncation=True,
        max_length=256,
    )

print("\nLoaded pipelines:", list(pipelines.keys()))

Loading model: finbert  (ProsusAI/finbert)


Device set to use cpu


Loading model: finbert_tone  (yiyanghkust/finbert-tone)


Device set to use cpu


Loading model: roberta_financial  (mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis)


Device set to use cpu


Loading model: twitter_roberta  (cardiffnlp/twitter-roberta-base-sentiment-latest)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu



Loaded pipelines: ['finbert', 'finbert_tone', 'roberta_financial', 'twitter_roberta']


In [7]:
def classify_batch(pipe, texts):
    """
    Classify a batch of texts with a FinBERT/RoBERTa-style sentiment pipeline.

    Parameters
    ----------
    pipe : callable
        Called as ``pipe(texts)`` with a list of strings. For every text it
        must yield an iterable of ``{"label": str, "score": float}`` dicts,
        one per class.
    texts : str or iterable of str
        Texts to classify. A bare string is treated as a batch of one.

    Returns
    -------
    list of dict
        One dict per input text, in input order, with keys:
        ``label`` (lower-cased label with the highest score),
        ``score`` (``p_pos - p_neg``), ``p_pos``, ``p_neu`` and ``p_neg``.
        A class the model does not report contributes ``0.0``.
        An empty input returns ``[]`` without calling ``pipe``.

    Warns
    -----
    RuntimeWarning
        If an output contains none of the labels positive/negative/neutral,
        because the probabilities and score would then all be 0.0.
    """
    import warnings  # local import: keeps this cell runnable on its own

    # Normalise the input to a concrete list so the pipeline always sees a batch.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to classify: skip the model call entirely.
    if not texts:
        return []

    expected_labels = {"positive", "negative", "neutral"}
    results = []

    for out in pipe(texts):
        # Labels are lower-cased because models differ in casing.
        probs = {d["label"].lower(): float(d["score"]) for d in out}

        if not expected_labels & probs.keys():
            # Without this, an unexpected label scheme (e.g. LABEL_0/LABEL_1)
            # would silently produce an all-zero score for every row.
            warnings.warn(
                f"No positive/negative/neutral label in model output "
                f"(got {sorted(probs)}); probabilities default to 0.0",
                RuntimeWarning,
                stacklevel=2,
            )

        p_pos = probs.get("positive", 0.0)
        p_neg = probs.get("negative", 0.0)
        p_neu = probs.get("neutral", 0.0)

        results.append({
            "label": max(probs, key=probs.get),
            # Signed polarity in [-1, 1] when the inputs are probabilities.
            "score": p_pos - p_neg,
            "p_pos": p_pos,
            "p_neu": p_neu,
            "p_neg": p_neg
        })

    return results

In [8]:
BATCH_SIZE = 32

# Prepare text list: missing texts become "" so every row still gets a result.
texts = df[TEXT_COL].fillna("").astype(str).tolist()
n = len(texts)

# Fields of each classify_batch result, in the order the columns are created.
RESULT_FIELDS = ("label", "score", "p_pos", "p_neu", "p_neg")

# Run models.
# NOTE(review): classify_batch calls the pipeline without a batch_size argument,
# so this chunking mainly drives the progress bar — confirm whether true
# batched inference is wanted.
for model_name, pipe in pipelines.items():
    print(f"\n=== Processing model: {model_name} ===")

    records = []
    for start in tqdm(range(0, n, BATCH_SIZE)):
        batch = texts[start:start + BATCH_SIZE]
        records.extend(classify_batch(pipe, batch))

    # Exactly one result per row, otherwise the new columns would be misaligned.
    assert len(records) == n, (
        f"{model_name}: got {len(records)} results for {n} texts"
    )

    # Columns are created directly from the results; no placeholder
    # initialisation is needed because every column is fully assigned here.
    for field in RESULT_FIELDS:
        df[f"{model_name}_{field}"] = [rec[field] for rec in records]


=== Processing model: finbert ===


100%|██████████| 427/427 [15:28<00:00,  2.17s/it]



=== Processing model: finbert_tone ===


100%|██████████| 427/427 [15:43<00:00,  2.21s/it]



=== Processing model: roberta_financial ===


100%|██████████| 427/427 [08:19<00:00,  1.17s/it]



=== Processing model: twitter_roberta ===


100%|██████████| 427/427 [15:27<00:00,  2.17s/it]


In [9]:
# Cross-model spread and average of the per-model scores, row by row.
compare_cols = [f"{alias}_score" for alias in MODEL_NAMES]
per_model_scores = df[compare_cols]

df["sentiment_variance"] = per_model_scores.var(axis=1)
df["sentiment_mean"] = per_model_scores.mean(axis=1)

# Quick look at the ensemble columns next to the scores they summarise.
df[["sentiment_mean", "sentiment_variance", *compare_cols]].head()

Unnamed: 0,sentiment_mean,sentiment_variance,finbert_score,finbert_tone_score,roberta_financial_score,twitter_roberta_score
0,0.018083,0.00391,-0.037234,0.001657,-2.2e-05,0.107932
1,-0.322214,0.232564,0.347428,-0.78254,-0.514806,-0.338939
2,0.641846,0.267421,-0.098329,0.999999,0.995892,0.669824
3,0.165409,0.071304,0.558265,-0.005054,0.000412,0.108014
4,0.03828,0.908065,-0.525966,0.999876,-0.997716,0.676925


In [10]:
def _split_tickers(value):
    """Turn one raw `mentioned_tickers` cell into a list of ticker strings.

    Lists are returned unchanged. Missing values (None/NaN/pd.NA) give an
    empty list. Anything else is stringified and split on commas, with
    surrounding whitespace and empty pieces dropped.
    """
    if isinstance(value, list):
        return value
    # Missing values must not be stringified: str(nan) is "nan", which would
    # otherwise be exploded into a bogus "nan" ticker.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return []
    return [t.strip() for t in str(value).split(',') if t.strip()]


# One row per (original row, ticker) pair. explode() turns an empty list into
# NaN, so rows without tickers are kept with a missing ticker.
df['mentioned_tickers'] = df['mentioned_tickers'].apply(_split_tickers)
df = df.explode('mentioned_tickers').reset_index(drop=True)

In [11]:
# Section banner.
print("\n" + "="*80, "COMPUTING ENSEMBLE METRICS", "="*80, sep="\n")

# Row-wise summary statistics across the per-model score columns.
score_cols = [f"{alias}_score" for alias in pipelines]
model_scores = df[score_cols]

df["sentiment_mean"] = model_scores.mean(axis=1)
df["sentiment_variance"] = model_scores.var(axis=1)
df["sentiment_std"] = model_scores.std(axis=1)

print("✓ Added ensemble metrics: sentiment_mean, sentiment_variance, sentiment_std")


COMPUTING ENSEMBLE METRICS
✓ Added ensemble metrics: sentiment_mean, sentiment_variance, sentiment_std


In [12]:
# Report banner.
print("\n" + "="*80, "MODEL COMPARISON ANALYSIS", "="*80, sep="\n")

# 1. Descriptive statistics of each model's score column (one row per model).
print("\n1. SCORE STATISTICS BY MODEL", "-" * 80, sep="\n")
stats_df = df[score_cols].describe().T
stats_df.columns = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
print(stats_df.round(4))

# 2. Share of rows assigned to each label, per model.
print("\n2. LABEL DISTRIBUTION BY MODEL", "-" * 80, sep="\n")
label_cols = [f"{alias}_label" for alias in pipelines]
for label_col in label_cols:
    label_shares = df[label_col].value_counts(normalize=True)
    print(f"\n{label_col}:")
    print(label_shares.round(3))

# 3. Pairwise correlation between the models' scores.
print("\n3. MODEL CORRELATION (Pearson)", "-" * 80, sep="\n")
corr_matrix = df[score_cols].corr()
print(corr_matrix.round(3))

# 4. NOTE(review): only the section header is printed in the visible cell;
# no agreement computation follows — confirm whether it is missing.
print("\n4. MODEL AGREEMENT", "-" * 80, sep="\n")


MODEL COMPARISON ANALYSIS

1. SCORE STATISTICS BY MODEL
--------------------------------------------------------------------------------
                           count    mean     std     min     25%     50%  \
finbert_score            29624.0  0.0137  0.3329 -0.9682 -0.0465  0.0182   
finbert_tone_score       29624.0  0.1199  0.5043 -1.0000 -0.0015 -0.0000   
roberta_financial_score  29624.0  0.1129  0.5603 -0.9983 -0.0000 -0.0000   
twitter_roberta_score    29624.0  0.0582  0.4971 -0.9511 -0.2844  0.0555   

                            75%     max  
finbert_score            0.0970  0.9439  
finbert_tone_score       0.0509  1.0000  
roberta_financial_score  0.1980  0.9996  
twitter_roberta_score    0.3869  0.9876  

2. LABEL DISTRIBUTION BY MODEL
--------------------------------------------------------------------------------

finbert_label:
finbert_label
neutral     0.823
negative    0.092
positive    0.085
Name: proportion, dtype: float64

finbert_tone_label:
finbert_tone_label
n

In [None]:
# Write the current `df` (after the ticker explode, so one row per ticker
# mention) to DATA_PATH_OUT; index=False keeps the row index out of the file.
# NOTE(review): this cell has no execution count in the saved notebook —
# confirm the output file was actually regenerated.
df.to_csv(DATA_PATH_OUT, index=False)
print("Saved:", DATA_PATH_OUT)