In [21]:
!pip install datasets



In [45]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import torch
from transformers import (
    BertTokenizerFast,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from datasets import Dataset, DatasetDict

In [61]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [79]:
data_path = "/content/drive/MyDrive/w266/Project/expanded_stock.csv"
df = pd.read_csv(data_path, parse_dates=['timestamp'])

In [80]:
target_tickers = ['$AAPL', '$MSFT', '$AMD', '$NVDA', '$AMZN']
df = df[df['ticker'].isin(target_tickers)].copy()

In [81]:
# print(df)

In [82]:
def clean_sentiment(s):
    if isinstance(s, str):
        s = s.strip().lower()
        if "bullish" in s:
            return "BULLISH"
        elif "bearish" in s:
            return "BEARISH"
        elif "neutral" in s:
            return "NEUTRAL"
    return None

In [83]:
df['sentiment_clean'] = df['sentiment'].apply(clean_sentiment)
df = df.dropna(subset=['sentiment_clean'])  # Drop rows missing sentiment

In [84]:
sentiment_mapping = {"BEARISH": 0, "NEUTRAL": 1, "BULLISH": 2}
df['label'] = df['sentiment_clean'].map(sentiment_mapping)

In [85]:
print("Label distribution:")
print(df['label'].value_counts())

Label distribution:
label
2    1289
1     433
0     402
Name: count, dtype: int64


In [86]:
df = df.rename(columns={'description': 'text'})
df = df[['text', 'label']].reset_index(drop=True)

In [87]:
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])
print("Train size:", len(train_df), "Validation size:", len(val_df))

train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)
dataset_dict = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset
})

Train size: 1699 Validation size: 425


In [88]:
def fine_tune_bert(
    model_name,
    output_dir,
    num_train_epochs=3,
    learning_rate=2e-5,
    per_device_batch_size=16,
    dropout=None,
    warmup_steps=None,
    max_grad_norm=None
):
    """
    Fine-tune a BERT model for sentiment classification on our tweet dataset.
    Additional hyperparameters such as dropout adjustments, warmup steps, and gradient clipping can be provided.
    Returns the Trainer and evaluation metrics.
    """
    print(f"\nFine-tuning model {model_name} with epochs={num_train_epochs}, lr={learning_rate}, batch_size={per_device_batch_size}, "
          f"dropout={dropout}, warmup_steps={warmup_steps}, max_grad_norm={max_grad_norm}")

    # Load the tokenizer and model.
    tokenizer = BertTokenizerFast.from_pretrained(model_name)
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)

    # If a dropout value is provided, update the model's dropout configuration.
    if dropout is not None:
        model.config.hidden_dropout_prob = dropout
        model.config.attention_probs_dropout_prob = dropout

    # Tokenize the dataset.
    def tokenize_function(example):
        return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)

    tokenized_datasets = dataset_dict.map(tokenize_function, batched=True)
    # Remove extra columns; note that our dataset may have "__index_level_0__" as the index column.
    tokenized_datasets = tokenized_datasets.remove_columns(["text", "__index_level_0__"])
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch")

    # Define training arguments.
    training_args = TrainingArguments(
        output_dir=output_dir,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=learning_rate,
        per_device_train_batch_size=per_device_batch_size,
        per_device_eval_batch_size=per_device_batch_size,
        num_train_epochs=num_train_epochs,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        logging_dir=f"{output_dir}/logs",
        logging_steps=50,
        warmup_steps=warmup_steps if warmup_steps is not None else 0,
        max_grad_norm=max_grad_norm if max_grad_norm is not None else 1.0,
    )

    # Define compute_metrics function.
    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        acc = accuracy_score(labels, predictions)
        return {"accuracy": acc}

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Initialize the Trainer.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # Train the model.
    trainer.train()

    # Evaluate the model.
    eval_results = trainer.evaluate()
    print(f"Evaluation results for {model_name}:", eval_results)

    # Save the model and tokenizer.
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    return trainer, eval_results

In [90]:
# trainer_base, eval_base = fine_tune_bert(
#     model_name="bert-base-uncased",
#     output_dir="./bert_finetuned_baseline",
#     num_train_epochs=3,
#     learning_rate=2e-5,
#     per_device_batch_size=16
# )

In [None]:
trainer_iter2, eval_iter2 = fine_tune_bert(
    model_name="bert-base-uncased",
    output_dir="./bert_finetuned_iter2",
    num_train_epochs=5,        # increased epochs
    learning_rate=3e-5,        # slightly higher learning rate
    per_device_batch_size=16
)


Fine-tuning model bert-base-uncased with epochs=5, lr=3e-05, batch_size=16


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8942,0.848963,0.632941
2,0.7025,0.801126,0.663529
3,0.482,0.823443,0.675294
4,0.3241,0.916168,0.710588
5,0.2115,0.986047,0.715294


Evaluation results for bert-base-uncased: {'eval_loss': 0.9860467910766602, 'eval_accuracy': 0.7152941176470589, 'eval_runtime': 3.0295, 'eval_samples_per_second': 140.288, 'eval_steps_per_second': 8.912, 'epoch': 5.0}


In [None]:
trainer_iter3, eval_iter3 = fine_tune_bert(
    model_name="bert-base-uncased",
    output_dir="./bert_finetuned_iter3",
    num_train_epochs=5,
    learning_rate=2e-5,
    per_device_batch_size=8      # reduced batch size
)


Fine-tuning model bert-base-uncased with epochs=5, lr=2e-05, batch_size=8


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8834,0.808332,0.656471
2,0.6832,0.772319,0.682353
3,0.4189,0.899554,0.675294
4,0.1942,1.152404,0.703529
5,0.1246,1.26103,0.691765


Evaluation results for bert-base-uncased: {'eval_loss': 1.1524038314819336, 'eval_accuracy': 0.7035294117647058, 'eval_runtime': 3.0108, 'eval_samples_per_second': 141.161, 'eval_steps_per_second': 17.936, 'epoch': 5.0}


In [None]:
try:
    trainer_finbert, eval_finbert = fine_tune_bert(
        model_name="ProsusAI/finbert",   # FinBERT model if available on HuggingFace
        output_dir="./finbert_finetuned",
        num_train_epochs=3,
        learning_rate=2e-5,
        per_device_batch_size=16
    )
except Exception as e:
    print("FinBERT model not found or encountered an error:", e)
    trainer_finbert, eval_finbert = None, None


Fine-tuning model ProsusAI/finbert with epochs=3, lr=2e-05, batch_size=16


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8667,0.818996,0.644706
2,0.6952,0.801294,0.670588
3,0.5479,0.802365,0.665882


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Evaluation results for ProsusAI/finbert: {'eval_loss': 0.8012937903404236, 'eval_accuracy': 0.6705882352941176, 'eval_runtime': 2.9931, 'eval_samples_per_second': 141.993, 'eval_steps_per_second': 9.021, 'epoch': 3.0}


In [None]:
print("\n--- Experiment Evaluation Metrics ---")
print("Baseline BERT (bert-base-uncased, 3 epochs, lr=2e-5, batch=16):", eval_base)
print("Iteration 2 (bert-base-uncased, 5 epochs, lr=3e-5, batch=16):", eval_iter2)
print("Iteration 3 (bert-base-uncased, 5 epochs, lr=2e-5, batch=8):", eval_iter3)
if trainer_finbert is not None:
    print("FinBERT (ProsusAI/finbert, 3 epochs, lr=2e-5, batch=16):", eval_finbert)
else:
    print("FinBERT experiment did not run.")


--- Experiment Evaluation Metrics ---
Baseline BERT (bert-base-uncased, 3 epochs, lr=2e-5, batch=16): {'eval_loss': 0.8205244541168213, 'eval_accuracy': 0.6541176470588236, 'eval_runtime': 3.0409, 'eval_samples_per_second': 139.759, 'eval_steps_per_second': 8.879, 'epoch': 3.0}
Iteration 2 (bert-base-uncased, 5 epochs, lr=3e-5, batch=16): {'eval_loss': 0.9860467910766602, 'eval_accuracy': 0.7152941176470589, 'eval_runtime': 3.0295, 'eval_samples_per_second': 140.288, 'eval_steps_per_second': 8.912, 'epoch': 5.0}
Iteration 3 (bert-base-uncased, 5 epochs, lr=2e-5, batch=8): {'eval_loss': 1.1524038314819336, 'eval_accuracy': 0.7035294117647058, 'eval_runtime': 3.0108, 'eval_samples_per_second': 141.161, 'eval_steps_per_second': 17.936, 'epoch': 5.0}
FinBERT (ProsusAI/finbert, 3 epochs, lr=2e-5, batch=16): {'eval_loss': 0.8012937903404236, 'eval_accuracy': 0.6705882352941176, 'eval_runtime': 2.9931, 'eval_samples_per_second': 141.993, 'eval_steps_per_second': 9.021, 'epoch': 3.0}


In [34]:
# best_trainer = trainer_base  # Adjust as needed.
# predictions = best_trainer.predict(tokenized_datasets["validation"])
# predicted_labels = np.argmax(predictions.predictions, axis=-1)
# print("Predicted labels on validation set (example):", predicted_labels[:10])

In [35]:
# val_df = val_df.reset_index(drop=True)
# val_df["predicted_label"] = predicted_labels
# val_df["predicted_sentiment"] = val_df["predicted_label"].map({0: "BEARISH", 1: "NEUTRAL", 2: "BULLISH"})
# print(val_df.head())

In [None]:
experiments = {
    "baseline": {
        "model_name": "bert-base-uncased",
        "num_train_epochs": 3,
        "learning_rate": 2e-5,
        "per_device_batch_size": 16,
        "dropout": None,
        "warmup_steps": None,
        "max_grad_norm": None
    },
    "iter2": {
        "model_name": "bert-base-uncased",
        "num_train_epochs": 5,
        "learning_rate": 3e-5,
        "per_device_batch_size": 16,
        "dropout": None,
        "warmup_steps": None,
        "max_grad_norm": None
    },
    "iter3": {
        "model_name": "bert-base-uncased",
        "num_train_epochs": 5,
        "learning_rate": 2e-5,
        "per_device_batch_size": 8,
        "dropout": None,
        "warmup_steps": None,
        "max_grad_norm": None
    },
    "lower_lr_warmup": {
        "model_name": "bert-base-uncased",
        "num_train_epochs": 5,
        "learning_rate": 1e-5,
        "per_device_batch_size": 16,
        "dropout": None,
        "warmup_steps": 500,
        "max_grad_norm": None
    },
    "increased_dropout": {
        "model_name": "bert-base-uncased",
        "num_train_epochs": 5,
        "learning_rate": 2e-5,
        "per_device_batch_size": 16,
        "dropout": 0.3,  # Increase dropout to reduce overfitting.
        "warmup_steps": 300,
        "max_grad_norm": None
    },
    "grad_clip": {
        "model_name": "bert-base-uncased",
        "num_train_epochs": 5,
        "learning_rate": 2e-5,
        "per_device_batch_size": 16,
        "dropout": None,
        "warmup_steps": 300,
        "max_grad_norm": 1.0  # Apply gradient clipping.
    },
    "finbert": {
        "model_name": "ProsusAI/finbert",
        "num_train_epochs": 3,
        "learning_rate": 2e-5,
        "per_device_batch_size": 16,
        "dropout": None,
        "warmup_steps": None,
        "max_grad_norm": None
    }
}

In [None]:
results = {}

for exp_name, params in experiments.items():
    print(f"\n=== Running Experiment: {exp_name} ===")
    try:
        trainer, eval_results = fine_tune_bert(
            model_name=params["model_name"],
            output_dir=f"./finetuned_{exp_name}",
            num_train_epochs=params["num_train_epochs"],
            learning_rate=params["learning_rate"],
            per_device_batch_size=params["per_device_batch_size"],
            dropout=params["dropout"],
            warmup_steps=params["warmup_steps"],
            max_grad_norm=params["max_grad_norm"]
        )
        results[exp_name] = eval_results
    except Exception as e:
        print(f"Experiment {exp_name} failed with error: {e}")
        results[exp_name] = None


=== Running Experiment: baseline ===

Fine-tuning model bert-base-uncased with epochs=3, lr=2e-05, batch_size=16, dropout=None, warmup_steps=None, max_grad_norm=None


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8957,0.845924,0.628235
2,0.767,0.81043,0.644706
3,0.6374,0.8101,0.618824


Evaluation results for bert-base-uncased: {'eval_loss': 0.8104299902915955, 'eval_accuracy': 0.6447058823529411, 'eval_runtime': 3.0528, 'eval_samples_per_second': 139.217, 'eval_steps_per_second': 8.844, 'epoch': 3.0}

=== Running Experiment: iter2 ===

Fine-tuning model bert-base-uncased with epochs=5, lr=3e-05, batch_size=16, dropout=None, warmup_steps=None, max_grad_norm=None


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8942,0.848963,0.632941
2,0.7025,0.801126,0.663529
3,0.482,0.823443,0.675294
4,0.3241,0.916168,0.710588
5,0.2115,0.986047,0.715294


Evaluation results for bert-base-uncased: {'eval_loss': 0.9860467910766602, 'eval_accuracy': 0.7152941176470589, 'eval_runtime': 2.972, 'eval_samples_per_second': 143.002, 'eval_steps_per_second': 9.085, 'epoch': 5.0}

=== Running Experiment: iter3 ===

Fine-tuning model bert-base-uncased with epochs=5, lr=2e-05, batch_size=8, dropout=None, warmup_steps=None, max_grad_norm=None


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8834,0.808332,0.656471
2,0.6832,0.772319,0.682353
3,0.4189,0.899554,0.675294
4,0.1942,1.152404,0.703529
5,0.1246,1.26103,0.691765


Evaluation results for bert-base-uncased: {'eval_loss': 1.1524038314819336, 'eval_accuracy': 0.7035294117647058, 'eval_runtime': 3.0559, 'eval_samples_per_second': 139.074, 'eval_steps_per_second': 17.671, 'epoch': 5.0}

=== Running Experiment: lower_lr_warmup ===

Fine-tuning model bert-base-uncased with epochs=5, lr=1e-05, batch_size=16, dropout=None, warmup_steps=500, max_grad_norm=None


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0007,0.936094,0.614118
2,0.9111,0.871007,0.644706
3,0.8527,0.853462,0.637647
4,0.807,0.828116,0.637647
5,0.7438,0.812155,0.665882


Evaluation results for bert-base-uncased: {'eval_loss': 0.8121548891067505, 'eval_accuracy': 0.6658823529411765, 'eval_runtime': 2.9434, 'eval_samples_per_second': 144.39, 'eval_steps_per_second': 9.173, 'epoch': 5.0}

=== Running Experiment: increased_dropout ===

Fine-tuning model bert-base-uncased with epochs=5, lr=2e-05, batch_size=16, dropout=0.3, warmup_steps=300, max_grad_norm=None


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9454,0.885897,0.632941
2,0.8834,0.847473,0.632941
3,0.7608,0.746669,0.672941
4,0.5956,0.792825,0.682353
5,0.3468,0.862654,0.677647


Evaluation results for bert-base-uncased: {'eval_loss': 0.7928246259689331, 'eval_accuracy': 0.6823529411764706, 'eval_runtime': 3.027, 'eval_samples_per_second': 140.404, 'eval_steps_per_second': 8.92, 'epoch': 5.0}

=== Running Experiment: grad_clip ===

Fine-tuning model bert-base-uncased with epochs=5, lr=2e-05, batch_size=16, dropout=None, warmup_steps=300, max_grad_norm=1.0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9454,0.885897,0.632941
2,0.8834,0.847473,0.632941
3,0.7608,0.746669,0.672941
4,0.5956,0.792825,0.682353
5,0.3468,0.862654,0.677647


Evaluation results for bert-base-uncased: {'eval_loss': 0.7928246259689331, 'eval_accuracy': 0.6823529411764706, 'eval_runtime': 3.033, 'eval_samples_per_second': 140.127, 'eval_steps_per_second': 8.902, 'epoch': 5.0}

=== Running Experiment: finbert ===

Fine-tuning model ProsusAI/finbert with epochs=3, lr=2e-05, batch_size=16, dropout=None, warmup_steps=None, max_grad_norm=None


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8667,0.818996,0.644706
2,0.6952,0.801294,0.670588
3,0.5479,0.802365,0.665882


Evaluation results for ProsusAI/finbert: {'eval_loss': 0.8012937903404236, 'eval_accuracy': 0.6705882352941176, 'eval_runtime': 3.0354, 'eval_samples_per_second': 140.013, 'eval_steps_per_second': 8.895, 'epoch': 3.0}


In [None]:
print("\n--- Experiment Evaluation Metrics ---")
for exp_name, metrics in results.items():
    print(f"{exp_name}: {metrics}")


--- Experiment Evaluation Metrics ---
baseline: {'eval_loss': 0.8104299902915955, 'eval_accuracy': 0.6447058823529411, 'eval_runtime': 3.0528, 'eval_samples_per_second': 139.217, 'eval_steps_per_second': 8.844, 'epoch': 3.0}
iter2: {'eval_loss': 0.9860467910766602, 'eval_accuracy': 0.7152941176470589, 'eval_runtime': 2.972, 'eval_samples_per_second': 143.002, 'eval_steps_per_second': 9.085, 'epoch': 5.0}
iter3: {'eval_loss': 1.1524038314819336, 'eval_accuracy': 0.7035294117647058, 'eval_runtime': 3.0559, 'eval_samples_per_second': 139.074, 'eval_steps_per_second': 17.671, 'epoch': 5.0}
lower_lr_warmup: {'eval_loss': 0.8121548891067505, 'eval_accuracy': 0.6658823529411765, 'eval_runtime': 2.9434, 'eval_samples_per_second': 144.39, 'eval_steps_per_second': 9.173, 'epoch': 5.0}
increased_dropout: {'eval_loss': 0.7928246259689331, 'eval_accuracy': 0.6823529411764706, 'eval_runtime': 3.027, 'eval_samples_per_second': 140.404, 'eval_steps_per_second': 8.92, 'epoch': 5.0}
grad_clip: {'eval_l

In [None]:
best_exp = "increased_dropout" if results.get("increased_dropout") else "baseline"
print(f"\nUsing best model from experiment: {best_exp}")


Using best model from experiment: increased_dropout


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
best_tokenizer = AutoTokenizer.from_pretrained(f"./finetuned_{best_exp}")
best_model = AutoModelForSequenceClassification.from_pretrained(f"./finetuned_{best_exp}")


In [None]:
# Tokenize the validation dataset for inference.
def tokenize_inference(example):
    return best_tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)


In [None]:
val_dataset_inference = Dataset.from_pandas(val_df)

In [None]:
columns_to_remove = []
if "__index_level_0__" in val_dataset_inference.column_names:
    columns_to_remove.append("__index_level_0__")
val_dataset_inference = val_dataset_inference.map(tokenize_inference, batched=True)
val_dataset_inference = val_dataset_inference.remove_columns(["text"] + columns_to_remove)
val_dataset_inference.set_format("torch")

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

In [None]:
inference_trainer = Trainer(
    model=best_model,
    tokenizer=best_tokenizer
)

  inference_trainer = Trainer(


In [None]:
preds = inference_trainer.predict(val_dataset_inference)
predicted_labels = np.argmax(preds.predictions, axis=-1)
print("Sample predicted labels:", predicted_labels[:10])


Sample predicted labels: [2 0 1 2 2 2 0 2 1 2]


In [None]:
val_df["predicted_label"] = predicted_labels
val_df["predicted_sentiment"] = val_df["predicted_label"].map({0: "BEARISH", 1: "NEUTRAL", 2: "BULLISH"})
print(val_df.head())

                                                   text  label  \
588   $AAPL\n\n1H close to double top breakdown conf...      1   
1754  New lod on $aapl\n\nOne to watch today\n\nFrid...      0   
1892  Watching $TSM $AMD $COST  $SPY to name a few t...      2   
1753  Microsoft boosting AI investments \n\n> [@DeIt...      2   
1426   Insane strength from $AAPL after it gapped up 4%      2   

      predicted_label predicted_sentiment  
588                 2             BULLISH  
1754                0             BEARISH  
1892                1             NEUTRAL  
1753                2             BULLISH  
1426                2             BULLISH  


In [36]:
trainer_best, eval_best = fine_tune_bert(
    model_name="bert-base-uncased",
    output_dir="./finetuned_increased_dropout_10epochs",
    num_train_epochs=10,
    learning_rate=2e-5,
    per_device_batch_size=16,
    dropout=0.3,
    warmup_steps=300,
    max_grad_norm=None
)


Fine-tuning model bert-base-uncased with epochs=10, lr=2e-05, batch_size=16, dropout=0.3, warmup_steps=300, max_grad_norm=None


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Map:   0%|          | 0/425 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9306,0.89538,0.616471
2,0.8436,0.859372,0.637647
3,0.7354,0.782465,0.656471
4,0.5958,0.779139,0.694118
5,0.355,0.877678,0.682353
6,0.2053,0.969334,0.682353
7,0.1593,1.245341,0.689412
8,0.1198,1.285658,0.68
9,0.0949,1.404241,0.696471
10,0.0689,1.400791,0.687059


Evaluation results for bert-base-uncased: {'eval_loss': 1.404240608215332, 'eval_accuracy': 0.6964705882352941, 'eval_runtime': 2.884, 'eval_samples_per_second': 147.365, 'eval_steps_per_second': 9.362, 'epoch': 10.0}


In [94]:
trainer_best_finbert, eval_best_finbert = fine_tune_bert(
    model_name="ProsusAI/finbert",
    output_dir="./finetuned_increased_dropout_10epochs_finbert",
    num_train_epochs=10,
    learning_rate=2e-5,
    per_device_batch_size=16,
    dropout=0.3,
    warmup_steps=300,
    max_grad_norm=None
)


Fine-tuning model ProsusAI/finbert with epochs=10, lr=2e-05, batch_size=16, dropout=0.3, warmup_steps=300, max_grad_norm=None


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Map:   0%|          | 0/1699 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Map:   0%|          | 0/425 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0032,0.881925,0.609412
2,0.8237,0.860156,0.644706
3,0.7111,0.819797,0.656471
4,0.5088,0.872277,0.661176
5,0.2918,1.065545,0.668235
6,0.1613,1.097667,0.672941
7,0.134,1.29367,0.68
8,0.0856,1.380286,0.691765
9,0.0799,1.545824,0.698824
10,0.0349,1.51778,0.677647


Evaluation results for ProsusAI/finbert: {'eval_loss': 1.5458240509033203, 'eval_accuracy': 0.6988235294117647, 'eval_runtime': 2.9016, 'eval_samples_per_second': 146.471, 'eval_steps_per_second': 9.305, 'epoch': 10.0}


In [38]:
best_model_dir = "./finetuned_increased_dropout_10epochs"
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained(best_model_dir)
model = AutoModelForSequenceClassification.from_pretrained(best_model_dir)
model.eval()
model.config.output_hidden_states = True

In [97]:
# import torch
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

In [40]:
embeddings_dict = {}
predictions_dict = {}

In [92]:
df.head()

Unnamed: 0,text,label
0,$NVDA Put Writer ✍️,0
1,$AAPL\n\nBreakout or back to midline?,1
2,$AAPL daily shooting star at strong supply zon...,0
3,$AMZN (daily) Shooting star to bearish engulfing,0
4,$AMZN\n\nRetesting 11ema on daily \n\nSpot for...,2


In [72]:
dfs = pd.read_csv(data_path, parse_dates=['timestamp'])

In [93]:
dfs.head()

Unnamed: 0,timestamp,description,url,embed_title,tweet_type,financial_info,sentiment,tickers_list,ticker,exchanges,price,percentage_change,4h_ta_result,4h_ta_details,1d_ta_result,1d_ta_details
0,2023-11-15T01:06:39.739000+00:00,Currently at $2860--Documenting everything in ...,https://twitter.com/user/status/17245942794969...,<:quote_tweet:1130467736133316700> Albertrays ...,quote tweet,"[{'ticker': '$AI', 'exchanges': [], 'price': '...",Bullish,['$AI'],$AI,[],29.63,+6.85%,BUY,"14 buy, 8 hold, 4 sell",STRONG_BUY,"16 buy, 9 hold, 1 sell"
1,2023-11-15T02:11:41.182000+00:00,$NET \n\nWas waiting on Cloudflare and it erup...,https://twitter.com/user/status/17246106454321...,Don't follow Shardi B If You Hate Money tweete...,tweet,"[{'ticker': '$NET', 'exchanges': [], 'price': ...",Neutral,['$NET'],$NET,[],70.61,+9.52%,STRONG_BUY,"16 buy, 9 hold, 1 sell",STRONG_BUY,"17 buy, 9 hold, 0 sell"
2,2023-11-15T02:11:47.180000+00:00,"RT @TrendSpider: $SPY, $QQQ and $AAPL all prin...",https://twitter.com/user/status/17246108137746...,<:retweet:1130467740306657360> Theta Warrior r...,retweet,"[{'ticker': '$QQQ', 'exchanges': [], 'price': ...",Bearish,"['$QQQ', '$AAPL', '$SPY']",$QQQ,[],385.33,+2.11%,BUY,"17 buy, 6 hold, 3 sell",STRONG_BUY,"17 buy, 9 hold, 0 sell"
3,2023-11-15T04:01:40.263000+00:00,RT @TriggerTrades: $SPX remained above 4375 th...,https://twitter.com/user/status/17246376379723...,<:retweet:1130467740306657360> ReciKnows retwe...,retweet,"[{'ticker': '$SPX', 'exchanges': [], 'price': ...",Bearish,['$SPX'],$SPX,[],4495.71,+1.87%,,,,
4,2023-11-15T04:01:44.817000+00:00,RT @coiledspringcap: #SPX equal weight breakin...,https://twitter.com/user/status/17246376783927...,<:retweet:1130467740306657360> ReciKnows retwe...,retweet,"[{'ticker': '$SPX', 'exchanges': [], 'price': ...",Bullish,['$SPX'],$SPX,[],4495.71,+1.87%,,,,


In [98]:
# batch_size = 16

# for ticker in target_tickers:
#     print(f"\nProcessing {ticker}...")
#     df_ticker = dfs[dfs['ticker'] == ticker].copy()
#     if df_ticker.empty:
#         continue
#     texts = df['text'].tolist()

#     all_preds = []
#     all_embeds = []

#     # Process the texts in batches.
#     for i in range(0, len(texts), batch_size):
#         batch_texts = texts[i:i+batch_size]
#         # Tokenize the batch.
#         inputs = tokenizer(batch_texts, padding=True, truncation=True, max_length=128, return_tensors="pt")
#         inputs = {k: v.to(device) for k, v in inputs.items()}

#         # Run inference on the batch.
#         with torch.no_grad():
#             outputs = model(**inputs)
#         logits = outputs.logits.cpu().numpy()
#         batch_preds = np.argmax(logits, axis=-1)
#         all_preds.extend(batch_preds)

#         # Extract embeddings from the BERT encoder (CLS token from pooler output).
#         with torch.no_grad():
#             bert_outputs = model.bert(**inputs, output_hidden_states=True)
#         pooler_output = bert_outputs.pooler_output  # shape: (batch_size, hidden_size)
#         batch_embeddings = pooler_output.cpu().numpy()
#         all_embeds.append(batch_embeddings)

#         # Free up GPU memory.
#         torch.cuda.empty_cache()

#     # Concatenate embeddings from all batches.
#     all_embeds = np.concatenate(all_embeds, axis=0)

#     # Save predictions and embeddings.
#     df_ticker['predicted_label'] = all_preds
#     df_ticker['predicted_sentiment'] = df_ticker['predicted_label'].map({0: "BEARISH", 1: "NEUTRAL", 2: "BULLISH"})
#     predictions_dict[ticker] = df_ticker.copy()
#     embeddings_dict[ticker] = all_embeds

#     # Optionally, save individual predictions to CSV.
#     df_ticker.to_csv(f"{ticker}_sentiment_predictions.csv", index=False)

#     # TSNE Visualization for current company's embeddings.
#     tsne = TSNE(n_components=2, random_state=42)
#     embeddings_2d = tsne.fit_transform(all_embeds)

#     plt.figure(figsize=(8, 6))
#     scatter = plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=all_preds, cmap='viridis', alpha=0.7)
#     plt.colorbar(scatter, ticks=[0, 1, 2], label='Predicted Sentiment')
#     plt.title(f"TSNE Visualization of {ticker} Tweet Embeddings")
#     plt.xlabel("Dimension 1")
#     plt.ylabel("Dimension 2")
#     plt.savefig(f"{ticker}_tsne.png")
#     plt.show()

In [None]:
# all_embeddings = np.concatenate(list(embeddings_dict.values()), axis=0)
# all_tickers = []
# all_preds = []
# for ticker, emb in embeddings_dict.items():
#     count = emb.shape[0]
#     all_tickers.extend([ticker] * count)
#     # For color coding, use corresponding predictions.
#     preds = predictions_dict[ticker]['predicted_label'].values
#     all_preds.extend(preds)

In [None]:
# tsne_global = TSNE(n_components=2, random_state=42)
# global_embeddings_2d = tsne_global.fit_transform(all_embeddings)

In [None]:
# plt.figure(figsize=(10, 8))
# # Color-code by ticker. We'll assign each ticker a unique integer.
# ticker_to_int = {ticker: idx for idx, ticker in enumerate(target_tickers)}
# colors = [ticker_to_int[t] for t in all_tickers]
# scatter = plt.scatter(global_embeddings_2d[:, 0], global_embeddings_2d[:, 1], c=colors, cmap='tab10')
# plt.colorbar(scatter, ticks=list(ticker_to_int.values()), label='Ticker')
# plt.title("Global TSNE Visualization of Tweet Embeddings Across Companies")
# plt.xlabel("Dimension 1")
# plt.ylabel("Dimension 2")
# plt.savefig("global_tsne.png")
# plt.show()

In [None]:
# np.savez("global_embeddings.npz", embeddings=all_embeddings, tickers=np.array(all_tickers), predictions=np.array(all_preds))