In [1]:
import pandas as pd
import numpy as np
import re
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('../../src')
from initial_balanced_dataset import create_balanced_dataset
from min_preprocessing import FinancialTweetPreprocessor

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/tecsongacrama/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/tecsongacrama/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/tecsongacrama/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# --- Load Data ---
# Make sure the dataset files are on disk (the helper skips work if they already exist)
create_balanced_dataset()

print("\nLOADING & PREPROCESSING TEST SET")
# Read the held-out test split and run the tweet text through the preprocessor
preprocessor = FinancialTweetPreprocessor()
df_test = preprocessor.preprocess_dataset(
    pd.read_csv("../../dataset/test_set.csv"),
    text_column='tweet',
)
# Inputs: processed text plus the count columns; target: the sentiment column
X_test = df_test[[
    'processed_text',
    'ticker_count',
    'mention_count',
    'url_count',
    'token_count',
]]
y_test = df_test['sentiment']
print(f"Testing set size: {len(X_test)} samples")


Both train/val and test datasets already exist. Skipping creation.

LOADING & PREPROCESSING TEST SET
Testing set size: 4998 samples


In [None]:
print("\n--- Starting FinBERT Pre-trained (No Fine-tuning) Inference ---")

# Select the compute device: CUDA when torch reports it, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"\nGPU is available! Using: {torch.cuda.get_device_name(0)}")
else:
    print("\nNo GPU available. Inference will be slow on CPU.")

# Load the FinBERT tokenizer and the 3-label sequence-classification model
model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model_pretrained_finbert = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=3
)
# Place the model on the selected device and switch it to evaluation mode
model_pretrained_finbert.to(device).eval()

# Prepare Data for FinBERT
def tokenize_function(examples):
    """Tokenize the ``"processed_text"`` entry of ``examples``.

    The text is passed to the module-level ``tokenizer`` with truncation
    enabled and a maximum length of 128. No padding is requested here.

    Returns:
        Whatever the tokenizer call returns for the given text.
    """
    texts = examples["processed_text"]
    return tokenizer(texts, truncation=True, max_length=128)

# Assemble the test texts and their labels into a two-column DataFrame
test_df_for_hf_pretrained = pd.DataFrame(dict(
    processed_text=X_test['processed_text'].tolist(),
    labels=y_test.tolist(),
))

# Wrap it as a Hugging Face Dataset, tokenize in batches, then drop the raw text column
test_dataset_pretrained_finbert = Dataset.from_pandas(test_df_for_hf_pretrained)
tokenized_test_dataset_pretrained_finbert = (
    test_dataset_pretrained_finbert
    .map(tokenize_function, batched=True)
    .remove_columns(["processed_text"])
)

# Perform Inference Manually
y_pred_pretrained_finbert = []
y_true_pretrained_finbert = []

# The collator pads each batch (padding=True) as the DataLoader assembles it
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)
test_dataloader = DataLoader(tokenized_test_dataset_pretrained_finbert, batch_size=32, collate_fn=data_collator)

# Mapping: FinBERT prediction -> dataset
# FinBERT:   0 = positive, 1 = negative, 2 = neutral
# dataset:   0 = neutral,  1 = bullish,  2 = bearish
mapping_array = np.array([1, 2, 0])  # 0→1, 1→2, 2→0

with torch.no_grad():
    for batch in test_dataloader:
        # Keep the ground-truth labels on the host and OUT of the forward pass.
        # They use the dataset's encoding, not FinBERT's, so letting the model
        # compute a loss from them would be meaningless (and wasted work).
        batch_labels = batch["labels"]
        model_inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}

        logits = model_pretrained_finbert(**model_inputs).logits
        predictions = torch.argmax(logits, dim=-1).cpu().numpy()

        # Translate FinBERT class ids into the dataset's label ids
        y_pred_pretrained_finbert.extend(mapping_array[predictions])
        y_true_pretrained_finbert.extend(batch_labels.cpu().numpy())

# Convert to numpy arrays for sklearn metrics
y_pred_pretrained_finbert = np.array(y_pred_pretrained_finbert)
y_true_pretrained_finbert = np.array(y_true_pretrained_finbert)


# Evaluate the Pre-trained FinBERT Model
target_names = ['Neutral (0)', 'Bullish (1)', 'Bearish (2)']
# Pin the label set explicitly: without `labels`, sklearn infers the classes from
# the data and raises if their count does not match `target_names` (e.g. when a
# class never appears in either the true or the predicted labels).
report_dict_pretrained_finbert = classification_report(
    y_true_pretrained_finbert,
    y_pred_pretrained_finbert,
    labels=list(range(len(target_names))),
    target_names=target_names,
    output_dict=True,
)
macro_precision_pretrained_finbert = report_dict_pretrained_finbert['macro avg']['precision']
macro_recall_pretrained_finbert = report_dict_pretrained_finbert['macro avg']['recall']
macro_f1_score_pretrained_finbert = report_dict_pretrained_finbert['macro avg']['f1-score']

# Get overall accuracy
overall_accuracy_pretrained_finbert = accuracy_score(y_true_pretrained_finbert, y_pred_pretrained_finbert)

# DataFrame for summary results (one row, macro-averaged metrics plus accuracy)
pretrained_finbert_summary = pd.DataFrame({
    'Precision (Macro Avg)': [macro_precision_pretrained_finbert],
    'Recall (Macro Avg)': [macro_recall_pretrained_finbert],
    'F1-Score (Macro Avg)': [macro_f1_score_pretrained_finbert],
    'Overall Accuracy': [overall_accuracy_pretrained_finbert]
}, index=['FinBERT Pre-trained'])

print("\n--- FinBERT Pre-trained Summary Metrics ---")
print(pretrained_finbert_summary)

# Confusion Matrix for Pre-trained FinBERT
class_labels = ['Neutral', 'Bullish', 'Bearish']
# Fix the row/column order to label ids 0..2 so the matrix is always 3x3 and
# lines up with `class_labels`, even if some class is absent from the data.
cm_pretrained_finbert = confusion_matrix(
    y_true_pretrained_finbert,
    y_pred_pretrained_finbert,
    labels=list(range(len(class_labels))),
)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_pretrained_finbert, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix (FinBERT Pre-trained)')
plt.show()

print("\n--- FinBERT Pre-trained Inference Complete ---")