In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Load data
df = pd.read_csv('/kaggle/input/financial-sentiment-analysis/data.csv')

# Remove Class 1 (neutral sentiment).
# The explicit copy detaches the filtered frame from the original one, so the
# column assignment below cannot trigger pandas' SettingWithCopyWarning.
df = df[df['Sentiment'] != 'neutral'].copy()

# Encode labels. LabelEncoder assigns ids in sorted class order, so with only
# 'negative' and 'positive' left this gives negative=0, positive=1.
encoder = LabelEncoder()
df['Sentiment'] = encoder.fit_transform(df['Sentiment'])

# Split data. Stratify on the label so the train and test splits keep the same
# negative/positive ratio; the classes are imbalanced, and the class weights
# computed later are derived from the train split.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['Sentiment'],
)

# Convert to Hugging Face Dataset, using the column names the rest of the
# pipeline expects ('text' for the sentence, 'label' for the encoded class).
column_names = {'Sentence': 'text', 'Sentiment': 'label'}
train_dataset = Dataset.from_pandas(train_df[['Sentence', 'Sentiment']].rename(columns=column_names))
test_dataset = Dataset.from_pandas(test_df[['Sentence', 'Sentiment']].rename(columns=column_names))

# Load the RoBERTa tokenizer and a sequence-classification model from the
# same pretrained checkpoint.
checkpoint = 'roberta-base'
tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
# Two output labels: negative and positive.
model = RobertaForSequenceClassification.from_pretrained(checkpoint, num_labels=2)


# Tokenize data
def tokenize_function(examples):
    """Tokenize the 'text' entries of a batch.

    Every sequence is padded to, and truncated at, 128 tokens, so all encoded
    examples have the same length.
    """
    encode_options = {
        'padding': 'max_length',
        'truncation': True,
        'max_length': 128,
    }
    return tokenizer(examples['text'], **encode_options)


train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

# Set format for PyTorch: expose only the listed columns, as torch tensors.
torch_columns = ['input_ids', 'attention_mask', 'label']
for tokenized_split in (train_dataset, test_dataset):
    tokenized_split.set_format('torch', columns=torch_columns)

# Compute class weights (for 2 classes).
# Each class gets total_samples / (2 * count_of_that_class), so the class with
# fewer training examples receives the larger weight.
target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_counts = np.bincount(train_df['Sentiment'])
total_samples = len(train_df)
balanced_weights = total_samples / (2 * class_counts)
class_weights = torch.tensor(balanced_weights, dtype=torch.float).to(target_device)

# Custom Trainer with class weights
class WeightedTrainer(Trainer):
    """Trainer whose loss is cross-entropy weighted by the module-level `class_weights`."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the class-weighted cross-entropy loss for one batch.

        Args:
            model: The model to run the forward pass on.
            inputs: Mapping of model inputs; must contain 'labels'.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: Accepted for signature compatibility with the
                base class; not used here.

        Returns:
            The loss tensor, or a ``(loss, outputs)`` tuple when
            ``return_outputs`` is true.
        """
        labels = inputs.get('labels')
        # Run the forward pass without the labels: if they are passed, the
        # model also computes its own unweighted loss, which this method would
        # discard. `inputs` itself is left unmodified.
        model_inputs = {name: value for name, value in inputs.items() if name != 'labels'}
        outputs = model(**model_inputs)
        logits = outputs.get('logits')
        # Follow the logits' device instead of relying on the device that was
        # chosen when `class_weights` was created.
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Metrics callback handed to the Trainer
def compute_metrics(eval_pred):
    """Score one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        A dict with 'accuracy' (overall accuracy) and 'report' (the
        classification report as a nested dict).
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted_classes = np.argmax(logits, axis=-1)

    metrics = {}
    metrics['accuracy'] = accuracy_score(labels, predicted_classes)
    metrics['report'] = classification_report(labels, predicted_classes, output_dict=True)
    return metrics

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=200,
    weight_decay=0.01,
    learning_rate=2e-5,
    logging_dir='./logs',
    logging_steps=10,
    # `eval_strategy` is the current name of the deprecated
    # `evaluation_strategy` argument, which newer transformers releases reject.
    eval_strategy='epoch',
    # Must match the evaluation schedule so load_best_model_at_end can pair
    # each saved checkpoint with an evaluation result.
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
    report_to='none',
)

# Build the class-weighted trainer. The test split doubles as the evaluation
# set that is scored according to the evaluation schedule in `training_args`.
trainer_kwargs = {
    'model': model,
    'args': training_args,
    'train_dataset': train_dataset,
    'eval_dataset': test_dataset,
    'compute_metrics': compute_metrics,
}
trainer = WeightedTrainer(**trainer_kwargs)

# Run fine-tuning
print("Training model...")
trainer.train()

# Evaluate
print("Evaluating model...")
eval_results = trainer.evaluate()
accuracy = eval_results['eval_accuracy']
print("\n--- RoBERTa Results (Class 1 Removed) ---")
print(f"Accuracy: {accuracy}")

# Predict and get detailed report
predictions = trainer.predict(test_dataset)
y_pred = np.argmax(predictions.predictions, axis=-1)
# Take the reference labels from the prediction output: they are a NumPy array
# aligned row-for-row with `predictions.predictions`, whereas indexing the
# dataset column returns a type that depends on the dataset's active format.
y_test = predictions.label_ids
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2169 [00:00<?, ? examples/s]

Map:   0%|          | 0/543 [00:00<?, ? examples/s]

Training model...


Epoch,Training Loss,Validation Loss,Accuracy,Report
1,0.4033,0.242926,0.928177,"{'0': {'precision': 0.8397790055248618, 'recall': 0.9382716049382716, 'f1-score': 0.8862973760932945, 'support': 162}, '1': {'precision': 0.9723756906077348, 'recall': 0.9238845144356955, 'f1-score': 0.9475100942126514, 'support': 381}, 'accuracy': 0.9281767955801105, 'macro avg': {'precision': 0.9060773480662982, 'recall': 0.9310780596869835, 'f1-score': 0.916903735152973, 'support': 543}, 'weighted avg': {'precision': 0.93281645859406, 'recall': 0.9281767955801105, 'f1-score': 0.9292477363206886, 'support': 543}}"
2,0.3062,0.316286,0.944751,"{'0': {'precision': 0.9342105263157895, 'recall': 0.8765432098765432, 'f1-score': 0.9044585987261147, 'support': 162}, '1': {'precision': 0.948849104859335, 'recall': 0.973753280839895, 'f1-score': 0.961139896373057, 'support': 381}, 'accuracy': 0.9447513812154696, 'macro avg': {'precision': 0.9415298155875622, 'recall': 0.9251482453582192, 'f1-score': 0.9327992475495859, 'support': 543}, 'weighted avg': {'precision': 0.9444817941336364, 'recall': 0.9447513812154696, 'f1-score': 0.9442294539811515, 'support': 543}}"
3,0.0777,0.259561,0.952118,"{'0': {'precision': 0.9146341463414634, 'recall': 0.9259259259259259, 'f1-score': 0.9202453987730062, 'support': 162}, '1': {'precision': 0.9683377308707124, 'recall': 0.963254593175853, 'f1-score': 0.9657894736842105, 'support': 381}, 'accuracy': 0.9521178637200737, 'macro avg': {'precision': 0.9414859386060879, 'recall': 0.9445902595508895, 'f1-score': 0.9430174362286083, 'support': 543}, 'weighted avg': {'precision': 0.9523156669780082, 'recall': 0.9521178637200737, 'f1-score': 0.952201738627829, 'support': 543}}"
4,0.0959,0.324353,0.948435,"{'0': {'precision': 0.935064935064935, 'recall': 0.8888888888888888, 'f1-score': 0.9113924050632912, 'support': 162}, '1': {'precision': 0.9537275064267352, 'recall': 0.973753280839895, 'f1-score': 0.9636363636363636, 'support': 381}, 'accuracy': 0.9484346224677717, 'macro avg': {'precision': 0.9443962207458352, 'recall': 0.9313210848643919, 'f1-score': 0.9375143843498275, 'support': 543}, 'weighted avg': {'precision': 0.9481596674569164, 'recall': 0.9484346224677717, 'f1-score': 0.948049768260972, 'support': 543}}"
5,0.0247,0.261257,0.953959,"{'0': {'precision': 0.9151515151515152, 'recall': 0.9320987654320988, 'f1-score': 0.9235474006116208, 'support': 162}, '1': {'precision': 0.9708994708994709, 'recall': 0.963254593175853, 'f1-score': 0.9670619235836627, 'support': 381}, 'accuracy': 0.9539594843462247, 'macro avg': {'precision': 0.9430254930254931, 'recall': 0.9476766793039759, 'f1-score': 0.9453046620976417, 'support': 543}, 'weighted avg': {'precision': 0.9542674841017382, 'recall': 0.9539594843462247, 'f1-score': 0.9540796902107884, 'support': 543}}"


Evaluating model...



--- RoBERTa Results (Class 1 Removed) ---
Accuracy: 0.9539594843462247
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.93      0.92       162
           1       0.97      0.96      0.97       381

    accuracy                           0.95       543
   macro avg       0.94      0.95      0.95       543
weighted avg       0.95      0.95      0.95       543

