In [41]:
from google.colab import drive
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from transformers import EarlyStoppingCallback
import torch.nn as nn
import torch.nn.functional as F

In [2]:
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
stock_data = '/content/drive/MyDrive/stock_data.csv'
df = pd.read_csv(stock_data)
df.head()

Unnamed: 0,Text,Sentiment
0,Kickers on my watchlist XIDE TIT SOQ PNK CPW B...,1
1,user: AAP MOVIE. 55% return for the FEA/GEED i...,1
2,user I'd be afraid to short AMZN - they are lo...,1
3,MNTA Over 12.00,1
4,OI Over 21.37,1


In [4]:
missing_values = df.isnull().sum()
print(missing_values)

Text         0
Sentiment    0
dtype: int64


In [5]:
le = LabelEncoder()
df['Sentiment'] = le.fit_transform(df['Sentiment'])

In [16]:
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
def tokenize_function(sentences):
    return tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')

In [18]:
X = df['Text']
y = df['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [19]:
train_encodings = tokenize_function(X_train.tolist())
test_encodings = tokenize_function(X_test.tolist())

In [20]:
train_labels = torch.tensor(y_train.tolist())
test_labels = torch.tensor(y_test.tolist())

In [21]:
class FinancialSentimentDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        return len(self.labels)

In [22]:
train_dataset = FinancialSentimentDataset(train_encodings, train_labels)
test_dataset = FinancialSentimentDataset(test_encodings, test_labels)

In [23]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

In [24]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

In [25]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.6312,0.614421
2,0.6114,0.518424
3,0.3695,0.460036
4,0.3206,0.482772
5,0.2192,0.527828
6,0.1494,0.651955


TrainOutput(global_step=870, training_loss=0.39403931375207574, metrics={'train_runtime': 92.3953, 'train_samples_per_second': 501.324, 'train_steps_per_second': 15.693, 'total_flos': 871201815399360.0, 'train_loss': 0.39403931375207574, 'epoch': 6.0})

In [26]:
predictions = trainer.predict(test_dataset)
y_pred = torch.argmax(torch.tensor(predictions.predictions), axis=1).numpy()

In [27]:
accuracy = accuracy_score(y_test, y_pred)
target_names = ['Negative', 'Positive']
report = classification_report(y_test, y_pred, target_names = target_names)

In [28]:
print(f'BERT Accuracy: {accuracy}')
print(report)

BERT Accuracy: 0.7808455565142364
              precision    recall  f1-score   support

    Negative       0.67      0.79      0.72       421
    Positive       0.86      0.78      0.82       738

    accuracy                           0.78      1159
   macro avg       0.77      0.78      0.77      1159
weighted avg       0.79      0.78      0.78      1159



In [43]:
def balanced_binary_focal_loss(logits, labels, gamma=2.0, alpha=0.25):

    probs = F.softmax(logits, dim=1)

    true_probs = probs[range(len(labels)), labels]

    true_probs = torch.clamp(true_probs, min=1e-7, max=1 - 1e-7)

    bce_loss = -torch.log(true_probs)

    focal_loss = (alpha * (1 - true_probs).pow(gamma) * bce_loss).mean()

    return focal_loss

In [44]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss = balanced_binary_focal_loss(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [45]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

In [46]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.0171,0.033599
2,0.0147,0.036124
3,0.0058,0.038792
4,0.013,0.040005


TrainOutput(global_step=580, training_loss=0.013369431879756779, metrics={'train_runtime': 65.1244, 'train_samples_per_second': 711.254, 'train_steps_per_second': 22.265, 'total_flos': 580801210266240.0, 'train_loss': 0.013369431879756779, 'epoch': 4.0})

In [47]:
predictions = trainer.predict(test_dataset)
y_pred = torch.argmax(torch.tensor(predictions.predictions), axis=1).numpy()

In [48]:
accuracy = accuracy_score(y_test, y_pred)
target_names = ['Negative', 'Positive']
report = classification_report(y_test, y_pred, target_names = target_names)

In [49]:
print(f'BERT with Focal Loss Accuracy: {accuracy}')
print(report)

BERT with Focal Loss Accuracy: 0.8101811906816221
              precision    recall  f1-score   support

    Negative       0.77      0.68      0.72       421
    Positive       0.83      0.89      0.86       738

    accuracy                           0.81      1159
   macro avg       0.80      0.78      0.79      1159
weighted avg       0.81      0.81      0.81      1159

