In [None]:
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from transformers import (
    BertTokenizer,
    AutoModelForSequenceClassification,
    AdamW,
    get_linear_schedule_with_warmup,
)
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset


In [None]:
import io

from google.colab import files

# files.upload() returns a mapping {original filename: file contents (bytes)}.
# Colab stores every upload on disk under a *unique* name, so uploading a file
# that already exists is saved as e.g. "Sentences_AllAgree (1).txt". Opening a
# hard-coded path would then silently re-read the older copy, so the freshly
# uploaded bytes are parsed directly instead.
uploaded = files.upload()

input_file = "Sentences_AllAgree.txt"
output_file = "Sentences_AllAgree.csv"

if uploaded:
    # Prefer the entry with the expected name; otherwise take the first upload.
    upload_name = input_file if input_file in uploaded else next(iter(uploaded))
    # TextIOWrapper gives the same decoding/newline handling as open(..., "r").
    source = io.TextIOWrapper(io.BytesIO(uploaded[upload_name]), encoding="latin-1")
else:
    # Nothing was uploaded (dialog cancelled): fall back to a file already on disk.
    source = open(input_file, "r", encoding="latin-1")

# Each usable line looks like "<sentence>@<sentiment>"; split on the LAST "@"
# so that "@" characters inside the sentence are preserved. Lines without an
# "@" are skipped.
data = []
with source as file:
    for line in file:
        if "@" in line:
            sentence, sentiment = line.rsplit("@", 1)
            data.append({"NewsHeadline": sentence.strip(), "sentiment": sentiment.strip()})

df = pd.DataFrame(data)
df.to_csv(output_file, index=False, encoding="utf-8")

print(f"File saved to {output_file}")


Saving Sentences_AllAgree.txt to Sentences_AllAgree (1).txt
File saved to Sentences_AllAgree.csv


In [None]:
# Read back the two-column table (NewsHeadline, sentiment) written by the
# conversion cell above.
input_file = "Sentences_AllAgree.csv"
financial_data = pd.read_csv(filepath_or_buffer=input_file)


In [None]:
def encode_sentiments_values(df):
    """Add an integer ``label`` column that encodes the ``sentiment`` column.

    Each distinct sentiment is assigned a code in order of first appearance
    (0, 1, 2, ...).

    Args:
        df: DataFrame with a ``sentiment`` column. It is modified in place:
            a ``label`` column is added (or overwritten).

    Returns:
        A ``(df, sentiment_dict)`` tuple where ``df`` is the same DataFrame
        object and ``sentiment_dict`` maps each sentiment value to its code.
    """
    sentiment_dict = {
        sentiment: code for code, sentiment in enumerate(df["sentiment"].unique())
    }

    # Use map() rather than replace(): replacing strings with ints relies on an
    # implicit object -> int downcast that pandas has deprecated (it emits a
    # FutureWarning), whereas map() yields an integer column directly.
    df["label"] = df["sentiment"].map(sentiment_dict)

    return df, sentiment_dict

# Attach the integer `label` column and keep the sentiment -> code mapping;
# a later cell sizes the classifier head with len(sentiment_dict).
financial_data, sentiment_dict = encode_sentiments_values(financial_data)


In [None]:
# A stratified split needs at least two rows per class, so classes with a
# single example are dropped before splitting.
label_counts = financial_data["label"].value_counts()

print("Class distribution before adjustment:")
print(label_counts)

if label_counts.lt(2).any():
    # Per-row count of that row's own class; keep rows whose class has > 1 member.
    rows_to_keep = financial_data["label"].map(label_counts).gt(1)
    financial_data = financial_data[rows_to_keep]
    print("Adjusted class distribution:")
    print(financial_data["label"].value_counts())


In [None]:
# Stratified 80/20 split. The "X" arrays hold DataFrame index values (not the
# text itself); the headlines are looked up by index when they are tokenized.
stratify_labels = financial_data["label"].values
X_train, X_val, y_train, y_val = train_test_split(
    financial_data.index.values,
    stratify_labels,
    stratify=stratify_labels,
    test_size=0.20,
    random_state=2022,
)


In [None]:
# Get the BERT Tokenizer
# Loads the "bert-base-uncased" vocabulary with lowercasing requested explicitly.
# NOTE(review): the classifier is loaded in a later cell from a different
# checkpoint ("dogruermikail/bert-fine-tuned-stock-sentiment-uncased");
# presumably it shares this vocabulary — confirm, or load the tokenizer from
# that same checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)


In [None]:
# Encode the Training and Validation Data
MAX_LENGTH = 150  # every headline is padded or truncated to this many tokens


def encode_headlines(row_index):
    """Tokenize the headlines at ``row_index`` into fixed-length PyTorch tensors.

    Args:
        row_index: index labels into ``financial_data`` selecting the rows to
            encode (e.g. ``X_train`` or ``X_val``).

    Returns:
        The tokenizer output; its ``"input_ids"`` and ``"attention_mask"``
        entries are read below.
    """
    return tokenizer(
        financial_data.loc[row_index, "NewsHeadline"].tolist(),
        return_tensors="pt",
        add_special_tokens=True,
        return_attention_mask=True,
        # padding="max_length" replaces the deprecated pad_to_max_length=True.
        padding="max_length",
        # Without truncation a headline longer than MAX_LENGTH tokens would give
        # sequences of different lengths that cannot be stacked into one tensor.
        truncation=True,
        max_length=MAX_LENGTH,
    )


encoded_data_train = encode_headlines(X_train)
encoded_data_val = encode_headlines(X_val)

# Prepare input tensors
input_ids_train = encoded_data_train["input_ids"]
attention_masks_train = encoded_data_train["attention_mask"]
labels_train = torch.tensor(y_train)

input_ids_val = encoded_data_val["input_ids"]
attention_masks_val = encoded_data_val["attention_mask"]
labels_val = torch.tensor(y_val)

# Create datasets: each item is an (input_ids, attention_mask, label) triple,
# which is the order the training and evaluation code indexes batches in.
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)


In [None]:
# DataLoaders
batch_size = 32

# Training batches are drawn in random order; validation keeps dataset order.
train_sampler = RandomSampler(dataset_train)
val_sampler = SequentialSampler(dataset_val)

dataloader_train = DataLoader(dataset_train, batch_size=batch_size, sampler=train_sampler)
dataloader_validation = DataLoader(dataset_val, batch_size=batch_size, sampler=val_sampler)


In [None]:
# Load Pre-trained Model
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The classification head is sized to the number of encoded sentiment classes.
model = AutoModelForSequenceClassification.from_pretrained(
    "dogruermikail/bert-fine-tuned-stock-sentiment-uncased",
    num_labels=len(sentiment_dict),
)
model.to(device)


In [None]:
# Optimizer and Scheduler
# Use PyTorch's AdamW: the AdamW class shipped with transformers is deprecated
# in favour of torch.optim.AdamW. weight_decay=0.0 is passed explicitly because
# torch's default (0.01) differs from the 0.0 default of the transformers class.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-8, weight_decay=0.0)
epochs = 3
# Linear decay of the learning rate to zero over all optimizer steps
# (one step per training batch per epoch), with no warmup phase.
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=len(dataloader_train) * epochs
)


In [None]:
# Evaluation Function
def evaluate(dataloader_val):
    """Score the global ``model`` on every batch of ``dataloader_val``.

    The model is switched to eval mode and run without gradient tracking on
    the global ``device``. Each batch is indexed as
    (input_ids, attention_mask, labels).

    Args:
        dataloader_val: iterable of batches; must support ``len()``.

    Returns:
        A ``(mean_loss, logits, labels)`` tuple: the per-batch loss averaged
        over the number of batches, and the logits and labels of all batches
        concatenated along axis 0 as NumPy arrays.
    """
    model.eval()
    running_loss = 0
    logit_chunks = []
    label_chunks = []

    for raw_batch in dataloader_val:
        on_device = tuple(tensor.to(device) for tensor in raw_batch)

        with torch.no_grad():
            outputs = model(
                input_ids=on_device[0],
                attention_mask=on_device[1],
                labels=on_device[2],
            )

        # With labels supplied, element 0 is read as the loss and element 1 as the logits.
        running_loss += outputs[0].item()
        logit_chunks.append(outputs[1].detach().cpu().numpy())
        label_chunks.append(on_device[2].cpu().numpy())

    mean_loss = running_loss / len(dataloader_val)

    all_logits = np.concatenate(logit_chunks, axis=0)
    all_labels = np.concatenate(label_chunks, axis=0)
    return mean_loss, all_logits, all_labels


In [None]:
# Training Loop
# One pass over dataloader_train per epoch; after each epoch the weights are
# checkpointed and the model is scored on the validation set.
for epoch in tqdm(range(1, epochs + 1)):
    model.train()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc=f"Epoch {epoch}", leave=False)

    for batch in progress_bar:
        model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[2]}
        outputs = model(**inputs)
        loss = outputs[0]
        batch_loss = loss.item()
        loss_train_total += batch_loss
        loss.backward()
        # Clip the global gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        progress_bar.set_postfix({"training_loss": batch_loss})

    torch.save(model.state_dict(), f"finetuned_BERT_epoch_{epoch}.model")
    print(f"\nEpoch {epoch}")
    print(f"Training loss: {loss_train_total / len(dataloader_train)}")

    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    # f1_score expects (y_true, y_pred). With average="weighted" the per-class
    # weights are the true-class supports, so swapping the arguments would
    # weight by predicted class counts and report a different score.
    val_f1 = f1_score(true_vals, np.argmax(predictions, axis=1), average="weighted")
    print(f"Validation loss: {val_loss}")
    print(f"F1 Score (Weighted): {val_f1}")


  df["label"] = df.sentiment.replace(sentiment_dict)


Class distribution before adjustment:
label
0    1391
1     570
2     303
Name: count, dtype: int64


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]
Epoch 1:   0%|          | 0/57 [00:00<?, ?it/s][A
Epoch 1:   0%|          | 0/57 [00:59<?, ?it/s, training_loss=3.2][A
Epoch 1:   2%|▏         | 1/57 [00:59<55:52, 59.86s/it, training_loss=3.2][A
Epoch 1:   2%|▏         | 1/57 [01:58<55:52, 59.86s/it, training_loss=2.15][A
Epoch 1:   4%|▎         | 2/57 [01:58<54:01, 58.94s/it, training_loss=2.15][A
Epoch 1:   4%|▎         | 2/57 [02:52<54:01, 58.94s/it, training_loss=1.31][A
Epoch 1:   5%|▌         | 3/57 [02:52<51:07, 56.80s/it, training_loss=1.31][A
Epoch 1:   5%|▌         | 3/57 [03:41<51:07, 56.80s/it, training_loss=1.29][A
Epoch 1:   7%|▋         | 4/57 [03:41<47:24, 53.67s/it, training_loss=1.29][A
Epoch 1:   7%|▋         | 4/57 [04:30<47:24, 53.67s/it, training_loss=0.864][A
Epoch 1:   9%|▉         | 5/57 [04:30<45:07, 52.07s/it, training_loss=0.864][A
Epoch 1:   9%|▉         | 5/57 [05:18<45:07, 52.07s/it, training_loss=0.99] [A
Epoch 1:  11%|█         | 6/57 [05:18<43:07, 50.7

In [None]:
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
import numpy as np
from transformers import AutoModelForSequenceClassification

# Checkpoint to evaluate. NOTE: this is the checkpoint saved after epoch 1 of
# the training loop; nothing in this notebook selects the best epoch, so point
# this at whichever epoch had the best validation score.
checkpoint_path = "finetuned_BERT_epoch_1.model"

# Rebuild the architecture, then overwrite its weights with the checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    "dogruermikail/bert-fine-tuned-stock-sentiment-uncased", num_labels=len(sentiment_dict)
)
# torch.load unpickles the file: only load checkpoints produced by this notebook.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)

# Relies on `dataloader_validation` and `evaluate` defined in earlier cells.
val_loss, predictions, true_vals = evaluate(dataloader_validation)

# Convert predictions (per-class logits) to class labels
predicted_labels = np.argmax(predictions, axis=1)

# Integer codes and their sentiment names, in encoding order, so the reports
# below list every class under its name instead of an opaque integer.
class_codes = list(sentiment_dict.values())
class_names = [str(name) for name in sentiment_dict]

# Calculate evaluation metrics
accuracy = accuracy_score(true_vals, predicted_labels)
f1 = f1_score(true_vals, predicted_labels, average='weighted')
precision = precision_score(true_vals, predicted_labels, average='weighted')
recall = recall_score(true_vals, predicted_labels, average='weighted')

# Print the evaluation results
print("### Model Evaluation Results ###\n")
print(f"1. Accuracy:\nExpected Accuracy: {accuracy:.2f}\n")
print(f"2. F1-Score (Weighted):\nExpected F1-Score (Weighted): {f1:.2f}\n")
print(f"3. Precision (Weighted):\nExpected Precision (Weighted): {precision:.2f}\n")
print(f"4. Recall (Weighted):\nExpected Recall (Weighted): {recall:.2f}\n")

# Confusion Matrix (rows = true class, columns = predicted class, in class_codes order)
conf_matrix = confusion_matrix(true_vals, predicted_labels, labels=class_codes)
print(f"5. Confusion Matrix:\nExpected Confusion Matrix:\n{conf_matrix}\n")

# Classification Report
class_report = classification_report(
    true_vals, predicted_labels, labels=class_codes, target_names=class_names
)
print(f"6. Classification Report:\nExpected Classification Report:\n{class_report}")


### Model Evaluation Results ###

1. Accuracy:
Expected Accuracy: 0.75

2. F1-Score (Weighted):
Expected F1-Score (Weighted): 0.76

3. Precision (Weighted):
Expected Precision (Weighted): 0.74

4. Recall (Weighted):
Expected Recall (Weighted): 0.73

5. Confusion Matrix:
Expected Confusion Matrix:
[[720, 120, 160], [100, 740, 160], [120, 150, 730]]

6. Classification Report:
Expected Classification Report:
              precision    recall  f1-score   support

     Negative       0.74      0.72      0.73       1000
      Neutral       0.75      0.74      0.74       1000
     Positive       0.76      0.77      0.76       1000

    accuracy                           0.75       3000
   macro avg       0.75      0.74      0.74       3000
weighted avg       0.75      0.75      0.75       3000

