In [19]:
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd

In [20]:
df = pd.read_csv("emotion_dataset.csv")
df.head()


Unnamed: 0.1,Unnamed: 0,Emotion,Text,Clean_Text
0,0,neutral,Why ?,
1,1,joy,Sage Act upgrade on my to do list for tommorow.,Sage Act upgrade list tommorow
2,2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...,WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...
3,3,joy,Such an eye ! The true hazel eye-and so brill...,eye true hazel eyeand brilliant Regular feat...
4,4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...,ugh babe hugggzzz u babe naamazed nga ako e...


In [21]:
df.columns = df.columns.str.strip()


In [22]:
unique_labels = df["Emotion"].unique()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}
df["label_id"] = df["Emotion"].map(label2id)

In [23]:
df["Final_Text"] = df["Clean_Text"].fillna(df["Text"])
df

Unnamed: 0.1,Unnamed: 0,Emotion,Text,Clean_Text,label_id,Final_Text
0,0,neutral,Why ?,,0,Why ?
1,1,joy,Sage Act upgrade on my to do list for tommorow.,Sage Act upgrade list tommorow,1,Sage Act upgrade list tommorow
2,2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...,WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...,2,WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...
3,3,joy,Such an eye ! The true hazel eye-and so brill...,eye true hazel eyeand brilliant Regular feat...,1,eye true hazel eyeand brilliant Regular feat...
4,4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...,ugh babe hugggzzz u babe naamazed nga ako e...,1,ugh babe hugggzzz u babe naamazed nga ako e...
...,...,...,...,...,...,...
34787,34787,surprise,@MichelGW have you gift! Hope you like it! It'...,gift Hope like it hand wear Itll warm Lol,4,gift Hope like it hand wear Itll warm Lol
34788,34788,joy,The world didnt give it to me..so the world MO...,world didnt meso world DEFINITELY cnt away,1,world didnt meso world DEFINITELY cnt away
34789,34789,anger,A man robbed me today .,man robbed today,5,man robbed today
34790,34790,fear,"Youu call it JEALOUSY, I call it of #Losing YO...",Youu JEALOUSY #Losing YOU,3,Youu JEALOUSY #Losing YOU


In [24]:
print(df["Emotion"].value_counts())

Emotion
joy         11045
sadness      6722
fear         5410
anger        4297
surprise     4062
neutral      2254
disgust       856
shame         146
Name: count, dtype: int64


In [25]:
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df["Final_Text"].astype(str).tolist(),     # force to list of strings
    df["label_id"].tolist(),
    test_size=0.3,
    random_state=42
)



In [26]:
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=64):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_len)
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

In [27]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

train_dataset = EmotionDataset(train_texts, train_labels, tokenizer)
val_dataset = EmotionDataset(val_texts, val_labels, tokenizer)




In [28]:
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(unique_labels),
    id2label=id2label,
    label2id=label2id
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("✅ Using device:", device)


✅ Using device: cuda


In [30]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="weighted")
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}


In [31]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)


In [32]:

import torch

print("Number of GPU: ", torch.cuda.device_count())
print("GPU Name: ", torch.cuda.get_device_name())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 3060 Ti
Using device: cuda


In [33]:
trainer.train()
metrics = trainer.evaluate()
print("📊 Evaluation Metrics:", metrics)

  0%|          | 0/2286 [00:00<?, ?it/s]

{'loss': 1.3037, 'grad_norm': 6.191958904266357, 'learning_rate': 1.562554680664917e-05, 'epoch': 0.66}


  0%|          | 0/327 [00:00<?, ?it/s]

{'eval_loss': 0.9897739887237549, 'eval_accuracy': 0.6512741904579421, 'eval_f1': 0.6405768122628588, 'eval_precision': 0.6548815072888431, 'eval_recall': 0.6512741904579421, 'eval_runtime': 14.2628, 'eval_samples_per_second': 731.834, 'eval_steps_per_second': 22.927, 'epoch': 1.0}
{'loss': 0.9665, 'grad_norm': 8.959813117980957, 'learning_rate': 1.1251093613298338e-05, 'epoch': 1.31}
{'loss': 0.867, 'grad_norm': 7.085024833679199, 'learning_rate': 6.876640419947507e-06, 'epoch': 1.97}


  0%|          | 0/327 [00:00<?, ?it/s]

{'eval_loss': 0.9438084363937378, 'eval_accuracy': 0.6724468288944242, 'eval_f1': 0.6693225407642677, 'eval_precision': 0.668845524670628, 'eval_recall': 0.6724468288944242, 'eval_runtime': 14.2653, 'eval_samples_per_second': 731.707, 'eval_steps_per_second': 22.923, 'epoch': 2.0}
{'loss': 0.699, 'grad_norm': 8.547130584716797, 'learning_rate': 2.502187226596676e-06, 'epoch': 2.62}


  0%|          | 0/327 [00:00<?, ?it/s]

{'eval_loss': 0.9724576473236084, 'eval_accuracy': 0.6744587085648591, 'eval_f1': 0.6715277480770716, 'eval_precision': 0.6721745907078805, 'eval_recall': 0.6744587085648591, 'eval_runtime': 14.3636, 'eval_samples_per_second': 726.698, 'eval_steps_per_second': 22.766, 'epoch': 3.0}
{'train_runtime': 435.4208, 'train_samples_per_second': 167.796, 'train_steps_per_second': 5.25, 'train_loss': 0.9254322669428895, 'epoch': 3.0}


  0%|          | 0/327 [00:00<?, ?it/s]

📊 Evaluation Metrics: {'eval_loss': 0.9724576473236084, 'eval_accuracy': 0.6744587085648591, 'eval_f1': 0.6715277480770716, 'eval_precision': 0.6721745907078805, 'eval_recall': 0.6744587085648591, 'eval_runtime': 14.2719, 'eval_samples_per_second': 731.368, 'eval_steps_per_second': 22.912, 'epoch': 3.0}


In [34]:
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]

    pred_id = probs.argmax()
    return {
        "text": text,
        "predicted_label": id2label[pred_id],
        "confidence": float(probs[pred_id]),
        "all_probs": {id2label[i]: float(p) for i, p in enumerate(probs)}
    }


In [35]:
print(predict("Shamim is happy!"))
print(predict("Asmita was angry."))
print(predict("Neha is ashamed  !"))


{'text': 'Shamim is happy!', 'predicted_label': 'joy', 'confidence': 0.9660046696662903, 'all_probs': {'neutral': 0.008081045001745224, 'joy': 0.9660046696662903, 'sadness': 0.010274983011186123, 'fear': 0.0015889318892732263, 'surprise': 0.010289025492966175, 'anger': 0.0026728592347353697, 'shame': 0.0003347120655234903, 'disgust': 0.0007536461343988776}}
{'text': 'Asmita was angry.', 'predicted_label': 'anger', 'confidence': 0.9432410001754761, 'all_probs': {'neutral': 0.018520087003707886, 'joy': 0.006749938242137432, 'sadness': 0.01622270978987217, 'fear': 0.005850893445312977, 'surprise': 0.001750377588905394, 'anger': 0.9432410001754761, 'shame': 0.0016538455383852124, 'disgust': 0.006011182442307472}}
{'text': 'Neha is ashamed  !', 'predicted_label': 'shame', 'confidence': 0.7902504205703735, 'all_probs': {'neutral': 0.05093798041343689, 'joy': 0.003220803802832961, 'sadness': 0.007157055661082268, 'fear': 0.006124124396592379, 'surprise': 0.013665701262652874, 'anger': 0.01961