# Multi-Label Emotion Recognition from Text
Using BERT and the GoEmotions dataset

##Install Required Packages

In [14]:
# 📌 Step 1: Install Required Packages
!pip install transformers datasets scikit-learn torch



##Import Libraries

In [15]:
# 📌 Step 2: Import Libraries
import torch
import numpy as np
import pandas as pd
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from datasets import load_dataset
from sklearn.metrics import f1_score, classification_report, hamming_loss
from sklearn.preprocessing import MultiLabelBinarizer
from torch.nn import BCEWithLogitsLoss
from huggingface_hub import login
import random

##Hugging Face Login

In [16]:
# 📌 Step 3: Install and Login to Hugging Face Hub
!pip install huggingface_hub
from huggingface_hub import login
login()  # Enter your token when prompted



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
# 📌 Step 3: Login to Hugging Face (required for GoEmotions dataset)
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

##Load Dataset

In [19]:
# 📌 Step 4: Load GoEmotions Dataset
dataset = load_dataset("go_emotions")
label_list = dataset['train'].features['labels'].feature.names
num_labels = len(label_list)

##PreProcessing

In [20]:
# 📌 Step 5: Preprocessing - Tokenization and Label Binarization
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
mlb = MultiLabelBinarizer(classes=list(range(num_labels)))

def preprocess(example):
    encoding = tokenizer(example['text'], truncation=True, padding='max_length', max_length=128)
    labels = mlb.fit_transform([example['labels']])[0]
    encoding["labels"] = labels
    return encoding

encoded_dataset = dataset.map(preprocess, remove_columns=dataset["train"].column_names)
encoded_dataset.set_format(type="torch")

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

##Trainer

In [22]:
# 📌 Step 6: Custom Trainer with BCE Loss
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels").float()
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = BCEWithLogitsLoss()
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

##Model Development

In [23]:
# 📌 Step 7: Define Model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels, problem_type="multi_label_classification")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
# 📌 Step 8: Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    logging_dir="./logs",
    logging_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [25]:
# 📌 Step 9: Define Metrics
def compute_metrics(pred):
    logits, labels = pred
    preds = (torch.sigmoid(torch.tensor(logits)) > 0.5).int().numpy()
    labels = labels.astype(int)
    f1_micro = f1_score(labels, preds, average='micro')
    hamming = hamming_loss(labels, preds)
    return {"f1_micro": f1_micro, "hamming_loss": hamming}

In [27]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):  # add **kwargs here
        labels = inputs.pop("labels").float()
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = BCEWithLogitsLoss()
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss


##Evaluation

In [28]:
# 📌 Step 11: Evaluate on Test Set
preds = trainer.predict(encoded_dataset["test"])
logits = preds.predictions
true_labels = preds.label_ids
pred_labels = (torch.sigmoid(torch.tensor(logits)) > 0.5).int().numpy()
print(classification_report(true_labels, pred_labels, target_names=label_list, zero_division=0))

Epoch,Training Loss,Validation Loss


                precision    recall  f1-score   support

    admiration       0.09      0.97      0.17       504
     amusement       0.05      0.99      0.09       264
         anger       0.33      0.01      0.01       198
     annoyance       0.06      0.97      0.11       320
      approval       0.06      0.36      0.11       351
        caring       0.00      0.00      0.00       135
     confusion       0.03      0.99      0.05       153
     curiosity       0.05      1.00      0.10       284
        desire       0.00      0.00      0.00        83
disappointment       0.04      0.11      0.06       151
   disapproval       0.05      0.10      0.07       267
       disgust       0.02      0.91      0.04       123
 embarrassment       0.01      0.97      0.01        37
    excitement       0.02      0.68      0.04       103
          fear       0.01      0.86      0.03        78
     gratitude       0.07      1.00      0.12       352
         grief       0.00      0.83      0.00  

In [29]:
# 📌 Step 12: Test on Custom Texts
def predict_emotions(texts):
    tokens = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=128)
    outputs = model(**tokens)
    probs = torch.sigmoid(outputs.logits).detach().numpy()
    predictions = (probs > 0.5).astype(int)
    for i, text in enumerate(texts):
        emotions = [label_list[j] for j, val in enumerate(predictions[i]) if val == 1]
        print(f"\nText: {text}\nDetected Emotions: {emotions if emotions else ['neutral']}")

# 🔍 Example Use
custom_texts = [
    "I’m really happy with the service I received!",
    "This makes me so angry and disappointed.",
    "It’s a confusing situation but I’m trying to stay calm."
]
predict_emotions(custom_texts)


Text: I’m really happy with the service I received!
Detected Emotions: ['admiration', 'amusement', 'annoyance', 'confusion', 'curiosity', 'disgust', 'embarrassment', 'gratitude', 'grief', 'joy', 'optimism', 'neutral']

Text: This makes me so angry and disappointed.
Detected Emotions: ['admiration', 'amusement', 'annoyance', 'confusion', 'curiosity', 'disapproval', 'embarrassment', 'fear', 'gratitude', 'joy', 'optimism', 'sadness', 'surprise', 'neutral']

Text: It’s a confusing situation but I’m trying to stay calm.
Detected Emotions: ['admiration', 'amusement', 'annoyance', 'approval', 'confusion', 'curiosity', 'disgust', 'embarrassment', 'fear', 'gratitude', 'joy', 'optimism', 'surprise', 'neutral']
