# Fine-tuning a transformer model (BERT or RoBERTa) for multi-label emotion classification

In [None]:
from datasets import load_dataset
import pandas as pd

# Load the "raw" configuration of the GoEmotions dataset from Hugging Face.
# The result is indexed by split name; this notebook reads its "train" split.
goemotions = load_dataset("go_emotions", "raw")

# Inspect a sample: one record is a dict with the comment text, Reddit metadata,
# a rater_id, and one 0/1 column per emotion.
print(goemotions["train"][0])

{'text': 'That game hurt.', 'id': 'eew5j0j', 'author': 'Brdd9', 'subreddit': 'nrl', 'link_id': 't3_ajis4z', 'parent_id': 't1_eew18eq', 'created_utc': 1548381056.0, 'rater_id': 1, 'example_very_unclear': False, 'admiration': 0, 'amusement': 0, 'anger': 0, 'annoyance': 0, 'approval': 0, 'caring': 0, 'confusion': 0, 'curiosity': 0, 'desire': 0, 'disappointment': 0, 'disapproval': 0, 'disgust': 0, 'embarrassment': 0, 'excitement': 0, 'fear': 0, 'gratitude': 0, 'grief': 0, 'joy': 0, 'love': 0, 'nervousness': 0, 'optimism': 0, 'pride': 0, 'realization': 0, 'relief': 0, 'remorse': 0, 'sadness': 1, 'surprise': 0, 'neutral': 0}


In [None]:
# First record of the train split (same record as printed in the loading cell).
print(goemotions["train"][0])
# Summary of the whole DatasetDict: split names and their feature columns.
print(goemotions)

{'text': 'That game hurt.', 'id': 'eew5j0j', 'author': 'Brdd9', 'subreddit': 'nrl', 'link_id': 't3_ajis4z', 'parent_id': 't1_eew18eq', 'created_utc': 1548381056.0, 'rater_id': 1, 'example_very_unclear': False, 'admiration': 0, 'amusement': 0, 'anger': 0, 'annoyance': 0, 'approval': 0, 'caring': 0, 'confusion': 0, 'curiosity': 0, 'desire': 0, 'disappointment': 0, 'disapproval': 0, 'disgust': 0, 'embarrassment': 0, 'excitement': 0, 'fear': 0, 'gratitude': 0, 'grief': 0, 'joy': 0, 'love': 0, 'nervousness': 0, 'optimism': 0, 'pride': 0, 'realization': 0, 'relief': 0, 'remorse': 0, 'sadness': 1, 'surprise': 0, 'neutral': 0}
DatasetDict({
    train: Dataset({
        features: ['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id', 'created_utc', 'rater_id', 'example_very_unclear', 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'gri

In [None]:
import torch
# The 28 emotion column names, in the order used for every multi-hot label vector.
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance",
    "approval", "caring", "confusion", "curiosity",
    "desire", "disappointment", "disapproval", "disgust",
    "embarrassment", "excitement", "fear", "gratitude",
    "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]


def preprocess_batch(batch):
    """Add a "labels_multi" column holding one multi-hot vector per row.

    Args:
        batch: Column-oriented mapping (column name -> list of values) that
            contains a "text" column and one column per name in
            ``emotion_labels``.

    Returns:
        The same ``batch`` object, with ``batch["labels_multi"][i]`` set to the
        list of row ``i``'s values taken from the emotion columns in
        ``emotion_labels`` order.
    """
    row_count = len(batch["text"])
    multi_hot_rows = []
    for row_idx in range(row_count):
        row_vector = []
        for label_name in emotion_labels:
            row_vector.append(batch[label_name][row_idx])
        multi_hot_rows.append(row_vector)
    batch["labels_multi"] = multi_hot_rows
    return batch

# Attach the multi-hot "labels_multi" column to every row of the train split.
goemotions["train"] = goemotions["train"].map(preprocess_batch, batched=True)

# Split train into train and validation (90-10 split).
# - seed: makes the split reproducible across kernel restarts.
# - guard: train_test_split replaces `goemotions` with a DatasetDict holding
#   "train" and "test", so re-running this cell without the guard would split
#   the already-reduced train set again and drop the previous validation rows.
# NOTE(review): rows carry a `rater_id` column, so the same comment may appear
# in several rows; a purely random split could put one text in both train and
# test — confirm, and consider splitting by `id` instead.
if "test" not in goemotions:
    goemotions = goemotions["train"].train_test_split(test_size=0.1, seed=42)
print(goemotions)  # Now it has train and test keys

Map:   0%|          | 0/101025 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id', 'created_utc', 'rater_id', 'example_very_unclear', 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral', 'labels', 'input_ids', 'token_type_ids', 'attention_mask', 'labels_multi'],
        num_rows: 90922
    })
    test: Dataset({
        features: ['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id', 'created_utc', 'rater_id', 'example_very_unclear', 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', '

In [37]:
from transformers import BertTokenizer

# WordPiece tokenizer for the uncased BERT base checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize_function(example):
    """Tokenize the "text" field to a fixed length of 128 tokens.

    Used through ``map(..., batched=True)``, so ``example`` is a batch whose
    "text" entry holds several strings. Longer inputs are truncated and shorter
    ones are padded up to ``max_length``.

    Returns:
        Whatever the tokenizer call returns for the batch of texts.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(example["text"], **encoding_options)


# Tokenize the whole dataset object in batches and keep the result under the same name.
goemotions = goemotions.map(tokenize_function, batched=True)


Map:   0%|          | 0/101025 [00:00<?, ? examples/s]

Map:   0%|          | 0/11225 [00:00<?, ? examples/s]

In [56]:
from datasets import Features, Value, Sequence

# Define the correct schema: every existing column unchanged, plus a "labels"
# column typed as a sequence of float32 values.
features = goemotions["train"].features.copy()
features["labels"] = Sequence(Value("float32"))


def _copy_multi_hot_labels(batch):
    """Expose the vectors stored in "labels_multi" under the "labels" column."""
    return {"labels": batch["labels_multi"]}


# Remap with forced schema. One loop replaces the two copy-pasted map calls,
# and batched=True avoids one Python call per row; the resulting column is the
# same, cast to float32 by `features`.
for split_name in ("train", "test"):
    goemotions[split_name] = goemotions[split_name].map(
        _copy_multi_hot_labels,
        batched=True,
        features=features,
    )

# Set torch format: only these three columns are returned, as torch tensors.
goemotions.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)


Map:   0%|          | 0/90922 [00:00<?, ? examples/s]

Map:   0%|          | 0/10103 [00:00<?, ? examples/s]

In [None]:
# Sanity check: after the schema remap the labels should come back as float32.
print(goemotions["train"][0]["labels"].dtype)


torch.float32


In [58]:
from transformers import BertForSequenceClassification

# Multi-label classification = set problem_type explicitly
# The classification head gets one output per entry of emotion_labels.
# NOTE(review): with this problem_type the model is expected to apply a
# per-label binary cross-entropy loss, which needs float targets — confirm
# against the transformers documentation for the installed version.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(emotion_labels),
    problem_type="multi_label_classification"
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [59]:
# Sanity check on one formatted training example: "labels" should be a torch
# tensor of dtype float32 with one entry per emotion (28 in this notebook).
sample = goemotions["train"][0]
print(type(sample["labels"]), sample["labels"].dtype)
print(sample["labels"].shape)


<class 'torch.Tensor'> torch.float32
torch.Size([28])


In [None]:
from transformers import Trainer, TrainingArguments

# Training configuration.
# - Evaluation and checkpointing both happen once per epoch so that the best
#   checkpoint can be restored when training finishes. In the recorded run the
#   validation loss was lowest after epoch 2 and rose again in epoch 3, so
#   keeping the last-epoch weights is not the best choice.
# - save_total_limit bounds disk usage under output_dir.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",  # must match the evaluation schedule for load_best_model_at_end
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower validation loss is better
    save_total_limit=2,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
)

# "test" is the 10% split held out from train; it is used here as the validation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=goemotions["train"],
    eval_dataset=goemotions["test"],
)


trainer.train()


Epoch,Training Loss,Validation Loss
1,0.1182,0.114606
2,0.1086,0.112067
3,0.1025,0.112678


TrainOutput(global_step=17049, training_loss=0.11535557241158455, metrics={'train_runtime': 60797.0512, 'train_samples_per_second': 4.487, 'train_steps_per_second': 0.28, 'total_flos': 1.7946125960435712e+16, 'train_loss': 0.11535557241158455, 'epoch': 3.0})

In [68]:
import torch
import numpy as np

test_text = "I am tired, i feel like i am too lazy"

# Probability above which an emotion is reported. Multi-label: each emotion is
# thresholded independently, so zero, one or several labels may be returned.
PREDICTION_THRESHOLD = 0.2

# Setup device: prefer CUDA, then Apple MPS, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)
# Inference mode: disables dropout so the same text always yields the same scores.
model.eval()

# Tokenize and move to device (same 128-token truncation limit as used for training)
inputs = tokenizer(
    test_text,
    return_tensors="pt",
    truncation=True,
    max_length=128,
    padding=True,
).to(device)

# Predict
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits.cpu().numpy()  # Move logits back to CPU for NumPy

# Multi-label sigmoid + threshold
probs = torch.sigmoid(outputs.logits).cpu().numpy()
preds = (probs > PREDICTION_THRESHOLD).astype(int)

# Show predicted emotions
print("Predicted emotions:", [emotion_labels[i] for i, val in enumerate(preds[0]) if val == 1])


Predicted emotions: ['disappointment', 'sadness']


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Classical baselines: TF-IDF features with Naïve Bayes and a linear SVM.
# NOTE(review): `combined_data` is not defined in any cell visible in this
# notebook; it is assumed to be a DataFrame with 'clean_text' and 'sentiment'
# columns created elsewhere — confirm, otherwise this cell fails on a fresh kernel.
texts = combined_data['clean_text']
y = combined_data['sentiment']  # Target labels

# Split the raw text first, so the vectorizer never sees the test documents
# while it learns its vocabulary and IDF weights (avoids train/test leakage).
text_train, text_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.2, random_state=42
)

# Convert text into numerical features using TF-IDF, fitted on the training split only
vectorizer = TfidfVectorizer(max_features=5000)  # Limit features to prevent overfitting
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus matrix, kept under its original name for any later cell that reads `X`.
X = vectorizer.transform(texts)


def _fit_and_report(model, header):
    """Fit `model` on the training split, print its test-set report, and return the predictions."""
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(header)
    print(classification_report(y_test, preds))
    print("Accuracy:", accuracy_score(y_test, preds))
    return preds


# Train and evaluate Naïve Bayes
nb_model = MultinomialNB()
nb_preds = _fit_and_report(nb_model, "Naïve Bayes Performance:")

# Train and evaluate SVM
svm_model = SVC(kernel='linear')
svm_preds = _fit_and_report(svm_model, "\nSVM Performance:")
