# Emotion Detection in Tweets using a Fine-Tuned Transformer

This notebook fine-tunes a pre-trained DistilBERT model to classify tweets into six basic emotions: anger, fear, joy, love, sadness, and surprise. Using the Hugging Face Transformers library, the workflow covers data loading, text tokenization, model training, evaluation on the held-out test split, a cross-dataset check on the TweetEval emotion task (with the six predicted labels mapped onto TweetEval's four), and sample predictions.

In [None]:
# Install necessary libraries
!pip -q install torch torchvision
!pip -q install numpy pandas
!pip -q install transformers datasets scikit-learn


In [None]:
import pandas as pd
import numpy as np
import torch
from datasets import load_dataset, Dataset
from transformers import (DistilBertTokenizer, DataCollatorWithPadding,
                          DistilBertForSequenceClassification, Trainer, TrainingArguments)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


In [None]:
# Fetch the DAIR AI emotion dataset from the Hugging Face Hub
dataset = load_dataset('dair-ai/emotion')

# Show the available splits, then pull the class names off the 'label' feature
print(dataset)
label_feature = dataset['train'].features['label']
label_names = label_feature.names
print('Label names:', label_names)


In [None]:
# Load the tokenizer that matches the DistilBERT checkpoint
checkpoint_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint_name)

def tokenize_function(example):
    """Tokenize the 'text' field of ``example`` with truncation enabled.

    No padding is applied here; the tokenizer output is returned as-is.
    """
    encoded = tokenizer(example['text'], truncation=True)
    return encoded

# Tokenize every split in batches, drop the raw text, and expose the target as 'labels'
tokenized_dataset = (
    dataset
    .map(tokenize_function, batched=True, remove_columns=['text'])
    .rename_column('label', 'labels')
)

# Collator that pads each batch at collation time
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


In [None]:
# Instantiate the model for sequence classification with one output per emotion label
import inspect

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Derive the head size from the dataset instead of hard-coding it, and store the
# index <-> name mapping in the model config so saved checkpoints are self-describing.
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=len(label_names),
    id2label=dict(enumerate(label_names)),
    label2id={name: idx for idx, name in enumerate(label_names)},
).to(device)

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers releases
# and the old spelling was later removed. The install cell does not pin a version,
# so pick whichever keyword the installed TrainingArguments actually accepts.
training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
eval_strategy_name = (
    'eval_strategy' if 'eval_strategy' in training_arg_names else 'evaluation_strategy'
)

# Define training arguments (evaluate and checkpoint once per epoch)
training_args = TrainingArguments(
    output_dir='./results',
    save_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=50,
    **{eval_strategy_name: 'epoch'},
)

# Same story for Trainer: the `tokenizer` argument was superseded by `processing_class`.
trainer_arg_names = inspect.signature(Trainer.__init__).parameters
tokenizer_arg_name = (
    'processing_class' if 'processing_class' in trainer_arg_names else 'tokenizer'
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    data_collator=data_collator,
    **{tokenizer_arg_name: tokenizer},
)

# Train the model (this step may take some time)
trainer.train()


In [None]:
# Score the fine-tuned model on the held-out test split of the emotion dataset
test_dataset = tokenized_dataset['test']
test_dataset.set_format(type='torch', columns=['input_ids','attention_mask','labels'])

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
    collate_fn=data_collator,
)

model.eval()
predictions, true_labels = [], []
# Gradients are never needed during evaluation, so disable them for the whole loop
with torch.no_grad():
    for batch in test_dataloader:
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        batch_logits = model(**batch).logits
        predictions.extend(batch_logits.argmax(dim=1).cpu().numpy())
        true_labels.extend(batch['labels'].cpu().numpy())

# Accuracy plus support-weighted precision / recall / F1
accuracy = accuracy_score(true_labels, predictions)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, predictions, average='weighted'
)
print(f'Test Accuracy: {accuracy:.4f}')
print(f'Test Precision (weighted): {precision:.4f}')
print(f'Test Recall (weighted): {recall:.4f}')
print(f'Test F1-score (weighted): {f1:.4f}')


In [None]:
# Evaluate generalization on the TweetEval emotion task (a different dataset than the
# one the model was fine-tuned on)
other_dataset = load_dataset('cardiffnlp/tweet_eval', 'emotion')
test_other_dataset = other_dataset['test']

# Tokenize the TweetEval test set. The raw text is dropped once tokenized, and the
# target column is renamed to 'labels' so it matches the key the collator emits.
test_other_dataset = test_other_dataset.map(tokenize_function, batched=True, remove_columns=['text'])
test_other_dataset = test_other_dataset.rename_column('label', 'labels')
test_other_dataset.set_format(type='torch', columns=['input_ids','attention_mask','labels'])

# Map our model's six-label output to TweetEval's four labels
labels_map = {
    0: 3,  # sadness -> sadness
    1: 1,  # joy -> joy
    2: 2,  # love -> optimism
    3: 0,  # anger -> anger
    4: 3,  # fear -> sadness
    5: 1   # surprise -> joy
}

# Create DataLoader for TweetEval
tweet_dataloader = torch.utils.data.DataLoader(test_other_dataset, batch_size=8, shuffle=False, collate_fn=data_collator)

model.eval()
mapped_predictions, tweet_true_labels = [], []
for batch in tweet_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        # The targets are deliberately not passed to the model: they are TweetEval
        # class ids, not the six-class ids the classification head was trained on.
        outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
    logits = outputs.logits
    # .tolist() yields plain Python ints, which are used as keys into labels_map
    preds = torch.argmax(logits, dim=1).tolist()
    mapped_predictions.extend(labels_map[p] for p in preds)
    # DataCollatorWithPadding exposes the targets under 'labels' (it renames a
    # 'label' key), so reading batch['label'] here would raise KeyError.
    tweet_true_labels.extend(batch['labels'].tolist())

# Compute evaluation metrics
accuracy2 = accuracy_score(tweet_true_labels, mapped_predictions)
precision2, recall2, f12, _ = precision_recall_fscore_support(tweet_true_labels, mapped_predictions, average='weighted')
print(f"TweetEval Accuracy: {accuracy2:.4f}")
print(f"TweetEval Precision (weighted): {precision2:.4f}")
print(f"TweetEval Recall (weighted): {recall2:.4f}")
print(f"TweetEval F1-score (weighted): {f12:.4f}")


In [None]:
# Helper function to predict the emotion of a given text
def predict_emotion(text):
    """Return the predicted emotion name for a single piece of text.

    Args:
        text: The string to classify.

    Returns:
        The entry of ``label_names`` at the index of the highest logit.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
    # Tensor.item() extracts the Python int directly; int() on a one-element NumPy
    # array is deprecated since NumPy 1.25.
    predicted_index = torch.argmax(outputs.logits, dim=1).item()
    return label_names[predicted_index]

# Run the helper on a couple of hand-written sample sentences
text_short = 'I am feeling absolutely wonderful today'
text_long = 'I lost my job and I am very scared about what will happen next'

for sample in (text_short, text_long):
    print(f"Text: '{sample}' -> Predicted emotion: {predict_emotion(sample)}")
