In [2]:
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader
from transformers import AdamW
from torch.optim import lr_scheduler
import torch
import torch.nn as nn
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score



# Identifiers of the dataset and the tokenizer checkpoint used in this notebook
DATASET_NAME = "go_emotions"
TOKENIZER_CHECKPOINT = "bert-base-uncased"

# Fetch GoEmotions and print its splits (train / validation / test) as a sanity check
dataset = load_dataset(DATASET_NAME)
print(dataset)

# Tokenizer matching the BERT checkpoint
tokenizer = BertTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch with the module-level ``tokenizer``.

    Every sequence is padded or truncated to exactly 128 tokens and the
    encoding is returned as PyTorch tensors.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw strings.

    Returns:
        Whatever ``tokenizer(...)`` returns for the batch.
    """
    encode_options = dict(
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    texts = examples["text"]
    return tokenizer(texts, **encode_options)

def _to_multi_hot(example):
    """Turn the list of active class ids into a 28-slot 0/1 indicator list."""
    active_ids = example["labels"]
    return {"labels": [int(class_id in active_ids) for class_id in range(28)]}


# Tokenize in batches, then rewrite each example's labels as a multi-hot vector
tokenized_datasets = dataset.map(tokenize_function, batched=True).map(_to_multi_hot)

# Expose only the model inputs and targets, as torch tensors
tokenized_datasets.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

# Pull out the three splits by name
train_dataset, val_dataset, test_dataset = (
    tokenized_datasets[split_name] for split_name in ("train", "validation", "test")
)

# Load pre-trained BERT with a 28-output classification head configured for
# multi-label classification.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=28,  # GoEmotions has 28 emotion labels
    problem_type="multi_label_classification"
)

# Optimizer: use torch.optim.AdamW instead of the deprecated transformers.AdamW.
# eps and weight_decay are pinned to the deprecated class's defaults
# (eps=1e-6, weight_decay=0.0) because torch's own defaults differ
# (eps=1e-8, weight_decay=0.01).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=5e-5,
    eps=1e-6,
    weight_decay=0.0,
)

# Scheduler: multiply the learning rate by 0.1 each time scheduler.step() is called
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

# Per-label binary cross-entropy computed on raw logits
loss_function = nn.BCEWithLogitsLoss()

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # Move model to the selected device

# Training loop
def train_model(model, train_loader, optimizer, loss_function, scheduler, epochs=1):
    """Train ``model`` on ``train_loader`` for ``epochs`` passes.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``.logits`` (and ``.loss`` when ``labels`` is passed).
        train_loader: Iterable of batches with ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.
        optimizer: Optimizer stepped once per batch.
        loss_function: Callable ``(logits, float_labels) -> scalar loss``.
            Pass ``None`` to use the loss the model computes itself from
            ``labels``.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``
            to skip scheduling.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is shown with tqdm and the mean batch loss of each
        epoch is printed.
    """
    model.train()

    # Send batches to wherever the model's weights live instead of relying on
    # a module-level ``device`` variable being in sync with the model.
    train_device = next(model.parameters()).device

    for epoch in range(epochs):
        total_loss = 0.0
        loop = tqdm(train_loader, leave=True)
        loop.set_description(f"Epoch {epoch}")

        for batch in loop:
            # Move data to the model's device
            input_ids = batch["input_ids"].to(train_device)
            attention_mask = batch["attention_mask"].to(train_device)
            labels = batch["labels"].to(train_device).float()

            # Forward pass: honour the caller-supplied loss instead of
            # silently ignoring it; fall back to the model's built-in loss.
            if loss_function is None:
                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs.loss
            else:
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_function(outputs.logits, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update metrics; read the scalar once, since .item() forces a
            # host/device synchronisation each time it is called.
            batch_loss = loss.item()
            total_loss += batch_loss
            loop.set_postfix(loss=batch_loss)

        if scheduler is not None:
            scheduler.step()
        # max(..., 1) avoids a ZeroDivisionError when the loader is empty
        print(f"Epoch {epoch} Loss: {total_loss / max(len(train_loader), 1)}")


BATCH_SIZE = 32

# Only the training split is shuffled; validation keeps its stored order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Run a single training epoch
train_model(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    loss_function=loss_function,
    scheduler=scheduler,
    epochs=1,
)

# Save the model and tokenizer



DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 0: 100%|██████████| 1357/1357 [1:58:34<00:00,  5.24s/it, loss=0.0833]   

Epoch 0 Loss: 0.12458196861083312





In [3]:
# Write the model and the tokenizer files into the same output directory
SAVE_DIR = "goemotions_bert_finetuned_28"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

('goemotions_bert_finetuned_28/tokenizer_config.json',
 'goemotions_bert_finetuned_28/special_tokens_map.json',
 'goemotions_bert_finetuned_28/vocab.txt',
 'goemotions_bert_finetuned_28/added_tokens.json')

In [11]:
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
# Evaluation function
def evaluate_model(model, val_loader, threshold=0.5):
    """Score ``model`` on ``val_loader`` with multi-label metrics.

    A label counts as predicted when the sigmoid of its logit is strictly
    greater than ``threshold``.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``.logits``.
        val_loader: Iterable of batches with ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.
        threshold: Probability cut-off for turning scores into 0/1 decisions.

    Returns:
        dict with keys ``"f1"``, ``"precision"``, ``"recall"`` (macro-averaged
        over labels) and ``"accuracy"``. The same four values are printed.
    """
    model.eval()

    # Send batches to wherever the model's weights live instead of relying on
    # a module-level ``device`` variable being in sync with the model.
    eval_device = next(model.parameters()).device

    true_labels = []
    predictions = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(eval_device)
            attention_mask = batch["attention_mask"].to(eval_device)

            outputs = model(input_ids, attention_mask=attention_mask)
            preds = torch.sigmoid(outputs.logits) > threshold

            # Targets are only consumed on the host, so they never need to be
            # moved to the model's device. Both sides are stored as 0/1 ints
            # so the metric functions see a consistent dtype.
            true_labels.extend(batch["labels"].int().cpu().numpy())
            predictions.extend(preds.int().cpu().numpy())

    # Compute metrics. zero_division=0 keeps the value scikit-learn already
    # substitutes for labels with no predicted/true samples, but without
    # emitting an UndefinedMetricWarning for each such label.
    f1 = f1_score(true_labels, predictions, average="macro", zero_division=0)
    precision = precision_score(true_labels, predictions, average="macro", zero_division=0)
    recall = recall_score(true_labels, predictions, average="macro", zero_division=0)
    # NOTE: for multi-label input, accuracy_score is subset accuracy, i.e. the
    # share of samples whose whole label vector is predicted exactly.
    accuracy = accuracy_score(true_labels, predictions)
    print(f"F1 Score: {f1}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"Accuracy: {accuracy}")

    return {"f1": f1, "precision": precision, "recall": recall, "accuracy": accuracy}

# Evaluate the model; keep the metrics so later cells can reuse them
val_metrics = evaluate_model(model, val_loader)

# Predict function



F1 Score: 0.3067695152343108
Precision: 0.5230141032394783
Recall: 0.26706473362086774
Accuracy: 0.42167342425359383


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
def predict_emotions(model, tokenizer, text, threshold=0.5, verbose=True):
    """Predict which labels apply to ``text``.

    Args:
        model: Module whose forward pass accepts the tokenizer's output as
            keyword arguments and returns an object exposing ``.logits``.
        tokenizer: Callable producing a mapping of tensors for ``text``.
        text: A string (or whatever batch input ``tokenizer`` accepts).
        threshold: A label is predicted when its sigmoid probability is
            strictly greater than this value.
        verbose: When True (the default) the probability array is printed
            before the boolean mask is returned.

    Returns:
        Boolean NumPy array with the same shape as the logits; True marks a
        predicted label.
    """
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Send inputs to wherever the model's weights live instead of relying on
    # a module-level ``device`` variable being in sync with the model.
    target_device = next(model.parameters()).device
    inputs = {k: v.to(target_device) for k, v in inputs.items()}

    # Inference only: skip building the autograd graph
    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.sigmoid(outputs.logits).cpu().numpy()  # Move back to CPU
    if verbose:
        print(probs)
    return probs > threshold  # Return multi-label predictions

# Example prediction on a single sentence. predict_emotions prints the
# per-label probabilities and returns a boolean mask, which is printed here.
# In the recorded output below no probability exceeds 0.5, so the mask is
# all False.
text = "I am so happy and excited!"
predictions = predict_emotions(model, tokenizer, text)
print(predictions)

[[0.07227654 0.01273712 0.00933806 0.01149558 0.04897002 0.02392617
  0.00944283 0.02783507 0.02170407 0.00621075 0.01060471 0.00489169
  0.00582826 0.39637524 0.00960317 0.02187285 0.00511216 0.46701762
  0.04616576 0.00652231 0.02203087 0.01029757 0.01764769 0.01721916
  0.0111461  0.01415378 0.05258131 0.05201788]]
[[False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False]]
