# Training RoBERTa on GoEmotions

## Libraries

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW 
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Ignore the harmless registration warnings
warnings.filterwarnings("ignore", category=UserWarning)
print("✅ Libraries imported successfully.")

✅ Libraries imported successfully.


## Data Loading

In [2]:
# Read the GoEmotions CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/goemotions/GoEmotions.csv')
print("\n✅ Dataset loaded successfully.")


# Names of the 28 label columns (27 emotions plus 'neutral').
# The order matters: it fixes the position of each class in the label
# vectors built from these columns and in the model's output logits.
emotion_cols = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]


✅ Dataset loaded successfully.


## Data Labeling

In [3]:
# Collapse the per-emotion columns into a single list-valued 'labels' column
# (one list of len(emotion_cols) entries per row), then keep only what the
# model needs: the raw text and its label vector.
df['labels'] = df[emotion_cols].to_numpy().tolist()
df_model = df.loc[:, ['text', 'labels']].copy()

# Work on a fixed 50k-row random subset (seeded, so the same rows are drawn
# on every run) and hold out 20% of it for testing.
df_sample = df_model.sample(n=50000, random_state=42)
train_df, test_df = train_test_split(
    df_sample,
    test_size=0.2,
    random_state=42,
)

print("\n✅ Data prepared:")
print(f" - Training set size: {len(train_df)}")
print(f" - Test set size: {len(test_df)}")


✅ Data prepared:
 - Training set size: 40000
 - Test set size: 10000


## Model and Tokenization

In [4]:
# Seed PyTorch with the same value used for pandas sampling and the
# train/test split (random_state=42). Without this, the freshly initialised
# classifier head, dropout masks and DataLoader shuffling change on every
# "Restart & Run All". torch.manual_seed covers the CPU and CUDA generators;
# some GPU kernels can still introduce small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")


Using device: cuda


In [5]:
MODEL_NAME = 'roberta-base'
print(f"\n🔥 Loading model: {MODEL_NAME}")

# Tokenizer and pretrained weights both come from the MODEL_NAME checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# The classification head gets one output logit per entry in emotion_cols
# and is configured for multi-label classification.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=len(emotion_cols), problem_type="multi_label_classification"
)

# Kept as the last expression of the cell: moves the model to `device`
# and displays the module structure as the cell output.
model.to(device)


🔥 Loading model: roberta-base


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

2025-08-02 04:36:14.737401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754109374.983337      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754109375.054112      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

### Weights for Class Imbalance

In [6]:
print("\n⚖️ Calculating weights for handling class imbalance...")

# Expand the per-row 'labels' lists back into one column per emotion so the
# positives of each class can be counted with a column-wise sum.
temp_labels_df = pd.DataFrame(train_df['labels'].tolist(), columns=emotion_cols)
positive_counts = temp_labels_df.sum()
negative_counts = len(train_df) - positive_counts

# Per-class weight = (#negatives / #positives), used as `pos_weight` in the
# loss. The denominator is clipped at 1: a class with no positive example in
# the training sample would otherwise get an infinite weight, and inf * 0 in
# the loss yields NaN. Classes with at least one positive are unaffected.
class_weights = negative_counts / positive_counts.clip(lower=1)

# Build the tensor on the same device as the model so the loss can use it directly.
pos_weight_tensor = torch.tensor(class_weights.values, dtype=torch.float).to(device)
print("✅ Weights calculated.")


⚖️ Calculating weights for handling class imbalance...
✅ Weights calculated.


In [7]:
# Every example is padded or truncated to this many tokens.
MAX_LEN = 128


class GoEmotionsDataset(Dataset):
    """Map-style dataset that tokenizes one text per item.

    Args:
        dataframe: frame with a ``text`` column and a ``labels`` column;
            each ``labels`` entry is a sequence of numbers that is turned
            into a float tensor.
        tokenizer: callable tokenizer returning a mapping with
            ``input_ids`` and ``attention_mask``.
        max_len: length every example is padded or truncated to.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.text = dataframe.text.values
        self.labels = dataframe.labels.values
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.text)

    def __getitem__(self, index):
        """Tokenize the text at ``index`` and return model-ready tensors.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (long tensors)
            and ``labels`` (float tensor).
        """
        text = str(self.text[index])
        # Call the tokenizer directly: `encode_plus` is deprecated in favour
        # of `__call__`. token_type_ids are not requested because nothing
        # downstream consumes them.
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
        )
        return {
            'input_ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'labels': torch.tensor(self.labels[index], dtype=torch.float),
        }

# Wrap both splits with the same tokenizer and maximum sequence length.
train_dataset, test_dataset = (
    GoEmotionsDataset(split_df, tokenizer, MAX_LEN) for split_df in (train_df, test_df)
)

In [8]:
BATCH_SIZE = 32

# Training batches are reshuffled every epoch; the test loader keeps the
# dataset order so predictions stay aligned with the rows of the test set.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("\n✅ PyTorch Datasets and DataLoaders created.")


✅ PyTorch Datasets and DataLoaders created.


## Model Setup

In [9]:
# AdamW updates every parameter of the model (no layers are frozen here).
optimizer = AdamW(params=model.parameters(), lr=2e-5)

# Binary cross-entropy on raw logits, one independent term per class;
# `pos_weight` scales the positive term of each class by its
# negatives-to-positives ratio.
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

print("\n✅ Model, optimizer, and weighted loss function are set up.")


✅ Model, optimizer, and weighted loss function are set up.


## Manual Training

In [10]:
NUM_EPOCHS = 4 # Using 4 epochs as planned
print("\n🚀 Starting model fine-tuning with weighted loss...")

for epoch in range(NUM_EPOCHS):
    # Put the model in training mode at the start of every epoch.
    model.train()
    total_loss = 0
    progress_bar = tqdm(
        train_loader,
        desc=f"Epoch {epoch + 1}/{NUM_EPOCHS}",
        leave=False,
    )

    for batch in progress_bar:
        # Move the batch to the same device as the model.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Standard step: clear old gradients, forward, weighted loss,
        # backward, parameter update. Labels are not passed to the model;
        # the loss comes from `criterion` so the class weights apply.
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)
        batch_loss = loss.item()  # read the scalar once, reuse it below
        total_loss += batch_loss
        loss.backward()
        optimizer.step()

        progress_bar.set_postfix({'training_loss': f'{batch_loss:.3f}'})

    # Mean of the per-batch losses over the epoch.
    avg_train_loss = total_loss / len(train_loader)
    print(f"\nEpoch {epoch + 1} | Average Training Loss: {avg_train_loss:.4f}")

print("\n✅ Model training complete.")


🚀 Starting model fine-tuning with weighted loss...


Epoch 1/4:   0%|          | 0/1250 [00:00<?, ?it/s]


Epoch 1 | Average Training Loss: 1.0073


Epoch 2/4:   0%|          | 0/1250 [00:00<?, ?it/s]


Epoch 2 | Average Training Loss: 0.8368


Epoch 3/4:   0%|          | 0/1250 [00:00<?, ?it/s]


Epoch 3 | Average Training Loss: 0.7444


Epoch 4/4:   0%|          | 0/1250 [00:00<?, ?it/s]


Epoch 4 | Average Training Loss: 0.6670

✅ Model training complete.


## Evaluation

In [13]:
print("\n📊 Evaluating the model...")

# Switch to inference mode and collect one row per test example.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():  # no gradients are needed for evaluation
    for batch in tqdm(test_loader, desc="Evaluating"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Each class is decided independently: sigmoid probability above
        # 0.6 counts as a positive prediction for that class.
        probabilities = torch.sigmoid(logits)
        predictions = (probabilities > 0.6).int().cpu().numpy()

        # extend() appends one array per example, keeping rows aligned
        # between predictions and ground truth.
        all_preds.extend(predictions)
        all_labels.extend(labels.cpu().numpy())

print("\nFinal Classification Report:")
report = classification_report(
    all_labels,
    all_preds,
    target_names=emotion_cols,
    zero_division=0,
)
print(report)


📊 Evaluating the model...


Evaluating:   0%|          | 0/313 [00:00<?, ?it/s]


Final Classification Report:
                precision    recall  f1-score   support

    admiration       0.34      0.79      0.48       821
     amusement       0.38      0.78      0.51       447
         anger       0.19      0.70      0.30       405
     annoyance       0.18      0.66      0.28       671
      approval       0.18      0.52      0.26       804
        caring       0.15      0.67      0.25       269
     confusion       0.16      0.82      0.27       341
     curiosity       0.24      0.84      0.37       451
        desire       0.10      0.64      0.17       188
disappointment       0.10      0.70      0.18       423
   disapproval       0.17      0.69      0.28       563
       disgust       0.11      0.73      0.19       244
 embarrassment       0.05      0.62      0.09       112
    excitement       0.12      0.60      0.20       264
          fear       0.17      0.67      0.27       149
     gratitude       0.51      0.89      0.64       569
         grief   

In [14]:
# Persist the test-set ground truth and the thresholded predictions as
# .npy files in the working directory.
np.save(file='y_test_roberta.npy', arr=all_labels)
np.save(file='y_pred_roberta.npy', arr=all_preds)

print("\n✅ Saved RoBERTa results to 'y_pred_roberta.npy'")


✅ Saved RoBERTa results to 'y_pred_roberta.npy'
