<a href="https://colab.research.google.com/github/Sajiiidddd/Neuro-Muse-MultiLabel-Emotion/blob/main/neuromuse_text_emotion_classifier_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🌈 NeuroMuse - Multilabel Emotion Classifier (GoEmotions + BERT)

This notebook sets up a multi-label emotion classifier using the GoEmotions dataset and BERT.

**Libraries:** PyTorch, Hugging Face Transformers, Datasets

We'll:
- Load and preprocess the dataset
- Tokenize text with BERT tokenizer
- Train a multi-label classifier
- Evaluate performance


In [None]:
# 📦 Install Required Libraries
!pip install -q transformers==4.35.2

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.5/123.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m68.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 3.4.1 requires transformers<5.0.0,>=4.41.0, but you have transformers 4.35.2 which is incompatible.[0m[31m
[0m

In [None]:
#!pip install -q fsspec==2025.3.2


In [None]:
# 📚 Imports
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
from torch.optim import AdamW
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm
import numpy as np


  _torch_pytree._register_pytree_node(


In [None]:
# 🔹 Load GoEmotions Dataset
train_df = pd.read_csv("goemotions_1.csv")
val_df = pd.read_csv("goemotions_2.csv")
test_df = pd.read_csv("goemotions_3.csv")

In [None]:
import pandas as pd

df = pd.read_csv("goemotions_1.csv")
pd.set_option('display.max_columns', None)

In [None]:

# Automatically detect emotion columns (anything that's not 'text' or 'id')
non_emotion_cols = ['text', 'id']
emotion_columns = [col for col in df.columns if col not in non_emotion_cols]
# Combine active emotion columns (where value == 1) into a list
def combine_emotions(row):
    return [emotion for emotion in emotion_columns if row[emotion] == 1]
# Apply the function to create a new 'emotions' column
df["emotions"] = df.apply(combine_emotions, axis=1)
# Drop the individual emotion columns and ID column
df.drop(columns=emotion_columns + ["id"], inplace=True)
# (Optional) View the cleaned dataframe
df.head()

Unnamed: 0,text,emotions
0,That game hurt.,"[rater_id, sadness]"
1,>sexuality shouldn’t be a grouping category I...,[example_very_unclear]
2,"You do right, if you don't care then fuck 'em!",[neutral]
3,Man I love reddit.,[love]
4,"[NAME] was nowhere near them, he was by the Fa...",[neutral]


In [None]:
from sklearn.model_selection import train_test_split

train_texts, temp_texts, train_labels, temp_labels = train_test_split(df["text"], df["emotions"], test_size=0.2, random_state=42)
val_texts, test_texts, val_labels, test_labels = train_test_split(temp_texts, temp_labels, test_size=0.5, random_state=42)

train_df = pd.DataFrame({"text": train_texts, "emotions": train_labels})
val_df = pd.DataFrame({"text": val_texts, "emotions": val_labels})
test_df = pd.DataFrame({"text": test_texts, "emotions": test_labels})

from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
mlb.fit(train_df["emotions"])

train_labels = mlb.transform(train_df["emotions"])
val_labels = mlb.transform(val_df["emotions"])
test_labels = mlb.transform(test_df["emotions"])


In [None]:
# ✅ Custom Dataset class
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        enc = tokenizer(text, padding="max_length", truncation=True, max_length=self.max_len, return_tensors="pt")
        return {
            "input_ids": enc["input_ids"].squeeze(),
            "attention_mask": enc["attention_mask"].squeeze(),
            "labels": torch.FloatTensor(label)
        }

In [None]:
from transformers import AutoTokenizer

# Load a pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
# ✅ Create PyTorch Datasets and DataLoaders
train_dataset = EmotionDataset(train_df["text"].tolist(), train_labels, tokenizer)
val_dataset = EmotionDataset(val_df["text"].tolist(), val_labels, tokenizer)
test_dataset = EmotionDataset(test_df["text"].tolist(), test_labels, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [None]:
# ✅ Define the Multi-label Classifier
class BertForMultilabel(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.bert = AutoModel.from_pretrained("bert-base-uncased")
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.classifier(outputs.pooler_output)
        return logits

In [None]:
# ✅ Initialize model, optimizer, and loss
num_labels = len(mlb.classes_)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForMultilabel(num_labels).to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = nn.BCEWithLogitsLoss()

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [13]:
from sklearn.metrics import f1_score, precision_score, recall_score
num_epochs = 5  # Increase the epochs
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        logits = model(input_ids, attention_mask)  # ✅ Ensure logits are extracted
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1} Training Loss: {avg_train_loss:.4f}")

    # ✅ Evaluate on validation set
    model.eval()
    val_loss = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].cpu().numpy()

            logits = model(input_ids, attention_mask)
            loss = criterion(logits, torch.FloatTensor(labels).to(device))
            val_loss += loss.item()

            preds = torch.sigmoid(logits).cpu().numpy()
            preds = (preds > 0.5).astype(int)

            all_labels.extend(labels)
            all_preds.extend(preds)

    avg_val_loss = val_loss / len(val_loader)
    f1_micro = f1_score(np.array(all_labels), np.array(all_preds), average="micro")

    print(f"Epoch {epoch+1} Validation Loss: {avg_val_loss:.4f}, F1 Score (micro): {f1_micro:.4f}")

    # ✅ Adjust learning rate if needed
    #lr_scheduler.step()

Epoch 1: 100%|██████████| 3500/3500 [19:35<00:00,  2.98it/s]


Epoch 1 Training Loss: 0.1357
Epoch 1 Validation Loss: 0.1154, F1 Score (micro): 0.2604


Epoch 2: 100%|██████████| 3500/3500 [19:46<00:00,  2.95it/s]


Epoch 2 Training Loss: 0.1095
Epoch 2 Validation Loss: 0.1131, F1 Score (micro): 0.3339


Epoch 3: 100%|██████████| 3500/3500 [19:49<00:00,  2.94it/s]


Epoch 3 Training Loss: 0.1007
Epoch 3 Validation Loss: 0.1137, F1 Score (micro): 0.3571


Epoch 4: 100%|██████████| 3500/3500 [19:49<00:00,  2.94it/s]


Epoch 4 Training Loss: 0.0923
Epoch 4 Validation Loss: 0.1179, F1 Score (micro): 0.3595


Epoch 5: 100%|██████████| 3500/3500 [19:49<00:00,  2.94it/s]


Epoch 5 Training Loss: 0.0843
Epoch 5 Validation Loss: 0.1231, F1 Score (micro): 0.3624


In [14]:
from sklearn.metrics import f1_score, precision_score, recall_score

# ✅ Improved Evaluation Function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].cpu().numpy()

            logits = model(input_ids, attention_mask)  # ✅ Ensure logits are extracted
            loss = criterion(logits, torch.FloatTensor(labels).to(device))  # ✅ Compute loss
            total_loss += loss.item()

            preds = torch.sigmoid(logits).cpu().numpy()  # Convert logits to probabilities
            preds = (preds > 0.5).astype(int)  # Apply thresholding

            all_labels.extend(labels)
            all_preds.extend(preds)

    # Convert to NumPy arrays
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)

    # ✅ Avoid zero division errors
    f1_micro = f1_score(all_labels, all_preds, average="micro", zero_division=0)
    f1_macro = f1_score(all_labels, all_preds, average="macro", zero_division=0)
    precision_micro = precision_score(all_labels, all_preds, average="micro", zero_division=0)
    recall_micro = recall_score(all_labels, all_preds, average="micro", zero_division=0)

    avg_loss = total_loss / len(dataloader)

    print(f"Loss: {avg_loss:.4f}")
    print(f"F1 Score (Micro): {f1_micro:.4f}")
    print(f"F1 Score (Macro): {f1_macro:.4f}")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")

    return avg_loss, f1_micro  # Return values for tracking

In [15]:
# Save model state
torch.save(model.state_dict(), "neuromuse_emotion_bert.pt")

# Save label encoder (optional)
import pickle
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(mlb, f)

In [16]:
def predict_emotions(text, model, tokenizer, mlb, threshold=0.5):
    model.eval()
    enc = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)

    with torch.no_grad():
        logits = model(input_ids, attention_mask)
        probs = torch.sigmoid(logits).cpu().numpy()[0]
        predicted_indices = np.where(probs > threshold)[0]
        predicted_emotions = mlb.classes_[predicted_indices]

    return predicted_emotions, probs[predicted_indices]

In [17]:
import torch
import numpy as np

def predict_emotions(text, model, tokenizer, mlb, threshold=0.2):
    model.eval()  # Set to evaluation mode
    enc = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move inputs to the same device as the model
    device = next(model.parameters()).device
    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)

    with torch.no_grad():
        # Get model predictions
        output = model(input_ids=input_ids, attention_mask=attention_mask)

        # Extract logits correctly
        logits = output.logits if hasattr(output, 'logits') else output[0]

        # Convert logits to probabilities
        probs = torch.sigmoid(logits).cpu().numpy().flatten()  # Ensure it's a 1D array

        # Get indices of emotions above threshold
        predicted_indices = np.where(probs > threshold)[0]

        # Handle cases where no emotions meet the threshold
        if predicted_indices.size == 0:
            return ["No strong emotion detected"], []

        # Convert indices to emotion labels
        predicted_emotions = mlb.classes_[predicted_indices]

    return predicted_emotions, probs[predicted_indices]

In [18]:
text = "I don't want anything to eat or drink."

emotions, scores = predict_emotions(text, model, tokenizer, mlb, threshold=0.2)
print("Predicted Emotions:", emotions)
print("Confidence Scores:", scores)

# Print all probabilities for each emotion
probs = predict_emotions(text, model, tokenizer, mlb, threshold=0.0)[1]  # Threshold 0 to see all
for label, score in zip(mlb.classes_, probs):
    print(f"{label}: {score:.3f}")


Predicted Emotions: ['disappointment' 'disapproval']
Confidence Scores: [0.3380395  0.39328992]
admiration: 0.001
amusement: 0.001
anger: 0.064
annoyance: 0.163
approval: 0.007
caring: 0.001
confusion: 0.006
curiosity: 0.002
desire: 0.002
disappointment: 0.338
disapproval: 0.393
disgust: 0.033
embarrassment: 0.011
example_very_unclear: 0.007
excitement: 0.001
fear: 0.003
gratitude: 0.001
grief: 0.003
joy: 0.002
love: 0.002
nervousness: 0.007
neutral: 0.169
optimism: 0.002
pride: 0.001
rater_id: 0.007
realization: 0.017
relief: 0.001
remorse: 0.004
sadness: 0.072
surprise: 0.002
