In [1]:
# Install required packages
!pip install -q transformers datasets accelerate torch scikit-learn matplotlib


In [2]:
# Disable W&B to avoid login prompt
import os
os.environ["WANDB_MODE"] = "disabled"
# Newer HF versions recommend --report_to none, but disabling W&B env works

# Imports
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, accuracy_score
from torch.nn import BCEWithLogitsLoss
import torch.nn.functional as F


In [3]:
# Load GoEmotions (simplified)
dataset = load_dataset("go_emotions", "simplified")
print(dataset)

# Quick peek
print("Example:", dataset["train"][0])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

simplified/train-00000-of-00001.parquet:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

simplified/validation-00000-of-00001.par(…):   0%|          | 0.00/350k [00:00<?, ?B/s]

simplified/test-00000-of-00001.parquet:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})
Example: {'text': "My favourite food is anything I didn't have to cook myself.", 'labels': [27], 'id': 'eebbqej'}


In [4]:
# Load GoEmotions (simplified)
dataset = load_dataset("go_emotions", "simplified")
print(dataset)

# Quick peek
print("Example:", dataset["train"][0])


DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})
Example: {'text': "My favourite food is anything I didn't have to cook myself.", 'labels': [27], 'id': 'eebbqej'}


In [6]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

def tokenize_fn(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

encoded = dataset.map(tokenize_fn, batched=True)

# Rename 'labels' → 'label' if needed
if "label" not in encoded["train"].column_names and "labels" in encoded["train"].column_names:
    encoded = encoded.rename_column("labels", "label")

# ensure columns exist and set torch format
encoded.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
print("✅ Columns after formatting:", encoded["train"].column_names)



Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

✅ Columns after formatting: ['text', 'label', 'id', 'input_ids', 'attention_mask']


In [7]:
import torch
import numpy as np

# Automatically pick GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("🚀 Using device:", device)

# Get the number of labels (you printed it earlier)
num_labels = encoded["train"].features["label"].feature.num_classes
print("Number of labels:", num_labels)

# Calculate per-class frequencies
label_counts = np.zeros(num_labels, dtype=int)
for ex in dataset["train"]:
    for l in ex["labels"]:
        label_counts[l] += 1

print("Label counts (first 10):", label_counts[:10])

# Compute pos_weight = (num_neg / num_pos)
num_samples = len(dataset["train"])
neg_counts = num_samples - label_counts
pos_weight = torch.tensor(neg_counts / (label_counts + 1e-6), dtype=torch.float).to(device)

print("✅ pos_weight shape:", pos_weight.shape)
print("Example pos_weight values:", pos_weight[:10])


🚀 Using device: cuda
Number of labels: 28
Label counts (first 10): [4130 2328 1567 2470 2939 1087 1368 2191  641 1269]
✅ pos_weight shape: torch.Size([28])
Example pos_weight values: tensor([ 9.5109, 17.6469, 26.7026, 16.5749, 13.7703, 38.9356, 30.7325, 18.8129,
        66.7223, 33.2080], device='cuda:0')


In [8]:
from transformers import AutoModelForSequenceClassification

# Load the pretrained RoBERTa base model
model = AutoModelForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=num_labels,
    problem_type="multi_label_classification"  # tells it we’re doing multi-label, not single-label
).to(device)

print("✅ Model loaded and moved to", device)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model loaded and moved to cuda


In [10]:
!pip install evaluate -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [11]:
import torch
import numpy as np
import evaluate
from transformers import DataCollatorWithPadding

# Custom collator (handles dynamic padding)
def custom_collate_fn(features):
    batch = {}
    batch["labels"] = torch.tensor([f["labels"] for f in features], dtype=torch.float)
    batch_inputs = {k: [f[k] for f in features] for k in ["input_ids", "attention_mask"]}
    collated = DataCollatorWithPadding(tokenizer)(batch_inputs)
    batch.update(collated)
    return batch

# Metrics for multi-label classification
metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = torch.sigmoid(torch.tensor(logits))
    preds = (probs > 0.5).int().numpy()
    labels = labels.astype(int)

    f1 = metric.compute(predictions=preds, references=labels, average="macro")["f1"]
    acc = (preds == labels).mean()
    return {"accuracy": acc, "f1": f1}

print("✅ Data collator and metrics ready.")


Downloading builder script: 0.00B [00:00, ?B/s]

✅ Data collator and metrics ready.


In [14]:
!pip install -U transformers




In [18]:
from transformers import DataCollatorWithPadding

# Fixed collate function for your dataset
def custom_collate_fn(features):
    batch = {}
    # Fix: use the correct key name 'label'
    batch["labels"] = torch.tensor([f["label"] for f in features], dtype=torch.float)
    batch_inputs = {k: [f[k] for f in features] for k in ["input_ids", "attention_mask"]}
    collated = DataCollatorWithPadding(tokenizer)(batch_inputs)
    batch.update(collated)
    return batch


In [20]:
from transformers import DataCollatorWithPadding

# Handles both single-label and multi-label data
def custom_collate_fn(features):
    batch = {}

    # Detect if labels are lists (multi-label)
    if isinstance(features[0]["label"], (list, np.ndarray)):
        batch["labels"] = torch.tensor(
            [f["label"] for f in features], dtype=torch.float
        )  # multi-label → float
    else:
        batch["labels"] = torch.tensor(
            [f["label"] for f in features], dtype=torch.long
        )  # single-label → long (int)

    # Tokenized inputs
    batch_inputs = {k: [f[k] for f in features] for k in ["input_ids", "attention_mask"]}
    collated = DataCollatorWithPadding(tokenizer)(batch_inputs)
    batch.update(collated)
    return batch


In [24]:
# Run in your notebook to inspect labels and one tokenized example
from collections import Counter
import torch, numpy as np

# show sample entries
for i in range(5):
    ex = encoded["train"][i]
    print(f"index {i} keys:", ex.keys())
    lbl = ex.get("label", ex.get("labels", None))
    print(" type:", type(lbl), " dtype:", getattr(lbl, "dtype", None), " len(if applicable):",
          (len(lbl) if hasattr(lbl, "__len__") else "scalar"), " value(sample):",
          (lbl[:10] if hasattr(lbl, "__len__") else lbl))
    print("---")

# summary stats over small subset
def summarize_labels(ds, n=1000):
    types = Counter()
    lens = Counter()
    for i, ex in enumerate(ds):
        if i>=n: break
        lbl = ex.get("label", ex.get("labels", None))
        types[type(lbl).__name__] += 1
        try:
            lens[len(lbl)] += 1
        except Exception:
            lens["scalar_or_unknown"] += 1
    return types, lens

types, lens = summarize_labels(encoded["train"], n=2000)
print("Label object types (sample):", types)
print("Label lengths distribution (sample):", lens)



index 0 keys: dict_keys(['label', 'input_ids', 'attention_mask'])
 type: <class 'torch.Tensor'>  dtype: torch.int64  len(if applicable): 1  value(sample): tensor([27])
---
index 1 keys: dict_keys(['label', 'input_ids', 'attention_mask'])
 type: <class 'torch.Tensor'>  dtype: torch.int64  len(if applicable): 1  value(sample): tensor([27])
---
index 2 keys: dict_keys(['label', 'input_ids', 'attention_mask'])
 type: <class 'torch.Tensor'>  dtype: torch.int64  len(if applicable): 1  value(sample): tensor([2])
---
index 3 keys: dict_keys(['label', 'input_ids', 'attention_mask'])
 type: <class 'torch.Tensor'>  dtype: torch.int64  len(if applicable): 1  value(sample): tensor([14])
---
index 4 keys: dict_keys(['label', 'input_ids', 'attention_mask'])
 type: <class 'torch.Tensor'>  dtype: torch.int64  len(if applicable): 1  value(sample): tensor([3])
---
Label object types (sample): Counter({'Tensor': 2000})
Label lengths distribution (sample): Counter({1: 1672, 2: 297, 3: 28, 4: 3})


In [25]:
# get all possible unique label ids
unique_labels = set()
for ex in encoded["train"]:
    lbl = ex["label"].tolist() if torch.is_tensor(ex["label"]) else ex["label"]
    unique_labels.update(lbl if isinstance(lbl, (list, tuple)) else [lbl])
num_labels = max(unique_labels) + 1
print("num_labels =", num_labels, "unique labels:", sorted(unique_labels))



num_labels = 28 unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]


In [26]:
import numpy as np, torch

def normalize_multilabel(example):
    lbl = example["label"]
    if torch.is_tensor(lbl):
        lbl = lbl.tolist()
    if isinstance(lbl, (int, float)):
        lbl = [int(lbl)]
    if not isinstance(lbl, list):
        lbl = list(lbl)
    arr = np.zeros(28, dtype=int)
    for l in lbl:
        if 0 <= l < 28:
            arr[l] = 1
    example["label"] = arr.tolist()
    return example

encoded = encoded.map(normalize_multilabel)
print("✅ Normalized example label:", encoded["train"][0]["label"])





Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

✅ Normalized example label: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1])


In [27]:
from collections import Counter
lengths = Counter(len(ex["label"]) for ex in encoded["train"])
print("label length check:", lengths)


label length check: Counter({28: 43410})


In [28]:
from evaluate import load

# Load standard metrics
f1_metric = load("f1")
accuracy_metric = load("accuracy")

# Compute metrics function for Trainer
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Apply sigmoid for multi-label
    probs = torch.sigmoid(torch.tensor(logits))
    preds = (probs > 0.5).int()

    # Compute micro & macro F1
    f1_micro = f1_metric.compute(predictions=preds, references=labels, average="micro")
    f1_macro = f1_metric.compute(predictions=preds, references=labels, average="macro")
    acc = accuracy_metric.compute(predictions=preds, references=labels)

    return {
        "f1_micro": f1_micro["f1"],
        "f1_macro": f1_macro["f1"],
        "accuracy": acc["accuracy"],
    }

print("✅ Metrics ready.")


Downloading builder script: 0.00B [00:00, ?B/s]

✅ Metrics ready.


In [29]:
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
import torch

# ============================================================
# Custom collate function (handles multi-label tensors)
# ============================================================
def custom_collate_fn(features):
    batch = {}
    # Stack input tensors correctly
    batch["labels"] = torch.stack([f["label"] for f in features]).float()
    batch_inputs = {k: [f[k] for f in features] for k in ["input_ids", "attention_mask"]}
    collated = DataCollatorWithPadding(tokenizer)(batch_inputs)
    batch.update(collated)
    return batch

# ============================================================
# Training Arguments
# ============================================================
training_args = TrainingArguments(
    output_dir="./sentiment_model",
    num_train_epochs=2,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_ratio=0.1,
    learning_rate=2e-5,
    logging_steps=100,
    save_total_limit=1,
)

# ============================================================
# Trainer
# ============================================================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded["train"],
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,
    data_collator=custom_collate_fn,
    compute_metrics=compute_metrics,
)

# ============================================================
# Train the model
# ============================================================
train_result = trainer.train()
print("✅ Training finished. result:", train_result)


  trainer = Trainer(


Step,Training Loss
100,0.6973
200,0.4671
300,0.2784
400,0.2135
500,0.1807
600,0.1646
700,0.1555
800,0.1529
900,0.1477
1000,0.1368


✅ Training finished. result: TrainOutput(global_step=10854, training_loss=0.10809144667110018, metrics={'train_runtime': 2686.9765, 'train_samples_per_second': 32.311, 'train_steps_per_second': 4.039, 'total_flos': 5712158611722240.0, 'train_loss': 0.10809144667110018, 'epoch': 2.0})


In [31]:
from google.colab import drive
drive.mount('/content/drive')

!cp -r /content/sentiment_model /content/drive/MyDrive/sentiment_model_roberta/


Mounted at /content/drive


In [32]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, classification_report

def compute_metrics(eval_pred):
    """Handles multi-label classification correctly."""
    logits, labels = eval_pred
    preds = (1 / (1 + np.exp(-logits)))  # sigmoid
    preds = (preds > 0.5).astype(int)    # threshold at 0.5

    # Convert to integer labels if you want micro/macro F1
    f1_micro = f1_score(labels, preds, average="micro", zero_division=0)
    f1_macro = f1_score(labels, preds, average="macro", zero_division=0)
    acc = accuracy_score(labels, preds)

    return {
        "accuracy": acc,
        "f1_micro": f1_micro,
        "f1_macro": f1_macro
    }


In [34]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded["train"],
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,
    data_collator=custom_collate_fn,
    compute_metrics=compute_metrics
)




  trainer = Trainer(


In [37]:
from transformers import Trainer

# Recreate the trainer using the same setup as before
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,
    data_collator=custom_collate_fn,
    compute_metrics=compute_metrics
)

# Run evaluation
eval_results = trainer.evaluate()

print("✅ Evaluation complete!")
print(eval_results)



  trainer = Trainer(


✅ Evaluation complete!
{'eval_loss': 0.0843556672334671, 'eval_model_preparation_time': 0.007, 'eval_accuracy': 0.4574272023590122, 'eval_f1_micro': 0.5770920991117344, 'eval_f1_macro': 0.4001685157201097, 'eval_runtime': 45.7997, 'eval_samples_per_second': 118.472, 'eval_steps_per_second': 14.825}


In [38]:
# Run evaluation again but get predictions
predictions_output = trainer.predict(encoded["validation"])

# Extract predicted label IDs
preds = torch.tensor(predictions_output.predictions)
predicted_classes = preds.argmax(dim=1).tolist()

# True labels
true_labels = encoded["validation"]["label"]

# Print first 20 predictions vs actual
for i in range(20):
    print(f"Text: {dataset['validation'][i]['text'][:80]}...")
    print(f"  ➤ Predicted: {predicted_classes[i]}")
    print(f"  ➤ Actual: {true_labels[i]}")
    print("----")


Text: Is this in New Orleans?? I really feel like this is New Orleans....
  ➤ Predicted: 7
  ➤ Actual: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1])
----
Text: You know the answer man, you are programmed to capture those codes they send you...
  ➤ Predicted: 5
  ➤ Actual: tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1])
----
Text: I've never been this sad in my life!...
  ➤ Predicted: 25
  ➤ Actual: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0])
----
Text: The economy is heavily controlled and subsidized by the government. In any case,...
  ➤ Predicted: 27
  ➤ Actual: tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1])
----
Text: He could have easily taken a real camera from a legitimate source and change the...
  ➤ Predicted: 27
  ➤ Actual: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [39]:
import torch

# Get raw logits again
preds = torch.tensor(predictions_output.predictions)
probs = torch.sigmoid(preds)

# Predict labels with threshold 0.5
multi_pred_labels = (probs > 0.5).int().tolist()

# Print a few results
for i in range(10):
    print(f"\nText: {dataset['validation'][i]['text'][:80]}...")
    print(f"Predicted indices: {torch.where(torch.tensor(multi_pred_labels[i])==1)[0].tolist()}")
    print(f"Actual indices: {torch.where(encoded['validation']['label'][i]==1)[0].tolist()}")



Text: Is this in New Orleans?? I really feel like this is New Orleans....
Predicted indices: []
Actual indices: [27]

Text: You know the answer man, you are programmed to capture those codes they send you...
Predicted indices: [5]
Actual indices: [4, 27]

Text: I've never been this sad in my life!...
Predicted indices: [25]
Actual indices: [25]

Text: The economy is heavily controlled and subsidized by the government. In any case,...
Predicted indices: [27]
Actual indices: [4, 27]

Text: He could have easily taken a real camera from a legitimate source and change the...
Predicted indices: [27]
Actual indices: [20]

Text: Thank you for your vote of confidence, but we statistically can't get to 10 wins...
Predicted indices: [15]
Actual indices: [15]

Text: Wah Mum other people call me on my bullshit and I can't ban them , Go out side s...
Predicted indices: []
Actual indices: [2]

Text: There it is!...
Predicted indices: []
Actual indices: [27]

Text: At least now [NAME] has more time t

In [40]:
import torch
import numpy as np

# 1️⃣ Define the emotion label names (you can adjust these if your dataset has specific ones)
# Example 28 emotions — replace with your actual label list if available
id2label = {
    0: "admiration", 1: "amusement", 2: "anger", 3: "annoyance", 4: "approval", 5: "caring",
    6: "confusion", 7: "curiosity", 8: "desire", 9: "disappointment", 10: "disapproval",
    11: "disgust", 12: "embarrassment", 13: "excitement", 14: "fear", 15: "gratitude",
    16: "grief", 17: "joy", 18: "love", 19: "nervousness", 20: "optimism", 21: "pride",
    22: "realization", 23: "relief", 24: "remorse", 25: "sadness", 26: "surprise", 27: "neutral"
}

# 2️⃣ Convert logits → probabilities → binary multi-labels
preds = torch.tensor(predictions_output.predictions)
probs = torch.sigmoid(preds)
multi_pred_labels = (probs > 0.5).int().tolist()

# 3️⃣ Loop through samples and print
for i in range(10):  # you can increase the number
    text = dataset["validation"][i]["text"][:90].replace("\n", " ") + "..."
    pred_indices = torch.where(torch.tensor(multi_pred_labels[i]) == 1)[0].tolist()
    true_indices = torch.where(encoded["validation"]["label"][i] == 1)[0].tolist()

    pred_names = [id2label[idx] for idx in pred_indices] if pred_indices else ["(none)"]
    true_names = [id2label[idx] for idx in true_indices]

    print(f"\n📝 Text: {text}")
    print(f"🤖 Predicted indices: {pred_indices}")
    print(f"🤖 Predicted labels: {pred_names}")
    print(f"✅ Actual indices: {true_indices}")
    print(f"✅ Actual labels: {true_names}")



📝 Text: Is this in New Orleans?? I really feel like this is New Orleans....
🤖 Predicted indices: []
🤖 Predicted labels: ['(none)']
✅ Actual indices: [27]
✅ Actual labels: ['neutral']

📝 Text: You know the answer man, you are programmed to capture those codes they send you, don’t av...
🤖 Predicted indices: [5]
🤖 Predicted labels: ['caring']
✅ Actual indices: [4, 27]
✅ Actual labels: ['approval', 'neutral']

📝 Text: I've never been this sad in my life!...
🤖 Predicted indices: [25]
🤖 Predicted labels: ['sadness']
✅ Actual indices: [25]
✅ Actual labels: ['sadness']

📝 Text: The economy is heavily controlled and subsidized by the government. In any case, I was pok...
🤖 Predicted indices: [27]
🤖 Predicted labels: ['neutral']
✅ Actual indices: [4, 27]
✅ Actual labels: ['approval', 'neutral']

📝 Text: He could have easily taken a real camera from a legitimate source and change the price in ...
🤖 Predicted indices: [27]
🤖 Predicted labels: ['neutral']
✅ Actual indices: [20]
✅ Actual labels: ['