In [2]:
import warnings
warnings.filterwarnings("ignore")
# Environment check
import sys, torch, transformers, datasets, sklearn

print("Python version:", sys.version)
print("which python:", sys.executable)
print("PyTorch version:", torch.__version__)
print("Transformers version:", transformers.__version__)
print("Datasets version:", datasets.__version__)
print("CUDA available:", torch.cuda.is_available())


Python version: 3.10.2 (v3.10.2:a58ebcc701, Jan 13 2022, 14:50:16) [Clang 13.0.0 (clang-1300.0.29.30)]
which python: /Users/hd/Desktop/EMOTION-PRED/.venv/bin/python
PyTorch version: 2.4.1
Transformers version: 4.44.2
Datasets version: 4.4.1
CUDA available: False


In [3]:
# Import core libraries

# Hugging Face
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoConfig
)
from datasets import load_dataset
from huggingface_hub import list_datasets

# Data and visualization
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Evaluation and metrics
from sklearn.metrics import classification_report, confusion_matrix

# System utilities

import re
import json
import torch



In [4]:
import os

try:
    # Running as normal Python script inside src/
    this_file = os.path.abspath(__file__)
    src_root = os.path.dirname(this_file)                        # EMOTION-PRED/src
    project_root = os.path.dirname(src_root)                    # EMOTION-PRED/
except NameError:
    # Running inside Jupyter (likely src/notebooks or src/)
    cwd = os.getcwd()

    # If running inside src/notebooks â†’ go up one level
    if cwd.endswith("notebooks"):
        src_root = os.path.abspath(os.path.join(cwd, ".."))
        project_root = os.path.dirname(src_root)
    else:
        # Running from project root directly
        project_root = cwd
        src_root = os.path.join(project_root, "src")

# Final unified paths
results_root = os.path.join(src_root, "results")
data_root = os.path.join(src_root, "data")
print(f"ðŸ“‚ Project root: {project_root}"
      f"\nðŸ“‚ Source root: {src_root}"
      f"\nðŸ“‚ Results root: {results_root}"
      f"\nðŸ“‚ Data root: {data_root}")

ðŸ“‚ Project root: /Users/hd/Desktop/EMOTION-PRED
ðŸ“‚ Source root: /Users/hd/Desktop/EMOTION-PRED/src
ðŸ“‚ Results root: /Users/hd/Desktop/EMOTION-PRED/src/results
ðŸ“‚ Data root: /Users/hd/Desktop/EMOTION-PRED/src/data


In [5]:
# Add src/ to Python path
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [6]:

# Step -- 2 Search all datasets mentioning "emotion" (limit to 50)
datasets_list = list_datasets(search="emotion", limit=50)

records = []
for ds in datasets_list:
    # Safely get attributes (handle None cases)
    card = ds.cardData or {}
    desc = card.get("description") or ""
    tasks = card.get("task_categories") or []
    
    records.append({
        "id": ds.id,
        "downloads": getattr(ds, "downloads", None),
        "likes": getattr(ds, "likes", None),
        "task_categories": tasks,
        "description": desc[:120]  # first 120 chars for readability
    })

df = pd.DataFrame(records)
pd.set_option("display.max_rows", None)
display(df)


Unnamed: 0,id,downloads,likes,task_categories,description
0,dair-ai/emotion,27813,411,[],
1,google-research-datasets/go_emotions,10573,239,[],
2,stapesai/ssi-speech-emotion-recognition,227,6,[],
3,Kratos-AI/korean-voice-emotion-dataset,268,16,[],
4,Mansooreh/sharif-emotional-speech-dataset,23,1,[],
5,Pyjay/emotion_nl,10,0,[],
6,SetFit/emotion,357,29,[],
7,SetFit/go_emotions,306,9,[],
8,jakeazcona/short-text-labeled-emotion-classifi...,526,2,[],
9,jakeazcona/short-text-multi-labeled-emotion-cl...,37,1,[],


In [7]:
# Step 3 â€” Load pre-annotated emotion dataset
dataset = load_dataset("emotion")
dataset_name = "emotion"

# Check available splits
print(dataset)

# View label names
label_names = dataset["train"].features["label"].names
print("Labels:", label_names)

# Quick preview
dataset["train"][0]


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})
Labels: ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']


{'text': 'i didnt feel humiliated', 'label': 0}

In [8]:
# Step 4 â€” Prepare a manageable test subset
sample_size = 1000  # You can adjust later
texts = dataset["test"]["text"][:sample_size]
true_labels = dataset["test"]["label"][:sample_size]

print("Sample text example:", texts[0])
print("True label:", label_names[true_labels[0]])


Sample text example: im feeling rather rotten so im not very ambitious right now
True label: sadness


In [9]:
# Step 5 â€” Define models to evaluate
MODEL_NAMES = [
    "j-hartmann/emotion-english-distilroberta-base",
    "j-hartmann/emotion-english-roberta-large",
    "nateraw/bert-base-uncased-emotion",
    "joeddav/distilbert-base-uncased-go-emotions-student",
    "cardiffnlp/twitter-roberta-base-emotion",
    "mrm8488/t5-base-finetuned-emotion",
    "SamLowe/roberta-base-go_emotions"
]


In [10]:

# Folder to store built-in emotion label sets
BUILTIN_DIR = os.path.join(src_root, "built_in_emotions")
os.makedirs(BUILTIN_DIR, exist_ok=True)

for model_name in MODEL_NAMES:
    try:
        print(f"Inspecting {model_name} ...")

        # Load config (no weights needed)
        config = AutoConfig.from_pretrained(model_name)
        id2label = getattr(config, "id2label", None)
        label2id = getattr(config, "label2id", None)

        # Extract label list (some configs only have label2id)
        if id2label:
            labels = list(id2label.values())
        elif label2id:
            labels = list(label2id.keys())
        else:
            labels = []

        # Build metadata
        meta = {
            "model_name": model_name,
            "num_labels": len(labels),
            "labels": labels
        }

        # Safe file name
        safe_name = re.sub(r"[^a-zA-Z0-9]", "_", model_name)
        json_path = os.path.join(BUILTIN_DIR, f"{safe_name}.json")

        # Write JSON
        with open(json_path, "w") as f:
            json.dump(meta, f, indent=4)

        print(f"Saved â†’ {json_path}")

    except Exception as e:
        print(f"Failed for {model_name}: {e}")

print("All built-in emotion label sets exported.")


Inspecting j-hartmann/emotion-english-distilroberta-base ...
Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/built_in_emotions/j_hartmann_emotion_english_distilroberta_base.json
Inspecting j-hartmann/emotion-english-roberta-large ...
Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/built_in_emotions/j_hartmann_emotion_english_roberta_large.json
Inspecting nateraw/bert-base-uncased-emotion ...
Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/built_in_emotions/nateraw_bert_base_uncased_emotion.json
Inspecting joeddav/distilbert-base-uncased-go-emotions-student ...
Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/built_in_emotions/joeddav_distilbert_base_uncased_go_emotions_student.json
Inspecting cardiffnlp/twitter-roberta-base-emotion ...
Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/built_in_emotions/cardiffnlp_twitter_roberta_base_emotion.json
Inspecting mrm8488/t5-base-finetuned-emotion ...
Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/built_in_emotions/mrm8488_t5_base_finetuned_emotion.json
Inspecti

In [10]:
# Step 6 â€” Evaluation function
def evaluate_model(model_name, texts, true_labels, label_names, sample_limit=200):
    """
    Run inference on a given model and compute precision, recall, F1, confusion matrix.
    Handles different model architectures automatically.
    """
    print(f"\nðŸ”¹ Evaluating model: {model_name}")
    model_name_low = model_name.lower()

    # --- CASE 1: T5 (text2text generation) ---
    if "t5" in model_name_low:
        clf = pipeline("text2text-generation", model=model_name, tokenizer=model_name)

        def classify(text):
            result = clf(f"classify emotion: {text}")[0]["generated_text"]
            return result.strip().lower()

    # --- CASE 2: SamLowe / GoEmotions (multi-label classification) ---
    elif "go_emotions" in model_name_low:
        clf = pipeline("text-classification", model=model_name, tokenizer=model_name, return_all_scores=True)

        def classify(text):
            results = clf(text)[0]
            best = sorted(results, key=lambda x: x["score"], reverse=True)[0]
            return best["label"].lower()

    # --- CASE 3: CardiffNLP (twitter) ---
    elif "cardiffnlp" in model_name_low:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        labels = ['anger', 'joy', 'optimism', 'sadness']

        def classify(text):
            inputs = tokenizer(text, return_tensors="pt")
            with torch.no_grad():
                logits = model(**inputs).logits
            pred = torch.softmax(logits, dim=1)
            label = labels[torch.argmax(pred).item()]
            return label.lower()

    # --- CASE 4: Default (simple classifier) ---
    else:
        clf = pipeline("text-classification", model=model_name, tokenizer=model_name, top_k=1 ,device=0)

        def classify(text):
            result = clf(text)[0]
            if isinstance(result, list):  # handle list-of-dicts case
                result = result[0]
            return result["label"].lower()

    # --- RUN PREDICTIONS ---
    preds = []
    for t in texts[:sample_limit]:
        try:
            preds.append(classify(t))
        except Exception as e:
            preds.append("unknown")


 
    # --- ALIGN LABELS ---
    pred_indices = [label_names.index(p) if p in label_names else -1 for p in preds]
    valid_idx = [i for i, x in enumerate(pred_indices) if x != -1]

    y_true = [true_labels[i] for i in valid_idx]
    y_pred = [pred_indices[i] for i in valid_idx]

    # --- METRICS ---
    report = classification_report(y_true, y_pred, target_names=label_names, output_dict=True)
    df_report = pd.DataFrame(report).transpose()

    cm = confusion_matrix(y_true, y_pred)

    # --- Log missing labels ---
    predicted_classes = set([label_names[i] for i in pred_indices if i != -1])
    missing = set(label_names) - predicted_classes
    if missing:
        print(f"Model did not predict these classes: {', '.join(missing)}")
    else:
        print("Model predicted all emotion classes.")

    

    return df_report, cm


In [None]:
# Step 7 â€” Test one model first
report_df, cm = evaluate_model(
    "j-hartmann/emotion-english-distilroberta-base",
    texts, true_labels, label_names, sample_limit=200
)

display(report_df)

sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=label_names, yticklabels=label_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix â€” j-hartmann/distilroberta-base")
plt.show()



ðŸ”¹ Evaluating model: j-hartmann/emotion-english-distilroberta-base


In [None]:
# Step 8 â€” Evaluate all models
results = {}

for model in MODEL_NAMES:
    try:
        df_report, cm = evaluate_model(model, texts, true_labels, label_names, sample_limit=200)
        results[model] = df_report
    except Exception as e:
        print(f" {model} failed: {e}")



ðŸ”¹ Evaluating model: j-hartmann/emotion-english-distilroberta-base
Model did not predict these classes: love

ðŸ”¹ Evaluating model: j-hartmann/emotion-english-roberta-large
Model did not predict these classes: love

ðŸ”¹ Evaluating model: nateraw/bert-base-uncased-emotion
Model predicted all emotion classes.

ðŸ”¹ Evaluating model: joeddav/distilbert-base-uncased-go-emotions-student
Model predicted all emotion classes.

ðŸ”¹ Evaluating model: cardiffnlp/twitter-roberta-base-emotion
Model did not predict these classes: love, surprise, fear

ðŸ”¹ Evaluating model: mrm8488/t5-base-finetuned-emotion


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Model predicted all emotion classes.


In [None]:
# Step 9 â€” Compare macro-F1 across models
summary = pd.DataFrame({
    model: results[model].loc["macro avg", "f1-score"]
    for model in results.keys()
}, index=["Macro F1"]).T

summary.sort_values("Macro F1", ascending=False)


Unnamed: 0,Macro F1
mrm8488/t5-base-finetuned-emotion,0.871192
nateraw/bert-base-uncased-emotion,0.863528
j-hartmann/emotion-english-roberta-large,0.665317
j-hartmann/emotion-english-distilroberta-base,0.640337
joeddav/distilbert-base-uncased-go-emotions-student,0.596212
cardiffnlp/twitter-roberta-base-emotion,0.313499


In [None]:
# Step 10 â€” Save metrics to disk in a dedicated results folder
import os

RESULTS_DIR = os.path.join(results_root, f"results_{dataset_name}")
os.makedirs(RESULTS_DIR, exist_ok=True)  # Create if not exists

for model, df in results.items():
    safe_name = model.replace("/", "_")
    output_path = os.path.join(RESULTS_DIR, f"results_{safe_name}.csv")
    df.to_csv(output_path, index=True)
    print(f"Saved metrics â†’ {output_path}")

print("\n All model reports saved to:", os.path.abspath(RESULTS_DIR))


Saved metrics â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion/results_j-hartmann_emotion-english-distilroberta-base.csv
Saved metrics â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion/results_j-hartmann_emotion-english-roberta-large.csv
Saved metrics â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion/results_nateraw_bert-base-uncased-emotion.csv
Saved metrics â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion/results_joeddav_distilbert-base-uncased-go-emotions-student.csv
Saved metrics â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion/results_cardiffnlp_twitter-roberta-base-emotion.csv
Saved metrics â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion/results_mrm8488_t5-base-finetuned-emotion.csv

 All model reports saved to: /Users/hd/Desktop/EMOTION-PRED/src/results/results_emotion
