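# VibeQ – DeBERTa-v3-large Emotion Classifier

Multi-label emotion classification on the GoEmotions dataset: DeBERTa-v3-large fine-tuned with fastai using a BCE-with-logits loss.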


## Environment setup

Install dependencies and mount Google Drive when running in Colab.

In [1]:
%pip install -q fastai transformers

In [2]:
# Mount Google Drive so the dataset and model paths under /content/drive resolve.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


## Imports

Load fastai, transformers, and evaluation utilities.

In [3]:
# =====================================================
# 0. Setup: Install & Imports (Colab-friendly)
# =====================================================

from fastai.text.all import *
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import pandas as pd
import numpy as np
import torch
import random
import os



## Configuration

Set paths, model choice, and emotion label list.

In [4]:
# ==========================
# Configuration
# ==========================

SEED = 42

# Make runs repeatable: seed Python, NumPy and PyTorch (plus every CUDA device if present)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Locations on the mounted Drive and the checkpoint to fine-tune
DATA_PATH = "/content/drive/MyDrive/VibeQ-EIE/data/go_emotions_dataset.csv"   # <-- change if needed
MODEL_NAME = "microsoft/deberta-v3-large"
OUTPUT_DIR = "/content/drive/MyDrive/VibeQ-EIE/models"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the output folder up front if it is missing

# NOTE(review): this list is not referenced anywhere else in the visible notebook;
# the training vocabulary (FINE_EMOTIONS) is derived from the data instead.
EMOTION_LABELS = [
    "joy", "sadness", "anger",
    "disgust", "fear", "caring",
    "anticipation", "surprise", "neutral",
]

# Prefer the GPU when one is visible; the bare name below displays the choice
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device


device(type='cuda')

## Load dataset preview

Read the raw dataset and check basic stats.

In [5]:
# Read the raw CSV from DATA_PATH into a DataFrame.
df = pd.read_csv(DATA_PATH)

# Report the schema and row count before any filtering.
print("Columns:", df.columns.tolist())
print("Raw size:", len(df))
# Last expression of the cell: renders the first rows as the cell output.
df.head()


Columns: ['id', 'text', 'example_very_unclear', 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']
Raw size: 211225


Unnamed: 0,id,text,example_very_unclear,admiration,amusement,anger,annoyance,approval,caring,confusion,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,eew5j0j,That game hurt.,False,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,eemcysk,">sexuality shouldn’t be a grouping category It makes you different from othet ppl so imo it fits the definition of ""grouping""",True,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,ed2mah1,"You do right, if you don't care then fuck 'em!",False,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,eeibobj,Man I love reddit.,False,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,eda6yn6,"[NAME] was nowhere near them, he was by the Falcon.",False,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


## Clean dataset and identify label columns

Filter unclear rows and collect the emotion label columns.

In [6]:
# Start from a fresh read so re-running this cell does not depend on an
# already-filtered `df`.
df = pd.read_csv(DATA_PATH)
print("Columns:", df.columns.tolist())
print("Raw size:", len(df))

# Keep only rows whose `example_very_unclear` flag is 0 / False
if "example_very_unclear" in df.columns:
    df = df.loc[df["example_very_unclear"].eq(0)].reset_index(drop=True)
    print("After removing unclear examples:", len(df))

# Metadata columns (the optional ones only when they are present in the file)
non_emotion_cols = ["id", "text", "example_very_unclear"] + [
    name for name in ("comment_id", "comment_id_original") if name in df.columns
]

# Every remaining column is treated as a 0/1 emotion indicator
emotion_cols = [name for name in df.columns if name not in non_emotion_cols]
print("Detected emotion columns:", emotion_cols)

def get_active_emotions(row):
    """Return the names of the emotion columns whose value equals 1 in ``row``."""
    active = []
    for col in emotion_cols:
        if row[col] == 1:
            active.append(col)
    return active

# Multi-hot indicator columns → one list of label names per row
df["labels"] = df[emotion_cols].apply(get_active_emotions, axis=1)

# Rows without a single active label are dropped
df = df[df["labels"].map(len).gt(0)].reset_index(drop=True)

# Normalise whitespace and drop rows whose text is empty afterwards.
# NOTE(review): astype(str) turns a missing text value into the literal "nan",
# which the empty-string filter does not remove — confirm the CSV has no missing text.
df["text"] = df["text"].astype(str).str.strip()
df = df[df["text"].ne("")].reset_index(drop=True)

print("Clean size:", len(df))
df[["text", "labels"]].head()


Columns: ['id', 'text', 'example_very_unclear', 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']
Raw size: 211225
After removing unclear examples: 207814
Detected emotion columns: ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']
Clean size: 207814


Unnamed: 0,text,labels
0,That game hurt.,[sadness]
1,"You do right, if you don't care then fuck 'em!",[neutral]
2,Man I love reddit.,[love]
3,"[NAME] was nowhere near them, he was by the Falcon.",[neutral]
4,"Right? Considering it’s such an important document, I should know the damned thing backwards and forwards... thanks again for the help!",[gratitude]


## Tokenization transform

Wrap the HuggingFace tokenizer as a fastai Transform for input IDs and masks.

In [7]:
# ===========================================
# Custom fastai Transform for HuggingFace tokenizer
# ===========================================

from fastai.text.all import Transform, TensorText, TitledStr

class HF_Tokenizer(Transform):
    """
    Adapter that lets a HuggingFace tokenizer be used as a fastai Transform.

    Encoding: raw text -> ``(input_ids, attention_mask)``, truncated and
    padded to ``max_length`` tokens.
    Decoding: the ids of such a pair -> text with special tokens skipped.
    """
    def __init__(self, tokenizer, max_length=128):
        self.max_length = max_length
        self.tokenizer = tokenizer

    def encodes(self, x: str):
        # The `str` annotation is deliberate: fastai Transforms dispatch on the
        # annotated type, so it must stay exactly as written.
        encoded = self.tokenizer(
            x,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
        )
        return (
            TensorText(encoded["input_ids"]),
            TensorText(encoded["attention_mask"]),
        )

    def decodes(self, x):
        # `x` is the (input_ids, attention_mask) pair; only the ids are needed here
        token_ids = x[0].long().tolist()
        return TitledStr(self.tokenizer.decode(token_ids, skip_special_tokens=True))


## Build label vocabulary

Derive the sorted list of fine-grained emotion labels from the data.

In [8]:

# Vocabulary = every distinct label occurring in the cleaned data, sorted alphabetically
FINE_EMOTIONS = sorted({emotion for row_labels in df["labels"] for emotion in row_labels})
print("Number of fine-grained emotions:", len(FINE_EMOTIONS))
print(FINE_EMOTIONS)


Number of fine-grained emotions: 28
['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'neutral', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise']


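## Train/test split

Hold out 20% of the cleaned data as a test set for final evaluation. The validation set used during training is split off from the remaining 80% later, inside the DataBlock.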

In [9]:
from sklearn.model_selection import train_test_split

# df has columns: ["text", "labels", ...]
# Hold out 20% of the cleaned rows as the test set; the remaining 80% (train_df)
# is split again into train/validation by the DataBlock's RandomSplitter.
# random_state=SEED keeps the partition reproducible.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=SEED,
)

# Row counts of the two partitions (train, test).
print(len(train_df), len(test_df))


166251 41563


## DataBlock and DataLoaders

Tokenize text and prepare multi-label batches.

In [10]:

# Load the tokenizer belonging to MODEL_NAME and wrap it as a fastai Transform
# that pads/truncates every text to 128 tokens.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tok_tf = HF_Tokenizer(tokenizer, max_length=128)

# Row accessors used by the DataBlock: model input text and its list of label names.
def get_x(r): return r["text"]
def get_y(r): return r["labels"]

dblock = DataBlock(
    # x: generic block (tokenization is done by item_tfms); y: multi-label targets over FINE_EMOTIONS
    blocks=(TransformBlock, MultiCategoryBlock(vocab=FINE_EMOTIONS)),
    get_x=get_x,
    get_y=get_y,
    splitter=RandomSplitter(seed=SEED),  # splits *train_df* into train/valid
    # NOTE(review): presumably only the text element is tokenized, since
    # HF_Tokenizer.encodes is annotated for `str` — confirm.
    item_tfms=tok_tf,
)

# Build the train/valid DataLoaders from train_df only; test_df stays untouched for evaluation.
dls = dblock.dataloaders(train_df, bs=16)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



## Inspect a sample batch

Sanity-check tensor shapes coming from the dataloaders.

In [11]:
# Pull a single batch: x is expected to be an (input_ids, attention_mask) pair, y the targets
xb, yb = dls.one_batch()
print(f"xb type: {type(xb)}")
print(f"len(xb): {len(xb)}")
print(f"input_ids shape: {xb[0].shape}")
print(f"attention_mask shape: {xb[1].shape}")
print(f"yb shape: {yb.shape}")


xb type: <class 'tuple'>
len(xb): 2
input_ids shape: torch.Size([16, 128])
attention_mask shape: torch.Size([16, 128])
yb shape: torch.Size([16, 28])


## HuggingFace classifier wrapper

Adapter module to use DeBERTa with fastai for multi-label outputs.

In [12]:
from transformers import AutoModelForSequenceClassification

# One output logit per fine-grained emotion
num_labels = len(FINE_EMOTIONS)

class HF_SequenceClassifierMulti(Module):
    """
    Thin wrapper around a HuggingFace sequence-classification model.

    The wrapped model is loaded from ``model_name`` with ``num_labels`` outputs
    and configured for multi-label classification. ``forward`` takes the
    ``(input_ids, attention_mask)`` pair and returns only the raw logits.
    """
    def __init__(self, model_name: str, num_labels: int):
        super().__init__()
        hf_kwargs = dict(
            num_labels=num_labels,
            problem_type="multi_label_classification",
        )
        self.hf_model = AutoModelForSequenceClassification.from_pretrained(model_name, **hf_kwargs)

    def forward(self, x):
        ids, mask = x
        # Both tensors are cast to int64 before they are handed to the HF model
        hf_out = self.hf_model(input_ids=ids.long(), attention_mask=mask.long())
        return hf_out.logits


## Loss and metrics

Use BCEWithLogits loss and custom multi-label F1 metrics.

In [13]:
from fastai.losses import BCEWithLogitsLossFlat

class MultiLabelF1(Metric):
    """
    Multi-label F1 over everything accumulated since the last ``reset()``.

    For each batch the logits are passed through a sigmoid and binarised at
    ``thresh``; predictions and targets are stored on the CPU and scored in
    one go with scikit-learn's ``f1_score`` when ``value`` is read.

    Args:
        thresh: probability above which a label counts as predicted.
        average: averaging mode forwarded to ``f1_score`` (e.g. "macro" or
            "micro"); it is also used to build the metric's display name.
    """
    def __init__(self, thresh=0.2, average="macro"):
        self.thresh = thresh
        self.average = average
        # Create the buffers immediately so `value` can be read on a fresh
        # instance without raising AttributeError.
        self.reset()

    def reset(self):
        """Discard everything accumulated so far."""
        self.preds, self.targets = [], []

    def accumulate(self, learn):
        """Binarise the current batch's predictions and store them with the targets."""
        logits = learn.pred
        targs = learn.y
        probs = logits.sigmoid()
        preds = (probs > self.thresh).int().cpu()
        targs = targs.int().cpu()
        self.preds.append(preds)
        self.targets.append(targs)

    @property
    def value(self):
        """F1 over all accumulated batches, or ``None`` if nothing was accumulated."""
        if len(self.preds) == 0: return None
        preds = torch.cat(self.preds)
        targs = torch.cat(self.targets)
        return f1_score(
            targs.numpy(),
            preds.numpy(),
            average=self.average,
            zero_division=0,  # labels that are never predicted score 0 instead of warning
        )

    @property
    def name(self):
        """Column name shown in the training table, e.g. ``f1_macro``."""
        return f"f1_{self.average}"


## Fine-tune model

Train the DeBERTa-v3-large classifier with fastai.

In [14]:
# Build the DeBERTa wrapper with one output per fine-grained emotion.
hf_model = HF_SequenceClassifierMulti(MODEL_NAME, num_labels=num_labels)

learn = Learner(
    dls,
    hf_model,
    # Multi-label objective: binary cross-entropy computed on the raw logits.
    loss_func=BCEWithLogitsLossFlat(),
    # Track macro- and micro-averaged F1, each at the metric's default threshold (0.2).
    metrics=[MultiLabelF1(average="macro"), MultiLabelF1(average="micro")],
)

# Switch to mixed precision only when a GPU is available.
if torch.cuda.is_available():
    learn.to_fp16()

# NOTE(review): long-running cell — the recorded output shows roughly 52 minutes per epoch.
learn.fine_tune(3, base_lr=2e-5)


pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/874M [00:00<?, ?B/s]

epoch,train_loss,valid_loss,f1_macro,f1_micro,time
0,0.115696,0.115445,0.341003,0.446418,52:35


epoch,train_loss,valid_loss,f1_macro,f1_micro,time
0,0.113803,0.11279,0.365341,0.45405,52:34
1,0.106391,0.111122,0.384956,0.460131,52:37


epoch,train_loss,valid_loss,f1_macro,f1_micro,time
0,0.113803,0.11279,0.365341,0.45405,52:34
1,0.106391,0.111122,0.384956,0.460131,52:37
2,0.099751,0.112287,0.389131,0.454503,52:39


## Evaluate on test set

Run the model on held-out data to collect probabilities.

In [17]:
# Test DataLoader WITH labels
test_dl = dls.test_dl(test_df, with_labels=True)

# Request the sigmoid explicitly so `preds` are per-label probabilities by
# construction, instead of guessing from the value range whether an
# activation has already been applied.
preds, targs = learn.get_preds(dl=test_dl, act=torch.sigmoid)
probs = preds

print(probs.min().item(), probs.max().item())  # quick sanity check: both should lie in [0, 1]

# The targets do not depend on the threshold, so convert them once.
y_true = targs.numpy()

# NOTE(review): this sweep runs on the test set; a threshold chosen from it makes
# the final test scores optimistic. Consider sweeping on the validation split instead.
for thr in [0.2, 0.25, 0.3, 0.35]:
    pred_labels = (probs > thr).int()
    y_pred = pred_labels.numpy()

    print("Threshold:", thr)
    print("F1-macro:", f1_score(y_true, y_pred, average="macro", zero_division=0))
    print("F1-micro:", f1_score(y_true, y_pred, average="micro", zero_division=0))
    print("---")



8.2035330706276e-06 0.9974589943885803
Threshold: 0.2
F1-macro: 0.39052928514509627
F1-micro: 0.4558779490914977
---
Threshold: 0.25
F1-macro: 0.38674503154970136
F1-micro: 0.4599399508688927
---
Threshold: 0.3
F1-macro: 0.3762830824440299
F1-micro: 0.4560287388616842
---
Threshold: 0.35
F1-macro: 0.3631347016150707
F1-micro: 0.4459787722945618
---


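## Thresholding and reporting

Apply the decision threshold chosen from the sweep above and print the F1 scores and a per-label classification report. Because the threshold was selected on this same test set, treat the reported scores as optimistic.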

In [18]:
from sklearn.metrics import f1_score, classification_report

# Operating point used for the final report
BEST_THR = 0.25

# Ground truth and thresholded predictions as NumPy arrays for scikit-learn
y_true = targs.numpy()
pred_labels = (probs > BEST_THR).int()
y_pred = pred_labels.numpy()

print("Final threshold:", BEST_THR)
print(
    "Test F1-macro:",
    f1_score(y_true, y_pred, average="macro", zero_division=0),
)
print(
    "Test F1-micro:",
    f1_score(y_true, y_pred, average="micro", zero_division=0),
)

# Per-label precision / recall / F1, rows named after the training vocabulary
print("\nClassification report (fine-grained emotions):")
print(
    classification_report(
        y_true,
        y_pred,
        target_names=FINE_EMOTIONS,
        zero_division=0,
    )
)


Final threshold: 0.25
Test F1-macro: 0.38674503154970136
Test F1-micro: 0.4599399508688927

Classification report (fine-grained emotions):
                precision    recall  f1-score   support

    admiration       0.48      0.71      0.57      3429
     amusement       0.53      0.78      0.63      1785
         anger       0.37      0.47      0.41      1681
     annoyance       0.27      0.34      0.30      2767
      approval       0.32      0.29      0.30      3552
        caring       0.33      0.40      0.36      1189
     confusion       0.35      0.37      0.36      1494
     curiosity       0.37      0.63      0.47      1972
        desire       0.40      0.35      0.37       808
disappointment       0.28      0.24      0.26      1656
   disapproval       0.31      0.35      0.33      2323
       disgust       0.31      0.32      0.31      1095
 embarrassment       0.41      0.23      0.30       521
    excitement       0.32      0.28      0.30      1137
          fear      

## Save trained artifacts

Persist the fastai learner and HuggingFace weights to Drive.

In [19]:
# =========================================
# SAVE EMOTION CLASSIFIER (Fastai + HF)
# =========================================

from pathlib import Path

# Reuse the output location from the configuration cell rather than repeating
# the literal path here, so the two cannot drift apart. Path() accepts both
# str and Path, so re-running this cell is safe.
OUTPUT_DIR = Path(OUTPUT_DIR)

# Create directory if not exists
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# 1) Save the fastai learner (.pkl)
# NOTE(review): the exported pickle references objects defined in this notebook
# (e.g. HF_Tokenizer, get_x, get_y); presumably they must be defined again
# before loading it — confirm.
fastai_pkl_path = OUTPUT_DIR / 'fastai_emotion_classifier2.pkl'
learn.export(fastai_pkl_path)
print(f"✓ Fastai model saved → {fastai_pkl_path}")

# 2) Save the HuggingFace model + tokenizer side by side in one folder
hf_save_path = OUTPUT_DIR / "hf_emotion_model2"
hf_model.hf_model.save_pretrained(hf_save_path)
tokenizer.save_pretrained(hf_save_path)
print(f"✓ HuggingFace model & tokenizer saved → {hf_save_path}")


✓ Fastai model saved → /content/drive/MyDrive/VibeQ-EIE/models/fastai_emotion_classifier2.pkl
✓ HuggingFace model & tokenizer saved → /content/drive/MyDrive/VibeQ-EIE/models/hf_emotion_model2
