In [1]:
# ============================================
# Phase 1: Emotion Chatbot – Multi-label Training
# ============================================

import os
import json
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from transformers import RobertaTokenizer, TrainingArguments, Trainer, EarlyStoppingCallback
from transformers import RobertaPreTrainedModel, RobertaModel, RobertaConfig
import torch.nn as nn
import datasets

# --------------------------
# 1) Config & Device
# --------------------------
OUTPUT_DIR = "../results/models/phase1_emotion_roberta"
LOG_DIR = "../results/logs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# --------------------------
# 2) Load dataset
# --------------------------
df = pd.read_csv("../data/dataset2_emotions/processed/emotions_clean.csv")
print("Raw dataset shape:", df.shape)
print(df.head())

# 👇 If dataset has "emotion" column (single string label), convert to one-hot
if "emotion" in df.columns:
    all_emotions = sorted(df["emotion"].unique().tolist())
    print("Unique emotions:", all_emotions)

    # create one-hot columns
    for emo in all_emotions:
        df[emo] = (df["emotion"] == emo).astype(int)

    df = df.drop(columns=["emotion"])

    # group by text → merge duplicate texts with multiple labels
    df = df.groupby("text")[all_emotions].max().reset_index()

print("Processed dataset shape:", df.shape)
print(df.head())

emotion_cols = [c for c in df.columns if c != "text"]
num_labels = len(emotion_cols)
print("Emotion labels:", emotion_cols)

# --------------------------
# 3) Train/Val/Test split
# --------------------------
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

# --------------------------
# 4) Tokenizer & Dataset
# --------------------------
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize_and_format(df):
    encodings = tokenizer(
        df["text"].tolist(),
        padding="max_length",
        truncation=True,
        max_length=128
    )
    labels = df[emotion_cols].astype(float).values.tolist()

    dataset = datasets.Dataset.from_dict({
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"],
        "labels": labels
    })
    dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return dataset

train_ds = tokenize_and_format(train_df)
val_ds   = tokenize_and_format(val_df)
test_ds  = tokenize_and_format(test_df)

# --------------------------
# 5) Custom Roberta Model (multi-label with BCEWithLogitsLoss)
# --------------------------
class RobertaForMultiLabel(RobertaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.post_init()

    def forward(self, input_ids=None, attention_mask=None, labels=None):
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[0][:, 0, :]  # CLS token
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss = self.loss_fn(logits, labels.float())
        return {"loss": loss, "logits": logits}

config = RobertaConfig.from_pretrained("roberta-base", num_labels=num_labels)
model = RobertaForMultiLabel.from_pretrained("roberta-base", config=config).to(device)

# --------------------------
# 6) Metrics
# --------------------------
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = torch.sigmoid(torch.tensor(logits))
    preds = (probs > 0.5).int()
    labels = torch.tensor(labels)

    acc = accuracy_score(labels, preds)
    micro_f1 = f1_score(labels, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(labels, preds, average="macro", zero_division=0)

    return {"accuracy": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}

# --------------------------
# 7) Training setup with Early Stopping
# --------------------------
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,   # increase if GPU memory allows
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=1,
    num_train_epochs=3,               # start small, can increase later
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="micro_f1",
    greater_is_better=True,
    logging_dir=LOG_DIR,
    logging_strategy="steps",
    logging_steps=100,
    save_total_limit=2,
    fp16=True,                        # mixed precision (fast on GPU)
    dataloader_num_workers=2
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# --------------------------
# 8) Train with Early Stopping
# --------------------------
trainer.train()

# --------------------------
# 9) Evaluate on test set
# --------------------------
results = trainer.evaluate(test_ds)
print("Test results:", results)

# --------------------------
# 9.1) Find best thresholds on validation set
# --------------------------
import numpy as np
from sklearn.metrics import f1_score

# Get validation predictions
val_outputs = trainer.predict(val_ds)
val_logits = val_outputs.predictions
val_labels = np.array(val_outputs.label_ids)

val_probs = torch.sigmoid(torch.tensor(val_logits)).numpy()

def find_best_thresholds(y_true, y_pred_probs, emotion_cols):
    thresholds = {}
    for i in range(y_true.shape[1]):
        best_t, best_f1 = 0.5, 0.0
        for t in np.arange(0.1, 0.9, 0.05):
            preds = (y_pred_probs[:, i] >= t).astype(int)
            f1 = f1_score(y_true[:, i], preds, zero_division=0)
            if f1 > best_f1:
                best_t, best_f1 = t, f1
        thresholds[emotion_cols[i]] = best_t
    return thresholds

thresholds = find_best_thresholds(val_labels, val_probs, emotion_cols)
print("\nBest thresholds per emotion:")
print(thresholds)

# --------------------------
# 10) Predict on few examples using thresholds
# --------------------------
def predict_emotions(prob, emotion_cols, thresholds, max_labels=5, neutral_label="neutral"):
    preds = []

    # Step 1: Apply thresholds
    for i, p in enumerate(prob):
        if p >= thresholds.get(emotion_cols[i], 0.5):
            preds.append((emotion_cols[i], p))

    # Step 2: Fallback if nothing passes threshold
    if not preds:
        top_idx = np.argmax(prob)
        preds = [(emotion_cols[top_idx], prob[top_idx])]

    # Step 3: Sort by probability (high → low)
    preds = sorted(preds, key=lambda x: x[1], reverse=True)

    # Step 4: Keep at most `max_labels`
    preds = preds[:max_labels]

    # Step 5: Drop "neutral" if other strong emotions exist
    labels = [lbl for lbl, p in preds]
    if neutral_label in labels and len(labels) > 1:
        labels = [lbl for lbl in labels if lbl != neutral_label]

    return labels


sample_texts = [
    "I am really anxious and can't sleep 😟",
    "I feel so happy and grateful today 😊",
    "I am angry but also disappointed 😡😞",

    "I feel like nothing matters anymore.",
    "Life has been so heavy lately.",
    "I can’t stop crying, everything hurts.",

    "Why does this always happen to me?!",
    "I’m so pissed off right now.",
    "People never listen and it makes me furious.",

    "I’m scared I’ll fail my exams.",
    "What if something bad happens tomorrow?",
    "I can’t stop worrying, my heart is racing.",

    "I finally got the job I wanted!",
    "I feel so grateful today.",
    "This is the happiest I’ve been in years.",

    "I really care about you.",
    "Spending time with my family makes me feel loved.",
    "I think I’m falling for them.",

    "Wow, I didn’t see that coming at all!",
    "You won’t believe what just happened.",
    "That was totally unexpected.",

    "I’m nervous about the interview, but also excited.",  # (fear + joy)
    "I’m happy for them, but I feel lonely inside.",       # (joy + sadness)
    "I’m so angry, but also deeply hurt.",                 # (anger + sadness)
    "I feel grateful but anxious about the future."        # (joy + fear)
]
encodings = tokenizer(sample_texts, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
outputs = model(**encodings)
probs = torch.sigmoid(outputs["logits"]).detach().cpu().numpy()

for text, prob in zip(sample_texts, probs):
    preds = predict_emotions(prob, emotion_cols, thresholds, max_labels=5, neutral_label="neutral")
    print(f"\nText: {text}\nPredicted emotions: {preds}")

# ✅ Save final model + tokenizer (Phase 1 emotions)
save_dir = "../results/models/phase1_emotion_roberta"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

print(f"Model saved at {save_dir}")

# ==========================
# Save idx2emotion.json
# ==========================
idx2emotion = {i: emo for i, emo in enumerate(emotion_cols)}
with open(os.path.join(save_dir, "idx2emotion.json"), "w") as f:
    json.dump(idx2emotion, f, indent=4)

print(f"idx2emotion.json saved at {save_dir}")

# ==========================
# Save thresholds.json
# ==========================
with open(os.path.join(save_dir, "thresholds.json"), "w") as f:
    json.dump(thresholds, f, indent=4)

print(f"thresholds.json saved at {save_dir}")



  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
Raw dataset shape: (269529, 2)
                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger
Unique emotions: ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'neutral', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise']
Processed dataset shape: (77678, 29)
                                                text  admiration  amusement  \
0   "If you don't wear BROWN AND ORANGE...YOU DON...           0          0   
1   "What do Scottish people loo

Some weights of RobertaForMultiLabel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  1%|          | 100/11652 [01:25<1:38:56,  1.95it/s]

{'loss': 0.3267, 'grad_norm': 0.529172420501709, 'learning_rate': 1.982835564709921e-05, 'epoch': 0.03}


  2%|▏         | 200/11652 [02:19<1:48:43,  1.76it/s]

{'loss': 0.2409, 'grad_norm': 0.5613012313842773, 'learning_rate': 1.9656711294198423e-05, 'epoch': 0.05}


  3%|▎         | 300/11652 [03:22<1:14:16,  2.55it/s]

{'loss': 0.226, 'grad_norm': 0.89483243227005, 'learning_rate': 1.9485066941297632e-05, 'epoch': 0.08}


  3%|▎         | 401/11652 [03:53<44:17,  4.23it/s]  

{'loss': 0.2075, 'grad_norm': 0.9081907272338867, 'learning_rate': 1.931513903192585e-05, 'epoch': 0.1}


  4%|▍         | 501/11652 [04:16<44:49,  4.15it/s]

{'loss': 0.2012, 'grad_norm': 0.9544550776481628, 'learning_rate': 1.914349467902506e-05, 'epoch': 0.13}


  5%|▌         | 600/11652 [04:42<43:40,  4.22it/s]  

{'loss': 0.1857, 'grad_norm': 0.9380977749824524, 'learning_rate': 1.8971850326124273e-05, 'epoch': 0.15}


  6%|▌         | 701/11652 [05:06<43:57,  4.15it/s]

{'loss': 0.1853, 'grad_norm': 0.9083397388458252, 'learning_rate': 1.8800205973223482e-05, 'epoch': 0.18}


  7%|▋         | 800/11652 [05:31<49:12,  3.68it/s]

{'loss': 0.1815, 'grad_norm': 0.9145340919494629, 'learning_rate': 1.8628561620322695e-05, 'epoch': 0.21}


  8%|▊         | 900/11652 [05:59<49:10,  3.64it/s]  

{'loss': 0.1813, 'grad_norm': 0.9602134823799133, 'learning_rate': 1.8456917267421904e-05, 'epoch': 0.23}


  9%|▊         | 1001/11652 [06:28<45:07,  3.93it/s] 

{'loss': 0.1824, 'grad_norm': 0.9514389038085938, 'learning_rate': 1.8285272914521113e-05, 'epoch': 0.26}


  9%|▉         | 1100/11652 [06:54<43:10,  4.07it/s]

{'loss': 0.1709, 'grad_norm': 0.8150638937950134, 'learning_rate': 1.8113628561620326e-05, 'epoch': 0.28}


 10%|█         | 1200/11652 [07:21<44:49,  3.89it/s]  

{'loss': 0.1682, 'grad_norm': 1.2034807205200195, 'learning_rate': 1.7941984208719535e-05, 'epoch': 0.31}


 11%|█         | 1300/11652 [07:48<48:48,  3.53it/s]  

{'loss': 0.1718, 'grad_norm': 1.0300977230072021, 'learning_rate': 1.7770339855818747e-05, 'epoch': 0.33}


 12%|█▏        | 1400/11652 [08:17<46:28,  3.68it/s]  

{'loss': 0.1667, 'grad_norm': 0.8157229423522949, 'learning_rate': 1.7598695502917956e-05, 'epoch': 0.36}


 13%|█▎        | 1500/11652 [08:45<43:17,  3.91it/s]  

{'loss': 0.1678, 'grad_norm': 0.930014967918396, 'learning_rate': 1.7427051150017166e-05, 'epoch': 0.39}


 14%|█▎        | 1600/11652 [09:14<46:54,  3.57it/s]  

{'loss': 0.1652, 'grad_norm': 0.7847495675086975, 'learning_rate': 1.7255406797116375e-05, 'epoch': 0.41}


 15%|█▍        | 1700/11652 [09:44<42:46,  3.88it/s]  

{'loss': 0.1707, 'grad_norm': 1.0219154357910156, 'learning_rate': 1.7083762444215587e-05, 'epoch': 0.44}


 15%|█▌        | 1800/11652 [10:12<41:19,  3.97it/s]  

{'loss': 0.1645, 'grad_norm': 0.9350370764732361, 'learning_rate': 1.6912118091314796e-05, 'epoch': 0.46}


 16%|█▋        | 1900/11652 [10:42<47:11,  3.44it/s]  

{'loss': 0.1658, 'grad_norm': 0.9337978959083557, 'learning_rate': 1.6740473738414005e-05, 'epoch': 0.49}


 17%|█▋        | 2001/11652 [11:10<40:57,  3.93it/s]  

{'loss': 0.1594, 'grad_norm': 0.9764830470085144, 'learning_rate': 1.6568829385513218e-05, 'epoch': 0.51}


 18%|█▊        | 2100/11652 [11:40<41:19,  3.85it/s]  

{'loss': 0.1636, 'grad_norm': 0.9079669713973999, 'learning_rate': 1.6397185032612427e-05, 'epoch': 0.54}


 19%|█▉        | 2200/11652 [12:09<39:40,  3.97it/s]  

{'loss': 0.1658, 'grad_norm': 2.5128567218780518, 'learning_rate': 1.6225540679711636e-05, 'epoch': 0.57}


 20%|█▉        | 2300/11652 [12:37<42:05,  3.70it/s]  

{'loss': 0.1686, 'grad_norm': 1.0648045539855957, 'learning_rate': 1.605389632681085e-05, 'epoch': 0.59}


 21%|██        | 2400/11652 [13:06<49:37,  3.11it/s]  

{'loss': 0.1651, 'grad_norm': 0.9005778431892395, 'learning_rate': 1.5882251973910058e-05, 'epoch': 0.62}


 21%|██▏       | 2500/11652 [13:35<39:22,  3.87it/s]  

{'loss': 0.1594, 'grad_norm': 1.4789823293685913, 'learning_rate': 1.571060762100927e-05, 'epoch': 0.64}


 22%|██▏       | 2600/11652 [14:03<38:46,  3.89it/s]

{'loss': 0.1569, 'grad_norm': 1.0434048175811768, 'learning_rate': 1.553896326810848e-05, 'epoch': 0.67}


 23%|██▎       | 2700/11652 [14:32<42:23,  3.52it/s]

{'loss': 0.1629, 'grad_norm': 1.9678736925125122, 'learning_rate': 1.5367318915207692e-05, 'epoch': 0.7}


 24%|██▍       | 2800/11652 [15:03<37:19,  3.95it/s]  

{'loss': 0.1549, 'grad_norm': 0.9539303779602051, 'learning_rate': 1.5195674562306901e-05, 'epoch': 0.72}


 25%|██▍       | 2900/11652 [15:32<45:39,  3.20it/s]

{'loss': 0.1559, 'grad_norm': 0.9329333901405334, 'learning_rate': 1.502403020940611e-05, 'epoch': 0.75}


 26%|██▌       | 3000/11652 [16:06<52:26,  2.75it/s]  

{'loss': 0.1594, 'grad_norm': 0.6625390648841858, 'learning_rate': 1.4852385856505323e-05, 'epoch': 0.77}


 27%|██▋       | 3100/11652 [16:36<38:56,  3.66it/s]  

{'loss': 0.1561, 'grad_norm': 0.8354858756065369, 'learning_rate': 1.4680741503604532e-05, 'epoch': 0.8}


 27%|██▋       | 3200/11652 [17:12<44:32,  3.16it/s]  

{'loss': 0.1597, 'grad_norm': 0.7710334062576294, 'learning_rate': 1.4509097150703744e-05, 'epoch': 0.82}


 28%|██▊       | 3300/11652 [17:43<39:53,  3.49it/s]

{'loss': 0.152, 'grad_norm': 0.937045693397522, 'learning_rate': 1.4337452797802953e-05, 'epoch': 0.85}


 29%|██▉       | 3400/11652 [18:12<35:56,  3.83it/s]

{'loss': 0.1578, 'grad_norm': 0.755032479763031, 'learning_rate': 1.4165808444902164e-05, 'epoch': 0.88}


 30%|███       | 3500/11652 [18:43<40:31,  3.35it/s]

{'loss': 0.1574, 'grad_norm': 0.766982913017273, 'learning_rate': 1.3994164092001375e-05, 'epoch': 0.9}


 31%|███       | 3600/11652 [19:14<40:34,  3.31it/s]

{'loss': 0.1581, 'grad_norm': 2.007122755050659, 'learning_rate': 1.3822519739100584e-05, 'epoch': 0.93}


 32%|███▏      | 3700/11652 [19:46<37:04,  3.58it/s]

{'loss': 0.1579, 'grad_norm': 1.5309393405914307, 'learning_rate': 1.3650875386199795e-05, 'epoch': 0.95}


 33%|███▎      | 3800/11652 [20:16<36:13,  3.61it/s]

{'loss': 0.1531, 'grad_norm': 0.8822766542434692, 'learning_rate': 1.3479231033299006e-05, 'epoch': 0.98}


                                                      
 33%|███▎      | 3884/11652 [22:08<1:42:18,  1.27it/s]

{'eval_loss': 0.15537972748279572, 'eval_accuracy': 0.33483522142121525, 'eval_micro_f1': 0.583716756403981, 'eval_macro_f1': 0.4560543066774894, 'eval_runtime': 84.0078, 'eval_samples_per_second': 92.468, 'eval_steps_per_second': 5.785, 'epoch': 1.0}


 33%|███▎      | 3900/11652 [22:47<1:03:06,  2.05it/s] 

{'loss': 0.1585, 'grad_norm': 0.991072952747345, 'learning_rate': 1.3307586680398217e-05, 'epoch': 1.0}


 34%|███▍      | 4000/11652 [23:19<37:47,  3.37it/s]  

{'loss': 0.1495, 'grad_norm': 0.8335378766059875, 'learning_rate': 1.3135942327497426e-05, 'epoch': 1.03}


 35%|███▌      | 4100/11652 [23:48<35:55,  3.50it/s]

{'loss': 0.1475, 'grad_norm': 0.5969946384429932, 'learning_rate': 1.2964297974596635e-05, 'epoch': 1.06}


 36%|███▌      | 4200/11652 [24:17<32:18,  3.84it/s]

{'loss': 0.147, 'grad_norm': 1.260074257850647, 'learning_rate': 1.2792653621695847e-05, 'epoch': 1.08}


 37%|███▋      | 4300/11652 [24:49<38:46,  3.16it/s]  

{'loss': 0.1502, 'grad_norm': 0.9220777153968811, 'learning_rate': 1.2621009268795057e-05, 'epoch': 1.11}


 38%|███▊      | 4400/11652 [25:21<34:59,  3.45it/s]

{'loss': 0.1521, 'grad_norm': 0.9733738303184509, 'learning_rate': 1.2449364915894269e-05, 'epoch': 1.13}


 39%|███▊      | 4500/11652 [25:52<35:09,  3.39it/s]

{'loss': 0.1435, 'grad_norm': 1.124644160270691, 'learning_rate': 1.2277720562993478e-05, 'epoch': 1.16}


 39%|███▉      | 4600/11652 [26:21<33:45,  3.48it/s]

{'loss': 0.1434, 'grad_norm': 0.6979444622993469, 'learning_rate': 1.2106076210092689e-05, 'epoch': 1.18}


 40%|████      | 4700/11652 [26:50<31:04,  3.73it/s]

{'loss': 0.1505, 'grad_norm': 0.9845307469367981, 'learning_rate': 1.19344318571919e-05, 'epoch': 1.21}


 41%|████      | 4800/11652 [27:23<34:06,  3.35it/s]

{'loss': 0.1424, 'grad_norm': 0.7935779690742493, 'learning_rate': 1.1762787504291109e-05, 'epoch': 1.24}


 42%|████▏     | 4900/11652 [27:54<35:43,  3.15it/s]

{'loss': 0.145, 'grad_norm': 0.7900859117507935, 'learning_rate': 1.159114315139032e-05, 'epoch': 1.26}


 43%|████▎     | 5000/11652 [28:24<32:45,  3.38it/s]

{'loss': 0.1454, 'grad_norm': 0.827347457408905, 'learning_rate': 1.141949879848953e-05, 'epoch': 1.29}


 44%|████▍     | 5100/11652 [28:53<30:00,  3.64it/s]

{'loss': 0.1504, 'grad_norm': 0.9434016346931458, 'learning_rate': 1.1247854445588741e-05, 'epoch': 1.31}


 45%|████▍     | 5200/11652 [29:23<31:00,  3.47it/s]

{'loss': 0.1442, 'grad_norm': 0.781606912612915, 'learning_rate': 1.107621009268795e-05, 'epoch': 1.34}


 45%|████▌     | 5300/11652 [29:56<35:06,  3.01it/s]

{'loss': 0.1474, 'grad_norm': 0.6991073489189148, 'learning_rate': 1.090628218331617e-05, 'epoch': 1.36}


 46%|████▋     | 5400/11652 [30:27<32:54,  3.17it/s]

{'loss': 0.1489, 'grad_norm': 0.7410860061645508, 'learning_rate': 1.0734637830415379e-05, 'epoch': 1.39}


 47%|████▋     | 5500/11652 [30:57<29:40,  3.46it/s]

{'loss': 0.1417, 'grad_norm': 1.17274808883667, 'learning_rate': 1.0562993477514591e-05, 'epoch': 1.42}


 48%|████▊     | 5600/11652 [31:29<29:38,  3.40it/s]

{'loss': 0.1362, 'grad_norm': 1.0315662622451782, 'learning_rate': 1.0393065568142809e-05, 'epoch': 1.44}


 49%|████▉     | 5700/11652 [32:01<27:48,  3.57it/s]

{'loss': 0.1465, 'grad_norm': 0.799900472164154, 'learning_rate': 1.022142121524202e-05, 'epoch': 1.47}


 50%|████▉     | 5800/11652 [32:31<33:07,  2.94it/s]

{'loss': 0.1421, 'grad_norm': 0.8873801827430725, 'learning_rate': 1.0049776862341229e-05, 'epoch': 1.49}


 51%|█████     | 5900/11652 [33:03<28:50,  3.32it/s]

{'loss': 0.142, 'grad_norm': 0.7450132369995117, 'learning_rate': 9.87813250944044e-06, 'epoch': 1.52}


 51%|█████▏    | 6000/11652 [33:35<28:29,  3.31it/s]

{'loss': 0.1494, 'grad_norm': 1.0169073343276978, 'learning_rate': 9.70648815653965e-06, 'epoch': 1.54}


 52%|█████▏    | 6100/11652 [34:06<26:51,  3.44it/s]

{'loss': 0.1436, 'grad_norm': 0.6582849025726318, 'learning_rate': 9.534843803638861e-06, 'epoch': 1.57}


 53%|█████▎    | 6200/11652 [34:36<28:59,  3.13it/s]

{'loss': 0.1445, 'grad_norm': 1.0290895700454712, 'learning_rate': 9.363199450738072e-06, 'epoch': 1.6}


 54%|█████▍    | 6300/11652 [35:04<23:27,  3.80it/s]

{'loss': 0.1481, 'grad_norm': 0.9388842582702637, 'learning_rate': 9.191555097837281e-06, 'epoch': 1.62}


 55%|█████▍    | 6400/11652 [35:32<23:07,  3.78it/s]

{'loss': 0.1431, 'grad_norm': 1.0336825847625732, 'learning_rate': 9.019910744936492e-06, 'epoch': 1.65}


 56%|█████▌    | 6500/11652 [36:00<23:34,  3.64it/s]

{'loss': 0.1473, 'grad_norm': 1.0222867727279663, 'learning_rate': 8.848266392035703e-06, 'epoch': 1.67}


 57%|█████▋    | 6600/11652 [36:27<21:57,  3.83it/s]

{'loss': 0.1443, 'grad_norm': 0.7767317891120911, 'learning_rate': 8.676622039134914e-06, 'epoch': 1.7}


 58%|█████▊    | 6700/11652 [36:54<21:27,  3.85it/s]

{'loss': 0.1495, 'grad_norm': 2.184269905090332, 'learning_rate': 8.504977686234125e-06, 'epoch': 1.73}


 58%|█████▊    | 6800/11652 [37:22<21:26,  3.77it/s]

{'loss': 0.1375, 'grad_norm': 0.867718517780304, 'learning_rate': 8.333333333333334e-06, 'epoch': 1.75}


 59%|█████▉    | 6900/11652 [37:49<21:03,  3.76it/s]

{'loss': 0.1488, 'grad_norm': 1.583793044090271, 'learning_rate': 8.161688980432545e-06, 'epoch': 1.78}


 60%|██████    | 7000/11652 [38:17<21:04,  3.68it/s]

{'loss': 0.1459, 'grad_norm': 0.5974979996681213, 'learning_rate': 7.990044627531754e-06, 'epoch': 1.8}


 61%|██████    | 7100/11652 [38:44<19:46,  3.84it/s]

{'loss': 0.1397, 'grad_norm': 1.1085604429244995, 'learning_rate': 7.818400274630964e-06, 'epoch': 1.83}


 62%|██████▏   | 7200/11652 [39:12<20:04,  3.70it/s]

{'loss': 0.1472, 'grad_norm': 0.7301547527313232, 'learning_rate': 7.646755921730175e-06, 'epoch': 1.85}


 63%|██████▎   | 7300/11652 [39:40<19:46,  3.67it/s]

{'loss': 0.147, 'grad_norm': 1.5015320777893066, 'learning_rate': 7.475111568829386e-06, 'epoch': 1.88}


 64%|██████▎   | 7400/11652 [40:06<17:26,  4.06it/s]

{'loss': 0.1435, 'grad_norm': 1.0543409585952759, 'learning_rate': 7.303467215928597e-06, 'epoch': 1.91}


 64%|██████▍   | 7501/11652 [40:31<16:50,  4.11it/s]

{'loss': 0.1409, 'grad_norm': 0.8177542686462402, 'learning_rate': 7.131822863027808e-06, 'epoch': 1.93}


 65%|██████▌   | 7600/11652 [40:55<16:31,  4.09it/s]

{'loss': 0.1478, 'grad_norm': 1.1037131547927856, 'learning_rate': 6.960178510127017e-06, 'epoch': 1.96}


 66%|██████▌   | 7701/11652 [41:20<16:09,  4.08it/s]

{'loss': 0.1464, 'grad_norm': 1.113085389137268, 'learning_rate': 6.788534157226228e-06, 'epoch': 1.98}


                                                    
 67%|██████▋   | 7768/11652 [42:34<39:13,  1.65it/s]

{'eval_loss': 0.14966513216495514, 'eval_accuracy': 0.34397528321318227, 'eval_micro_f1': 0.6051058175951409, 'eval_macro_f1': 0.48685520977475616, 'eval_runtime': 56.8553, 'eval_samples_per_second': 136.627, 'eval_steps_per_second': 8.548, 'epoch': 2.0}


 67%|██████▋   | 7800/11652 [43:09<15:04,  4.26it/s]   

{'loss': 0.1406, 'grad_norm': 0.8910148739814758, 'learning_rate': 6.6168898043254385e-06, 'epoch': 2.01}


 68%|██████▊   | 7900/11652 [43:33<14:34,  4.29it/s]

{'loss': 0.136, 'grad_norm': 0.8361377120018005, 'learning_rate': 6.4452454514246485e-06, 'epoch': 2.03}


 69%|██████▊   | 8000/11652 [43:57<15:11,  4.01it/s]

{'loss': 0.1343, 'grad_norm': 0.7745679616928101, 'learning_rate': 6.273601098523859e-06, 'epoch': 2.06}


 70%|██████▉   | 8101/11652 [44:21<14:36,  4.05it/s]

{'loss': 0.1346, 'grad_norm': 1.0197728872299194, 'learning_rate': 6.10195674562307e-06, 'epoch': 2.09}


 70%|███████   | 8201/11652 [44:45<13:57,  4.12it/s]

{'loss': 0.1376, 'grad_norm': 1.2101637125015259, 'learning_rate': 5.930312392722279e-06, 'epoch': 2.11}


 71%|███████   | 8300/11652 [53:24<18:26,  3.03it/s]     

{'loss': 0.1364, 'grad_norm': 0.7723003029823303, 'learning_rate': 5.75866803982149e-06, 'epoch': 2.14}


 72%|███████▏  | 8400/11652 [53:55<17:16,  3.14it/s]

{'loss': 0.1339, 'grad_norm': 1.063509225845337, 'learning_rate': 5.587023686920701e-06, 'epoch': 2.16}


 73%|███████▎  | 8500/11652 [54:28<24:07,  2.18it/s]

{'loss': 0.1362, 'grad_norm': 1.061368703842163, 'learning_rate': 5.415379334019911e-06, 'epoch': 2.19}


 74%|███████▍  | 8600/11652 [55:15<19:20,  2.63it/s]

{'loss': 0.1344, 'grad_norm': 0.6850615739822388, 'learning_rate': 5.243734981119122e-06, 'epoch': 2.21}


 75%|███████▍  | 8700/11652 [55:56<24:52,  1.98it/s]

{'loss': 0.1322, 'grad_norm': 1.2529878616333008, 'learning_rate': 5.0720906282183325e-06, 'epoch': 2.24}


 76%|███████▌  | 8800/11652 [56:40<20:10,  2.36it/s]

{'loss': 0.1363, 'grad_norm': 0.9001365303993225, 'learning_rate': 4.9004462753175424e-06, 'epoch': 2.27}


 76%|███████▋  | 8900/11652 [57:16<15:32,  2.95it/s]

{'loss': 0.1373, 'grad_norm': 1.2384488582611084, 'learning_rate': 4.728801922416753e-06, 'epoch': 2.29}


 77%|███████▋  | 9000/11652 [57:53<15:07,  2.92it/s]

{'loss': 0.1382, 'grad_norm': 0.9841037392616272, 'learning_rate': 4.557157569515963e-06, 'epoch': 2.32}


 78%|███████▊  | 9100/11652 [58:28<15:08,  2.81it/s]

{'loss': 0.1301, 'grad_norm': 1.0265719890594482, 'learning_rate': 4.385513216615174e-06, 'epoch': 2.34}


 79%|███████▉  | 9200/11652 [59:03<15:12,  2.69it/s]

{'loss': 0.1379, 'grad_norm': 1.1883503198623657, 'learning_rate': 4.213868863714384e-06, 'epoch': 2.37}


 80%|███████▉  | 9300/11652 [59:39<12:59,  3.02it/s]

{'loss': 0.1367, 'grad_norm': 0.9412618279457092, 'learning_rate': 4.042224510813594e-06, 'epoch': 2.39}


 81%|████████  | 9400/11652 [1:07:46<12:13,  3.07it/s]    

{'loss': 0.1291, 'grad_norm': 0.8156429529190063, 'learning_rate': 3.870580157912805e-06, 'epoch': 2.42}


 82%|████████▏ | 9500/11652 [1:08:21<11:00,  3.26it/s]

{'loss': 0.1338, 'grad_norm': 0.7523133754730225, 'learning_rate': 3.6989358050120156e-06, 'epoch': 2.45}


 82%|████████▏ | 9600/11652 [1:08:52<10:18,  3.32it/s]

{'loss': 0.1341, 'grad_norm': 1.9678202867507935, 'learning_rate': 3.5272914521112256e-06, 'epoch': 2.47}


 83%|████████▎ | 9700/11652 [1:09:24<09:54,  3.29it/s]

{'loss': 0.1336, 'grad_norm': 0.9400346279144287, 'learning_rate': 3.355647099210436e-06, 'epoch': 2.5}


 84%|████████▍ | 9800/11652 [1:09:55<09:26,  3.27it/s]

{'loss': 0.1381, 'grad_norm': 0.678368330001831, 'learning_rate': 3.184002746309647e-06, 'epoch': 2.52}


 85%|████████▍ | 9900/11652 [1:12:48<09:09,  3.19it/s]   

{'loss': 0.1363, 'grad_norm': 0.9378496408462524, 'learning_rate': 3.014074836937865e-06, 'epoch': 2.55}


 86%|████████▌ | 10000/11652 [1:13:20<08:41,  3.17it/s]

{'loss': 0.1348, 'grad_norm': 0.8891355991363525, 'learning_rate': 2.842430484037075e-06, 'epoch': 2.57}


 87%|████████▋ | 10100/11652 [1:13:51<07:52,  3.29it/s]

{'loss': 0.1401, 'grad_norm': 0.6934526562690735, 'learning_rate': 2.670786131136286e-06, 'epoch': 2.6}


 88%|████████▊ | 10200/11652 [1:14:23<07:41,  3.15it/s]

{'loss': 0.1356, 'grad_norm': 0.8206148147583008, 'learning_rate': 2.4991417782354964e-06, 'epoch': 2.63}


 88%|████████▊ | 10300/11652 [1:14:55<09:57,  2.26it/s]

{'loss': 0.136, 'grad_norm': 1.0788235664367676, 'learning_rate': 2.3274974253347064e-06, 'epoch': 2.65}


 89%|████████▉ | 10400/11652 [1:15:28<06:30,  3.21it/s]

{'loss': 0.134, 'grad_norm': 0.766542375087738, 'learning_rate': 2.155853072433917e-06, 'epoch': 2.68}


 90%|█████████ | 10500/11652 [1:16:00<05:53,  3.26it/s]

{'loss': 0.1304, 'grad_norm': 0.8675375580787659, 'learning_rate': 1.9842087195331276e-06, 'epoch': 2.7}


 91%|█████████ | 10600/11652 [1:16:33<05:25,  3.24it/s]

{'loss': 0.1344, 'grad_norm': 1.0468363761901855, 'learning_rate': 1.8125643666323378e-06, 'epoch': 2.73}


 92%|█████████▏| 10700/11652 [1:17:05<04:53,  3.25it/s]

{'loss': 0.1314, 'grad_norm': 1.238012671470642, 'learning_rate': 1.6409200137315484e-06, 'epoch': 2.75}


 93%|█████████▎| 10800/11652 [1:17:37<04:25,  3.21it/s]

{'loss': 0.1343, 'grad_norm': 0.7501091361045837, 'learning_rate': 1.4692756608307588e-06, 'epoch': 2.78}


 94%|█████████▎| 10900/11652 [1:18:10<04:04,  3.07it/s]

{'loss': 0.1388, 'grad_norm': 1.382698893547058, 'learning_rate': 1.2976313079299694e-06, 'epoch': 2.81}


 94%|█████████▍| 11000/11652 [1:18:43<03:43,  2.92it/s]

{'loss': 0.137, 'grad_norm': 1.0016018152236938, 'learning_rate': 1.1259869550291795e-06, 'epoch': 2.83}


 95%|█████████▌| 11100/11652 [1:19:15<02:50,  3.24it/s]

{'loss': 0.1371, 'grad_norm': 1.05009925365448, 'learning_rate': 9.543426021283902e-07, 'epoch': 2.86}


 96%|█████████▌| 11200/11652 [1:19:47<02:18,  3.27it/s]

{'loss': 0.1372, 'grad_norm': 1.0459686517715454, 'learning_rate': 7.826982492276004e-07, 'epoch': 2.88}


 97%|█████████▋| 11300/11652 [1:20:20<01:52,  3.14it/s]

{'loss': 0.133, 'grad_norm': 0.9347423911094666, 'learning_rate': 6.110538963268109e-07, 'epoch': 2.91}


 98%|█████████▊| 11400/11652 [1:20:52<01:21,  3.08it/s]

{'loss': 0.1398, 'grad_norm': 0.8357214331626892, 'learning_rate': 4.3940954342602133e-07, 'epoch': 2.94}


 99%|█████████▊| 11500/11652 [1:21:24<00:47,  3.18it/s]

{'loss': 0.1363, 'grad_norm': 0.8736221790313721, 'learning_rate': 2.6776519052523173e-07, 'epoch': 2.96}


100%|█████████▉| 11600/11652 [1:22:00<00:19,  2.70it/s]

{'loss': 0.138, 'grad_norm': 1.0520814657211304, 'learning_rate': 9.612083762444216e-08, 'epoch': 2.99}


                                                       
100%|██████████| 11652/11652 [1:24:04<00:00,  1.24it/s]

{'eval_loss': 0.14951489865779877, 'eval_accuracy': 0.3453913491246138, 'eval_micro_f1': 0.6081327383571115, 'eval_macro_f1': 0.504229874590064, 'eval_runtime': 97.8017, 'eval_samples_per_second': 79.426, 'eval_steps_per_second': 4.969, 'epoch': 3.0}


100%|██████████| 11652/11652 [1:24:12<00:00,  2.31it/s]


{'train_runtime': 5052.0586, 'train_samples_per_second': 36.901, 'train_steps_per_second': 2.306, 'train_loss': 0.15191054880271201, 'epoch': 3.0}


100%|██████████| 486/486 [00:45<00:00, 10.74it/s]


Test results: {'eval_loss': 0.14884571731090546, 'eval_accuracy': 0.3532440782698249, 'eval_micro_f1': 0.6067224774973249, 'eval_macro_f1': 0.5014063659130918, 'eval_runtime': 88.3739, 'eval_samples_per_second': 87.899, 'eval_steps_per_second': 5.499, 'epoch': 3.0}


100%|██████████| 486/486 [00:44<00:00, 10.81it/s]



Best thresholds per emotion:
{'admiration': np.float64(0.3500000000000001), 'amusement': np.float64(0.5000000000000001), 'anger': np.float64(0.40000000000000013), 'annoyance': np.float64(0.25000000000000006), 'approval': np.float64(0.20000000000000004), 'caring': np.float64(0.30000000000000004), 'confusion': np.float64(0.30000000000000004), 'curiosity': np.float64(0.30000000000000004), 'desire': np.float64(0.45000000000000007), 'disappointment': np.float64(0.20000000000000004), 'disapproval': np.float64(0.25000000000000006), 'disgust': np.float64(0.20000000000000004), 'embarrassment': np.float64(0.30000000000000004), 'excitement': np.float64(0.25000000000000006), 'fear': np.float64(0.45000000000000007), 'gratitude': np.float64(0.40000000000000013), 'grief': np.float64(0.15000000000000002), 'joy': np.float64(0.40000000000000013), 'love': np.float64(0.3500000000000001), 'nervousness': np.float64(0.15000000000000002), 'neutral': np.float64(0.30000000000000004), 'optimism': np.float64(0.3

In [2]:
# ✅ Save final model + tokenizer (Phase 1 emotions)
save_dir = "../results/models/phase1_emotion_roberta"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

print(f"Model saved at {save_dir}")


Model saved at ../results/models/phase1_emotion_roberta


In [3]:
# --------------------------
# 9.1) Find best thresholds on validation set
# --------------------------
import numpy as np
from sklearn.metrics import f1_score

# Get validation predictions
val_outputs = trainer.predict(val_ds)
val_logits = val_outputs.predictions
val_labels = np.array(val_outputs.label_ids)

val_probs = torch.sigmoid(torch.tensor(val_logits)).numpy()

def find_best_thresholds(y_true, y_pred_probs, emotion_cols):
    thresholds = {}
    for i in range(y_true.shape[1]):
        best_t, best_f1 = 0.5, 0.0
        for t in np.arange(0.1, 0.9, 0.05):
            preds = (y_pred_probs[:, i] >= t).astype(int)
            f1 = f1_score(y_true[:, i], preds, zero_division=0)
            if f1 > best_f1:
                best_t, best_f1 = t, f1
        thresholds[emotion_cols[i]] = best_t
    return thresholds

thresholds = find_best_thresholds(val_labels, val_probs, emotion_cols)
print("\nBest thresholds per emotion:")
print(thresholds)

# --------------------------
# 10) Predict on few examples using thresholds
# --------------------------
def predict_emotions(prob, emotion_cols, thresholds, max_labels=5, neutral_label="neutral"):
    preds = []

    # Step 1: Apply thresholds
    for i, p in enumerate(prob):
        if p >= thresholds.get(emotion_cols[i], 0.5):
            preds.append((emotion_cols[i], p))

    # Step 2: Fallback if nothing passes threshold
    if not preds:
        top_idx = np.argmax(prob)
        preds = [(emotion_cols[top_idx], prob[top_idx])]

    # Step 3: Sort by probability (high → low)
    preds = sorted(preds, key=lambda x: x[1], reverse=True)

    # Step 4: Keep at most `max_labels`
    preds = preds[:max_labels]

    # Step 5: Drop "neutral" if other strong emotions exist
    labels = [lbl for lbl, p in preds]
    if neutral_label in labels and len(labels) > 1:
        labels = [lbl for lbl in labels if lbl != neutral_label]

    return labels


sample_texts = [
    "I am really anxious and can't sleep 😟",
    "I feel so happy and grateful today 😊",
    "I am angry but also disappointed 😡😞",

    "I feel like nothing matters anymore.",
    "Life has been so heavy lately.",
    "I can’t stop crying, everything hurts.",

    "Why does this always happen to me?!",
    "I’m so pissed off right now.",
    "People never listen and it makes me furious.",

    "I’m scared I’ll fail my exams.",
    "What if something bad happens tomorrow?",
    "I can’t stop worrying, my heart is racing.",

    "I finally got the job I wanted!",
    "I feel so grateful today.",
    "This is the happiest I’ve been in years.",

    "I really care about you.",
    "Spending time with my family makes me feel loved.",
    "I think I’m falling for them.",

    "Wow, I didn’t see that coming at all!",
    "You won’t believe what just happened.",
    "That was totally unexpected.",

    "I’m nervous about the interview, but also excited.",  # (fear + joy)
    "I’m happy for them, but I feel lonely inside.",       # (joy + sadness)
    "I’m so angry, but also deeply hurt.",                 # (anger + sadness)
    "I feel grateful but anxious about the future."        # (joy + fear)
]
encodings = tokenizer(sample_texts, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
outputs = model(**encodings)
probs = torch.sigmoid(outputs["logits"]).detach().cpu().numpy()

for text, prob in zip(sample_texts, probs):
    preds = predict_emotions(prob, emotion_cols, thresholds, max_labels=5, neutral_label="neutral")
    print(f"\nText: {text}\nPredicted emotions: {preds}")

100%|██████████| 486/486 [00:47<00:00, 10.28it/s]



Best thresholds per emotion:
{'admiration': np.float64(0.3500000000000001), 'amusement': np.float64(0.5000000000000001), 'anger': np.float64(0.40000000000000013), 'annoyance': np.float64(0.25000000000000006), 'approval': np.float64(0.20000000000000004), 'caring': np.float64(0.30000000000000004), 'confusion': np.float64(0.30000000000000004), 'curiosity': np.float64(0.30000000000000004), 'desire': np.float64(0.45000000000000007), 'disappointment': np.float64(0.20000000000000004), 'disapproval': np.float64(0.25000000000000006), 'disgust': np.float64(0.20000000000000004), 'embarrassment': np.float64(0.30000000000000004), 'excitement': np.float64(0.25000000000000006), 'fear': np.float64(0.45000000000000007), 'gratitude': np.float64(0.40000000000000013), 'grief': np.float64(0.15000000000000002), 'joy': np.float64(0.40000000000000013), 'love': np.float64(0.3500000000000001), 'nervousness': np.float64(0.15000000000000002), 'neutral': np.float64(0.30000000000000004), 'optimism': np.float64(0.3

In [4]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("CUDA version (PyTorch):", torch.version.cuda)
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU only")


CUDA available: True
CUDA version (PyTorch): 12.4
Device name: NVIDIA GeForce RTX 3050 Laptop GPU


In [5]:
import transformers, os
print(transformers.__file__)


c:\Users\rijju\Desktop\mental-health-chatbot\myenv\Lib\site-packages\transformers\__init__.py
