In [1]:
from huggingface_hub import login
login(new_session=False)

In [2]:
import os

# Set the HF_TOKEN environment variable using the secret
os.environ['HF_TOKEN'] = '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'
print("HF_TOKEN has been set in the environment variables.")

HF_TOKEN has been set in the environment variables.


In [3]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset
from transformers import (
    BertTokenizerFast, BertForSequenceClassification,
    TrainingArguments, Trainer, EarlyStoppingCallback
)

# ============================
# 1. Load & Prepare Data
# ============================

def load_and_prepare(file_path):
    df = pd.read_csv(file_path)[['Situation', 'empathetic_dialogues', 'emotion']].dropna()
    df['input'] = df['Situation'] + " " + df['empathetic_dialogues']
    return df[['input', 'emotion']]

train_df = load_and_prepare("train.csv")
val_df = load_and_prepare("valid.csv")
test_df = load_and_prepare("test.csv")

le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['emotion'])
val_df['label'] = le.transform(val_df['emotion'])
test_df['label'] = le.transform(test_df['emotion'])
joblib.dump(le, "label_encoder.pkl")

train_dataset = Dataset.from_pandas(train_df[['input', 'label']])
val_dataset = Dataset.from_pandas(val_df[['input', 'label']])
test_dataset = Dataset.from_pandas(test_df[['input', 'label']])

# ============================
# 2. Tokenization
# ============================

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

def tokenize_function(example):
    return tokenizer(example['input'], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# ============================
# 3. Model & Metrics
# ============================

def model_init():
    return BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=32)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="weighted")
    }

# ============================
# 4. Training with best hyperparameters (from trial 3)
# ============================

best_args = TrainingArguments(
    output_dir="./best_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    learning_rate=2.057581070476546e-05,
    per_device_train_batch_size=8,
    num_train_epochs=6,
    weight_decay=0.12018648613579168,
    logging_dir="./logs",
    disable_tqdm=False,
    report_to="none"
)

best_trainer = Trainer(
    model_init=model_init,
    args=best_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

best_trainer.train()

# ============================
# 5. Load Best Model from Disk
# ============================

loaded_model = BertForSequenceClassification.from_pretrained("./best_model")

loaded_trainer = Trainer(
    model=loaded_model,
    args=best_args,
    compute_metrics=compute_metrics
)

# ============================
# 6. Final Evaluation
# ============================

val_metrics = loaded_trainer.evaluate(eval_dataset=val_dataset)
print("📊 Validation Metrics:", val_metrics)

test_metrics = loaded_trainer.evaluate(eval_dataset=test_dataset)
print("📊 Test Metrics:", test_metrics)

predictions = loaded_trainer.predict(test_dataset)
y_true = test_dataset["label"]
y_pred = np.argmax(predictions.predictions, axis=1)

print(classification_report(y_true, y_pred, target_names=le.classes_))

Map:   0%|          | 0/51672 [00:00<?, ? examples/s]

Map:   0%|          | 0/6459 [00:00<?, ? examples/s]

Map:   0%|          | 0/6460 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-07-15 13:35:32,429] A new study created in memory with name: no-name-f5c548c8-7d34-4341-8130-c53e753f545c
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.9398,0.812659,0.744852,0.73897
2,0.4058,0.412685,0.884502,0.884603
3,0.1628,0.219298,0.953398,0.953338
4,0.043,0.169974,0.969345,0.969313


[I 2025-07-15 13:58:29,695] Trial 0 finished with value: 1.9386583209560022 and parameters: {'learning_rate': 4.3392781202710994e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 4, 'weight_decay': 0.13254498954942515}. Best is trial 0 with value: 1.9386583209560022.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.9516,0.791524,0.742375,0.735126
2,0.4492,0.422919,0.866233,0.865753
3,0.1752,0.242864,0.938845,0.938975
4,0.0885,0.163254,0.964855,0.964906
5,0.0367,0.133583,0.974145,0.974153


[I 2025-07-15 14:18:49,126] Trial 1 finished with value: 1.9482974252962428 and parameters: {'learning_rate': 3.442381292063296e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.19907396568216087}. Best is trial 1 with value: 1.9482974252962428.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.2858,1.118021,0.649636,0.634236
2,0.8735,0.800597,0.740827,0.738377
3,0.5853,0.631991,0.801672,0.800907
4,0.488,0.567392,0.824431,0.823908


[I 2025-07-15 14:35:00,829] Trial 2 finished with value: 1.6483394498972297 and parameters: {'learning_rate': 1.0222591962557778e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 4, 'weight_decay': 0.022179482328929245}. Best is trial 1 with value: 1.9482974252962428.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.9545,0.808283,0.749652,0.745751
2,0.449,0.477726,0.857718,0.85775
3,0.2175,0.318499,0.931259,0.931163
4,0.1173,0.242323,0.953708,0.953729
5,0.058,0.168834,0.970739,0.970724
6,0.025,0.147595,0.974919,0.974896


[I 2025-07-15 15:09:57,106] Trial 3 finished with value: 1.9498142335774902 and parameters: {'learning_rate': 2.057581070476546e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 6, 'weight_decay': 0.12018648613579168}. Best is trial 3 with value: 1.9498142335774902.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.9105,0.770371,0.756619,0.753409
2,0.4014,0.420909,0.882799,0.883039
3,0.167,0.294142,0.929401,0.929235


[I 2025-07-15 15:27:26,411] Trial 4 finished with value: 1.8586354438702943 and parameters: {'learning_rate': 2.6520905509334693e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 3, 'weight_decay': 0.0019643879959534427}. Best is trial 3 with value: 1.9498142335774902.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[W 2025-07-15 15:28:40,441] Trial 5 failed with parameters: {'learning_rate': 1.261552998836375e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 5, 'weight_decay': 0.29054561283663005} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/transformers/integrations/integration_utils.py", line 200, in _objective
    trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
  File "/usr/local/lib/python3.12/dist-packages/transformers/trainer.py", line 1885, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/transformers/trainer.py", line 2279, in _inner_training_loop
    self.optimizer.step()
  File "/usr/local/lib/python3.12/dist-packages/accelerate/optimizer.

KeyboardInterrupt: 

In [6]:
import torch
print(torch.version.cuda)
print(torch.cuda.is_available())


12.1
True


In [7]:
!nvidia-smi


Tue Jul 15 13:20:48 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.06             Driver Version: 535.183.06   CUDA Version: 12.6     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:BD:00.0 Off |                   On |
| N/A   32C    P0              61W / 400W |                  N/A |     N/A      Default |
|                                         |                      |            Disabled* |
+-----------------------------------------+----------------------+----------------------+

+------------------------------------------------------------------

In [4]:
!nvidia-smi


Tue Jul 15 15:29:35 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.06             Driver Version: 535.183.06   CUDA Version: 12.6     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:BD:00.0 Off |                   On |
| N/A   34C    P0              62W / 400W |                  N/A |     N/A      Default |
|                                         |                      |            Disabled* |
+-----------------------------------------+----------------------+----------------------+

+---------------------------------------------------------------------------------------+
| MIG devices:                                                                          |
+------------------+--------------------------------+-----------+-----------------------+
| GPU  GI  CI  MIG |                   Memory-Usage |        Vol|      Shared           |
|      ID  ID  Dev |                     BAR1-Usage | SM     Unc| CE ENC DEC OFA JPG    |
|                  |                                |        ECC|                       |
|  0    2   0   0  |            7673MiB / 40192MiB  | 42      0 |  3   0    2    0    0 |
|        

In [5]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset
from transformers import (
    BertTokenizerFast, BertForSequenceClassification,
    TrainingArguments, Trainer, EarlyStoppingCallback
)

# ============================
# 1. Load & Prepare Data
# ============================

def load_and_prepare(file_path):
    df = pd.read_csv(file_path)[['Situation', 'empathetic_dialogues', 'emotion']].dropna()
    df['input'] = df['Situation'] + " " + df['empathetic_dialogues']
    return df[['input', 'emotion']]

train_df = load_and_prepare("train.csv")
val_df = load_and_prepare("valid.csv")
test_df = load_and_prepare("test.csv")

le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['emotion'])
val_df['label'] = le.transform(val_df['emotion'])
test_df['label'] = le.transform(test_df['emotion'])
joblib.dump(le, "label_encoder.pkl")

train_dataset = Dataset.from_pandas(train_df[['input', 'label']])
val_dataset = Dataset.from_pandas(val_df[['input', 'label']])
test_dataset = Dataset.from_pandas(test_df[['input', 'label']])

# ============================
# 2. Tokenization
# ============================

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

def tokenize_function(example):
    return tokenizer(example['input'], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# ============================
# 3. Model & Metrics
# ============================

def model_init():
    return BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=32)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="weighted")
    }

# ============================
# 4. Training with best hyperparameters (from trial 3)
# ============================

best_args = TrainingArguments(
    output_dir="./best_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    learning_rate=2.057581070476546e-05,
    per_device_train_batch_size=8,
    num_train_epochs=6,
    weight_decay=0.12018648613579168,
    logging_dir="./logs",
    disable_tqdm=False,
    report_to="none"
)

best_trainer = Trainer(
    model_init=model_init,
    args=best_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

best_trainer.train()

# ============================
# 5. Load Best Model from Disk
# ============================

loaded_model = BertForSequenceClassification.from_pretrained("./best_model")

loaded_trainer = Trainer(
    model=loaded_model,
    args=best_args,
    compute_metrics=compute_metrics
)

# ============================
# 6. Final Evaluation
# ============================

val_metrics = loaded_trainer.evaluate(eval_dataset=val_dataset)
print("📊 Validation Metrics:", val_metrics)

test_metrics = loaded_trainer.evaluate(eval_dataset=test_dataset)
print("📊 Test Metrics:", test_metrics)

predictions = loaded_trainer.predict(test_dataset)
y_true = test_dataset["label"]
y_pred = np.argmax(predictions.predictions, axis=1)

print(classification_report(y_true, y_pred, target_names=le.classes_))


Map:   0%|          | 0/51672 [00:00<?, ? examples/s]

Map:   0%|          | 0/6459 [00:00<?, ? examples/s]

Map:   0%|          | 0/6460 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.9545,0.808283,0.749652,0.745751
2,0.449,0.477726,0.857718,0.85775
3,0.2175,0.318499,0.931259,0.931163
4,0.1173,0.242323,0.953708,0.953729
5,0.058,0.168834,0.970739,0.970724
6,0.025,0.147595,0.974919,0.974896


OSError: ./best_model does not appear to have a file named config.json. Checkout 'https://huggingface.co/./best_model/tree/main' for available files.

In [9]:
# train_model.py

import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset
from transformers import (
    BertTokenizerFast, BertForSequenceClassification,
    TrainingArguments, Trainer, EarlyStoppingCallback
)

# ============================
# 1. Load & Prepare Data
# ============================

def load_and_prepare(file_path):
    df = pd.read_csv(file_path)[['Situation', 'empathetic_dialogues', 'emotion']].dropna()
    df['input'] = df['Situation'] + " " + df['empathetic_dialogues']
    return df[['input', 'emotion']]

train_df = load_and_prepare("train.csv")
val_df = load_and_prepare("valid.csv")

# Label Encoding
le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['emotion'])
val_df['label'] = le.transform(val_df['emotion'])
joblib.dump(le, "label_encoder.pkl")

train_dataset = Dataset.from_pandas(train_df[['input', 'label']])
val_dataset = Dataset.from_pandas(val_df[['input', 'label']])

# ============================
# 2. Tokenization
# ============================

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

def tokenize_function(example):
    return tokenizer(example['input'], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# ============================
# 3. Model & Trainer
# ============================

def model_init():
    return BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=32)

training_args = TrainingArguments(
    output_dir="./checkpoints",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    learning_rate=2.057581070476546e-05,
    per_device_train_batch_size=8,
    num_train_epochs=6,
    weight_decay=0.12018648613579168,
    logging_dir="./logs",
    disable_tqdm=False,
    report_to="none"
)

trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=lambda eval_pred: {
        "accuracy": __import__('sklearn.metrics').metrics.accuracy_score(eval_pred[1], eval_pred[0].argmax(axis=1)),
        "f1": __import__('sklearn.metrics').metrics.f1_score(eval_pred[1], eval_pred[0].argmax(axis=1), average="weighted")
    },
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# ============================
# 4. Train & Save
# ============================

trainer.train()
trainer.save_model("./best_model")
tokenizer.save_pretrained("./best_model")


Map:   0%|          | 0/51672 [00:00<?, ? examples/s]

Map:   0%|          | 0/6459 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.9545,0.808283,0.749652,0.745751
2,0.449,0.477726,0.857718,0.85775
3,0.2175,0.318499,0.931259,0.931163
4,0.1173,0.242323,0.953708,0.953729
5,0.058,0.168834,0.970739,0.970724
6,0.025,0.147595,0.974919,0.974896


('./best_model/tokenizer_config.json',
 './best_model/special_tokens_map.json',
 './best_model/vocab.txt',
 './best_model/added_tokens.json',
 './best_model/tokenizer.json')

In [11]:
# evaluate_model.py

import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, f1_score, classification_report
from datasets import Dataset
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments

# ============================
# 1. Load Data & Labels
# ============================

def load_and_prepare(file_path):
    df = pd.read_csv(file_path)[['Situation', 'empathetic_dialogues', 'emotion']].dropna()
    df['input'] = df['Situation'] + " " + df['empathetic_dialogues']
    return df[['input', 'emotion']]

val_df = load_and_prepare("valid.csv")
test_df = load_and_prepare("test.csv")

le = joblib.load("label_encoder.pkl")
val_df['label'] = le.transform(val_df['emotion'])
test_df['label'] = le.transform(test_df['emotion'])

val_dataset = Dataset.from_pandas(val_df[['input', 'label']])
test_dataset = Dataset.from_pandas(test_df[['input', 'label']])

# ============================
# 2. Tokenization
# ============================

tokenizer = BertTokenizerFast.from_pretrained("./best_model")

def tokenize_function(example):
    return tokenizer(example['input'], padding="max_length", truncation=True, max_length=128)

val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# ============================
# 3. Load Model & Trainer
# ============================

model = BertForSequenceClassification.from_pretrained("./best_model")

training_args = TrainingArguments(
    output_dir="./temp_eval",
    per_device_eval_batch_size=8,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=lambda eval_pred: {
        "accuracy": accuracy_score(eval_pred[1], np.argmax(eval_pred[0], axis=1)),
        "f1": f1_score(eval_pred[1], np.argmax(eval_pred[0], axis=1), average="weighted")
    }
)

# ============================
# 4. Evaluate
# ============================

val_metrics = trainer.evaluate(eval_dataset=val_dataset)
print("📊 Validation Metrics:", val_metrics)

test_metrics = trainer.evaluate(eval_dataset=test_dataset)
print("📊 Test Metrics:", test_metrics)

# Classification Report
predictions = trainer.predict(test_dataset)
y_true = test_dataset["label"]
y_pred = np.argmax(predictions.predictions, axis=1)

print("📋 Classification Report:")
print(classification_report(y_true, y_pred, target_names=le.classes_))


Map:   0%|          | 0/6459 [00:00<?, ? examples/s]

Map:   0%|          | 0/6460 [00:00<?, ? examples/s]

📊 Validation Metrics: {'eval_loss': 0.14759500324726105, 'eval_accuracy': 0.9749187180678124, 'eval_f1': 0.9748955155096778, 'eval_runtime': 11.916, 'eval_samples_per_second': 542.047, 'eval_steps_per_second': 67.808}
📊 Test Metrics: {'eval_loss': 0.15616798400878906, 'eval_accuracy': 0.974922600619195, 'eval_f1': 0.9749004594692415, 'eval_runtime': 12.9966, 'eval_samples_per_second': 497.053, 'eval_steps_per_second': 62.17}
📋 Classification Report:
              precision    recall  f1-score   support

      afraid       0.97      0.96      0.96       210
       angry       0.96      0.93      0.95       230
     annoyed       0.96      0.98      0.97       222
anticipating       0.98      0.98      0.98       202
     anxious       0.99      0.95      0.97       204
apprehensive       0.99      0.99      0.99       155
     ashamed       0.95      0.97      0.96       169
      caring       0.99      0.95      0.97       176
   confident       0.97      0.99      0.98       204
     

In [14]:
import torch
import torch.nn.functional as F
import joblib
import numpy as np
from transformers import BertTokenizerFast, BertForSequenceClassification
from typing import List, Dict
import os

# ================================
# 1. Load model, tokenizer, labels
# ================================

MODEL_PATH = "./best_model"
ENCODER_PATH = "./label_encoder.pkl"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("🔹 Loading tokenizer and model...")
tokenizer = BertTokenizerFast.from_pretrained(MODEL_PATH)
model = BertForSequenceClassification.from_pretrained(MODEL_PATH)
model.to(device)
model.eval()

# Load label mapping
if os.path.exists(ENCODER_PATH):
    le = joblib.load(ENCODER_PATH)
    id2label = {i: label for i, label in enumerate(le.classes_)}
    print("✅ Loaded label names from label_encoder.pkl")
else:
    # Fallback to generic label names
    num_labels = model.config.num_labels
    id2label = {i: f"LABEL_{i}" for i in range(num_labels)}
    print("⚠️ label_encoder.pkl not found, using LABEL_0 format")

# ================================
# 2. Inference Function
# ================================

def predict_top3_emotions(texts: List[str], top_k: int = 3) -> List[Dict]:
    results = []
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            logits = model(**inputs).logits
            probs = F.softmax(logits, dim=-1).cpu().numpy()[0]

        top_indices = probs.argsort()[-top_k:][::-1]
        top_preds = [
            {"label": id2label[i], "confidence": round(float(probs[i]), 3)}
            for i in top_indices
        ]

        results.append({
            "text": text,
            "top_emotions": top_preds
        })
    return results

# ================================
# 3. Example Mixed Emotion Inputs
# ================================

texts = [
    "I try to stay strong for everyone around me, but inside I'm falling apart.",
    "Even though I achieved my goals, I still feel empty inside.",
    "I'm proud of how far I've come, but I'm scared of what's next.",
    "I laugh with friends but cry alone every night.",
    "Sometimes I feel like I'm a burden to those I love.",
    "I’ve been feeling hopeful but also overwhelmed with pressure.",
    "I'm constantly anxious, yet somehow functioning like nothing is wrong.",
    "Grateful for my blessings but exhausted mentally.",
    "My parents are proud but I feel like I'm failing myself.",
    "I want to cry and scream but all I do is smile.",
    "Work is great but I feel emotionally drained.",
    "I finally spoke to someone, and I feel a bit lighter.",
    "I feel numb most of the time, like I’m just existing.",
    "People say I look confident, but I doubt every step I take.",
    "Sometimes I just want to disappear and see if anyone notices.",
    "Everything is fine, yet I feel like I’m crumbling inside.",
    "I had a great day, but my chest still feels heavy.",
    "I’m tired of pretending to be okay when I’m clearly not.",
    "I’m scared of opening up, but also desperate to be understood.",
    "Even in a crowd, I feel completely alone.",
    "I love my life, but my thoughts betray me sometimes.",
    "I thought I was healing, but I’m breaking again.",
    "I don’t know what I want, I just want peace.",
    "I'm motivated, but also deeply afraid of failing.",
    "It's hard to explain how I feel—happy yet broken.",
    "I push through my pain with a smile on my face.",
    "I want to be strong, but I’m tired of fighting.",
    "I enjoy life’s little moments, but darkness still lingers.",
    "I want to talk to someone, but I don’t want to bother them.",
    "I’m finally learning to love myself, but it’s hard.",
    "The silence is comforting, but it also scares me.",
    "I laugh loudly so no one sees I’m struggling.",
    "I’m grateful and hopeless at the same time.",
    "I’m tired, but not the kind of tired sleep can fix.",
    "I celebrate small wins, but fear big losses.",
    "I want to scream into a pillow and disappear for a while.",
    "I feel everything so deeply, it’s exhausting.",
    "Sometimes I cry without knowing why.",
    "I want help, but I’m afraid of judgment.",
    "I want to open up to people more, but I’m scared they’ll think I’m too much to handle."
]

# ================================
# 4. Run Inference
# ================================

results = predict_top3_emotions(texts)

# ================================
# 5. Print Results
# ================================

for i, res in enumerate(results):
    print(f"\n📝 [{i+1}] Text: {res['text']}")
    for emo in res["top_emotions"]:
        print(f"🔹 {emo['label']} (Confidence: {emo['confidence']})")


🔹 Loading tokenizer and model...
✅ Loaded label names from label_encoder.pkl

📝 [1] Text: I try to stay strong for everyone around me, but inside I'm falling apart.
🔹 sad (Confidence: 0.644)
🔹 devastated (Confidence: 0.341)
🔹 ashamed (Confidence: 0.007)

📝 [2] Text: Even though I achieved my goals, I still feel empty inside.
🔹 lonely (Confidence: 0.998)
🔹 sad (Confidence: 0.002)
🔹 guilty (Confidence: 0.0)

📝 [3] Text: I'm proud of how far I've come, but I'm scared of what's next.
🔹 apprehensive (Confidence: 0.982)
🔹 afraid (Confidence: 0.017)
🔹 lonely (Confidence: 0.0)

📝 [4] Text: I laugh with friends but cry alone every night.
🔹 lonely (Confidence: 0.997)
🔹 guilty (Confidence: 0.002)
🔹 faithful (Confidence: 0.0)

📝 [5] Text: Sometimes I feel like I'm a burden to those I love.
🔹 sad (Confidence: 0.99)
🔹 ashamed (Confidence: 0.003)
🔹 anxious (Confidence: 0.001)

📝 [6] Text: I’ve been feeling hopeful but also overwhelmed with pressure.
🔹 anxious (Confidence: 0.996)
🔹 apprehensive (Confi