# üöÄ Emotion Classification - Google Colab

**Models**: BERTweet, CardiffRoBERTa, ModernBERT  
**Task**: 6-class emotion classification  
**Runtime**: ~45 min on T4 GPU

In [1]:
# Cell 1: Install packages
!pip install -q torch transformers pandas numpy scikit-learn matplotlib seaborn imbalanced-learn tqdm
!pip install hf_xet
!pip3 install emoji==0.6.0

import torch
print(f"PyTorch: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Collecting emoji==0.6.0
  Downloading emoji-0.6.0.tar.gz (51 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m51.0/51.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.6.0-py3-none-any.whl size=49719 sha256=1375414a75b29f7d8442da8124eb96fc6712ac3c91bb16da900a3bd9e58aadb4
  Stored in directory: /root/.cache/pip/wheels/0d/bf/a2/536017b4a6232aef0fb92831af35facd6590c0af0f3983f63b
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.6.0
PyTorch: 2.9.0+cu126
CUDA Available: True
GPU: Tesla T4


In [2]:
# Cell 2: Mount Google Drive
from google.colab import drive
import os

drive.mount('/content/gdrive')
work_dir = '/content/gdrive/My Drive/emotion_classifier_v2'
os.makedirs(work_dir, exist_ok=True)
os.chdir(work_dir)
print(f"Working dir: {os.getcwd()}")


Mounted at /content/gdrive
Working dir: /content/gdrive/My Drive/emotion_classifier_v2


In [3]:
# Cell 3: Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import time
from tqdm.auto import tqdm
import re
from collections import Counter
import json
import gc

warnings.filterwarnings('ignore')
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

print("‚úì Imports complete")


‚úì Imports complete


In [4]:
# Cell 4: Configuration (OPTIMIZED)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using: {DEVICE}")

EMOTION_LABELS = {
    0: "Sadness",
    1: "Joy",
    2: "Love",
    3: "Anger",
    4: "Fear",
    5: "Surprise"
}

MODELS = {
    'BERTweet': 'vinai/bertweet-base',
    'CardiffRoBERTa': 'cardiffnlp/twitter-roberta-base',
    'ModernBERT': 'answerdotai/ModernBERT-base'
}

# Optimized hyperparameters
BATCH_SIZE   = 32       # reduce to 16 if OOM
EPOCHS       = 5
LEARNING_RATE = 5e-5
MAX_LENGTH   = 128
WARMUP_STEPS = 500
WEIGHT_DECAY = 0.01

print("\n‚öôÔ∏è Configuration:")
print(f"  Batch Size:   {BATCH_SIZE}")
print(f"  Max Length:   {MAX_LENGTH}")
print(f"  Epochs:       {EPOCHS}")
print(f"  Learning Rate:{LEARNING_RATE}")
print(f"  Warmup Steps: {WARMUP_STEPS}")
print(f"  Weight Decay: {WEIGHT_DECAY}")


Using: cuda

‚öôÔ∏è Configuration:
  Batch Size:   32
  Max Length:   128
  Epochs:       5
  Learning Rate:5e-05
  Warmup Steps: 500
  Weight Decay: 0.01


In [5]:
# Cell 5: DataCleaner
class DataCleaner:
    @staticmethod
    def clean_text(text):
        if not isinstance(text, str):
            return ""
        text = re.sub(r'http\S+|www.\S+', '', text)
        text = re.sub(r'\S+@\S+', '', text)
        text = re.sub(r'@\w+', '', text)
        text = re.sub(r'#(\w+)', r'\1', text)
        text = text.encode('ascii', 'ignore').decode('ascii')
        text = re.sub(r'\s+', ' ', text).strip()
        return text.lower()

    @staticmethod
    def remove_duplicates(df):
        initial = len(df)
        df = df.drop_duplicates(subset=['text'], keep='first')
        print(f"  Removed {initial - len(df)} duplicates")
        return df

    @staticmethod
    def remove_outliers(df, min_len=3, max_len=512):
        initial = len(df)
        df = df[(df['text'].str.len() >= min_len) & (df['text'].str.len() <= max_len)]
        print(f"  Removed {initial - len(df)} outliers")
        return df

    @staticmethod
    def handle_missing_values(df):
        return df.dropna(subset=['text', 'label'])

print("‚úì DataCleaner defined")


‚úì DataCleaner defined


In [6]:
# Cell 6: EmotionDataset
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts.iloc[idx] if hasattr(self.texts, 'iloc') else self.texts[idx]
        label = self.labels.iloc[idx] if hasattr(self.labels, 'iloc') else self.labels[idx]
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

print("‚úì EmotionDataset defined")


‚úì EmotionDataset defined


In [7]:
# Cell 7: EmotionClassifier (FULL FINE-TUNING)
class EmotionClassifier:
    def __init__(self, model_name, model_id, device, class_weights=None):
        self.model_name = model_name
        self.model_id = model_id
        self.device = device
        self.class_weights = class_weights

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_id,
            num_labels=6,
            ignore_mismatched_sizes=True
        )

        # FULL fine-tuning: all params trainable
        trainable = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in self.model.parameters())
        print(f"  üîì Full Fine-tuning: {trainable:,}/{total:,} ({100*trainable/total:.2f}%)")

        self.model.to(device)
        self.history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
        self.best_val_loss = float('inf')
        self.train_time = 0

    def get_loss_fn(self):
        if self.class_weights is not None:
            weights = torch.tensor(self.class_weights, dtype=torch.float).to(self.device)
            return nn.CrossEntropyLoss(weight=weights)
        return nn.CrossEntropyLoss()

    def train_epoch(self, train_loader, optimizer, scheduler, loss_fn):
        self.model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Training {self.model_name}", leave=False):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            labels = batch['labels'].to(self.device)

            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = loss_fn(logits, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
        return total_loss / len(train_loader)

    def evaluate(self, val_loader, loss_fn):
        self.model.eval()
        total_loss = 0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Eval {self.model_name}", leave=False):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                loss = loss_fn(logits, labels)
                total_loss += loss.item()

                preds = torch.argmax(logits, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        acc = accuracy_score(all_labels, all_preds)
        return total_loss / len(val_loader), acc, all_preds, all_labels

    def train(self, train_loader, val_loader, num_epochs=5):
        loss_fn = self.get_loss_fn()
        optimizer = AdamW(self.model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        total_steps = len(train_loader) * num_epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=WARMUP_STEPS,
            num_training_steps=total_steps
        )

        patience = 3
        patience_counter = 0
        start_time = time.time()

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch+1}/{num_epochs} - {self.model_name}")
            train_loss = self.train_epoch(train_loader, optimizer, scheduler, loss_fn)
            val_loss, val_acc, _, _ = self.evaluate(val_loader, loss_fn)

            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['val_acc'].append(val_acc)

            print(f"Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                patience_counter = 0
                torch.save(self.model.state_dict(), f'best_{self.model_name}.pt')
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping (patience={patience})")
                    break

        self.train_time = time.time() - start_time
        self.model.load_state_dict(torch.load(f'best_{self.model_name}.pt'))

    def predict(self, test_loader):
        self.model.eval()
        all_preds, all_labels, all_logits = [], [], []
        inference_times = []
        with torch.no_grad():
            for batch in tqdm(test_loader, desc=f"Predict {self.model_name}", leave=False):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                start = time.time()
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                elapsed = time.time() - start
                inference_times.append(elapsed / len(labels))

                logits = outputs.logits
                preds = torch.argmax(logits, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                all_logits.extend(logits.cpu().numpy())
        avg_time = np.mean(inference_times) if inference_times else 0
        return all_preds, all_labels, all_logits, avg_time

print("‚úì EmotionClassifier defined")


‚úì EmotionClassifier defined


In [8]:
# Cell 8: Data loading helpers
def load_and_prepare_data(train_path='train.csv', val_path='validation.csv'):
    print("\n" + "="*50)
    print("LOADING DATA")
    print("="*50)
    try:
        train_df = pd.read_csv(train_path)
        val_df = pd.read_csv(val_path)
    except Exception as e:
        print("Error: CSV files not found!", e)
        return None, None

    print(f"Original: {len(train_df)} train, {len(val_df)} val")
    cleaner = DataCleaner()

    train_df = cleaner.handle_missing_values(train_df)
    val_df = cleaner.handle_missing_values(val_df)
    train_df['text'] = train_df['text'].apply(cleaner.clean_text)
    val_df['text'] = val_df['text'].apply(cleaner.clean_text)
    train_df = cleaner.remove_duplicates(train_df)
    val_df = cleaner.remove_duplicates(val_df)
    train_df = cleaner.remove_outliers(train_df)
    val_df = cleaner.remove_outliers(val_df)

    print(f"Cleaned: {len(train_df)} train, {len(val_df)} val")
    return train_df, val_df


def get_class_weights(labels):
    counts = Counter(labels)
    total = len(labels)
    weights = [total / counts[i] for i in range(6)]
    weights = np.array(weights)
    weights = weights / weights.sum() * 6
    return weights

print("‚úì Data loading functions defined")


‚úì Data loading functions defined


In [9]:
# Cell 9: Find and copy CSVs
import os, shutil

print("üîç Searching for CSV files...\n")

possible_paths = [
    '/content/gdrive/My Drive/NLP/Project2',
    '/content/gdrive/My Drive/emotion_classifier',
    '/content/gdrive/My Drive'
]

found = False
for path in possible_paths:
    if os.path.exists(path):
        train_path = os.path.join(path, 'train.csv')
        val_path = os.path.join(path, 'validation.csv')
        if os.path.exists(train_path) and os.path.exists(val_path):
            print(f"‚úì Found CSVs in: {path}")
            shutil.copy(train_path, './train.csv')
            shutil.copy(val_path, './validation.csv')
            found = True
            break

if not found:
    print("‚ö†Ô∏è Could not auto-find CSVs. Place train.csv and validation.csv in one of:")
    for p in possible_paths:
        print("  -", p)
else:
    print("\nFiles in working dir:", os.listdir('.'))


üîç Searching for CSV files...

‚úì Found CSVs in: /content/gdrive/My Drive/NLP/Project2

Files in working dir: ['visualizations', 'train.csv', 'validation.csv', 'best_BERTweet.pt', 'best_CardiffRoBERTa.pt', 'best_ModernBERT.pt', 'emotion_classification_report.json']


In [10]:
# DEBUGGING - Check file locations
import os

print("Current working directory:")
print(os.getcwd())

print("\nFiles in current directory:")
for f in os.listdir('.'):
    print(f"  {f}")

print("\nLooking for CSV files:")
csv_files = [f for f in os.listdir('.') if f.endswith('.csv')]
if csv_files:
    print(f"‚úì Found: {csv_files}")
else:
    print("‚úó No CSV files in current directory")

# Check if they exist with explicit path
print(f"\ntrain.csv exists: {os.path.exists('train.csv')}")
print(f"validation.csv exists: {os.path.exists('validation.csv')}")


Current working directory:
/content/gdrive/My Drive/emotion_classifier_v2

Files in current directory:
  visualizations
  train.csv
  validation.csv
  best_BERTweet.pt
  best_CardiffRoBERTa.pt
  best_ModernBERT.pt
  emotion_classification_report.json

Looking for CSV files:
‚úì Found: ['train.csv', 'validation.csv']

train.csv exists: True
validation.csv exists: True


In [11]:
# Cell 10: MAIN TRAINING
os.makedirs('visualizations', exist_ok=True)

train_df, val_df = load_and_prepare_data()
if train_df is None:
    raise Exception("Data not loaded")

class_weights = get_class_weights(train_df['label'].values)
print("\nClass weights:", class_weights)
print("\nClass distribution:\n", train_df['label'].value_counts().sort_index())

models_results = {}

for model_name, model_id in MODELS.items():
    print("\n" + "="*70)
    print(f"TRAINING: {model_name}")
    print("="*70)

    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        clf = EmotionClassifier(model_name, model_id, DEVICE, class_weights=class_weights)

        train_ds = EmotionDataset(train_df['text'], train_df['label'], clf.tokenizer, MAX_LENGTH)
        val_ds   = EmotionDataset(val_df['text'],  val_df['label'],  clf.tokenizer, MAX_LENGTH)

        train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
        val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

        clf.train(train_loader, val_loader, num_epochs=EPOCHS)

        preds, true_labels, logits, infer_time = clf.predict(val_loader)

        acc = accuracy_score(true_labels, preds)
        f1  = f1_score(true_labels, preds, average='weighted', zero_division=0)
        cr  = classification_report(true_labels, preds, output_dict=True, zero_division=0)

        param_size  = sum(p.numel() * p.element_size() for p in clf.model.parameters())
        buffer_size = sum(b.numel() * b.element_size() for b in clf.model.buffers())
        model_size  = (param_size + buffer_size) / 1024 / 1024

        models_results[model_name] = {
            'classifier': clf,
            'predictions': preds,
            'true_labels': true_labels,
            'logits': logits,
            'accuracy': acc,
            'f1_score': f1,
            'inference_time': infer_time,
            'model_size': model_size,
            'history': clf.history,
            'classification_report': cr
        }

        print(f"\n‚úÖ {model_name} Results:")
        print(f"  Accuracy: {acc:.4f} ({acc*100:.2f}%)")
        print(f"  F1-Score: {f1:.4f}")
        print(f"  Inference: {infer_time*1000:.2f} ms")
        print(f"  Size: {model_size:.2f} MB")

        del clf.model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    except Exception as e:
        print("‚ùå Error training", model_name, ":", e)
        import traceback; traceback.print_exc()

print("\n" + "="*70)
print("‚úÖ TRAINING COMPLETE!")
print("="*70)



LOADING DATA
Original: 16000 train, 2000 val
  Removed 31 duplicates
  Removed 2 duplicates
  Removed 0 outliers
  Removed 0 outliers
Cleaned: 15969 train, 1998 val

Class weights: [0.32875638 0.28660182 1.18038472 0.71151729 0.79323319 2.6995066 ]

Class distribution:
 label
0    4664
1    5350
2    1299
3    2155
4    1933
5     568
Name: count, dtype: int64

TRAINING: BERTweet


config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

bpe.codes: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  üîì Full Fine-tuning: 134,904,582/134,904,582 (100.00%)


model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]


Epoch 1/5 - BERTweet


Training BERTweet:   0%|          | 0/500 [00:00<?, ?it/s]

Eval BERTweet:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.9912 | Val Loss: 0.2818 | Val Acc: 0.9084

Epoch 2/5 - BERTweet


Training BERTweet:   0%|          | 0/500 [00:00<?, ?it/s]

Eval BERTweet:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.2301 | Val Loss: 0.1692 | Val Acc: 0.9384

Epoch 3/5 - BERTweet


Training BERTweet:   0%|          | 0/500 [00:00<?, ?it/s]

Eval BERTweet:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.1490 | Val Loss: 0.1458 | Val Acc: 0.9419

Epoch 4/5 - BERTweet


Training BERTweet:   0%|          | 0/500 [00:00<?, ?it/s]

Eval BERTweet:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.1190 | Val Loss: 0.1573 | Val Acc: 0.9454

Epoch 5/5 - BERTweet


Training BERTweet:   0%|          | 0/500 [00:00<?, ?it/s]

Eval BERTweet:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.0997 | Val Loss: 0.2057 | Val Acc: 0.9354


Predict BERTweet:   0%|          | 0/63 [00:00<?, ?it/s]


‚úÖ BERTweet Results:
  Accuracy: 0.9419 (94.19%)
  F1-Score: 0.9433
  Inference: 0.30 ms
  Size: 514.62 MB

TRAINING: CardiffRoBERTa


config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  üîì Full Fine-tuning: 124,650,246/124,650,246 (100.00%)

Epoch 1/5 - CardiffRoBERTa


Training CardiffRoBERTa:   0%|          | 0/500 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

Eval CardiffRoBERTa:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.8499 | Val Loss: 0.2237 | Val Acc: 0.9184

Epoch 2/5 - CardiffRoBERTa


Training CardiffRoBERTa:   0%|          | 0/500 [00:00<?, ?it/s]

Eval CardiffRoBERTa:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.2296 | Val Loss: 0.1809 | Val Acc: 0.9364

Epoch 3/5 - CardiffRoBERTa


Training CardiffRoBERTa:   0%|          | 0/500 [00:00<?, ?it/s]

Eval CardiffRoBERTa:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.1451 | Val Loss: 0.1390 | Val Acc: 0.9404

Epoch 4/5 - CardiffRoBERTa


Training CardiffRoBERTa:   0%|          | 0/500 [00:00<?, ?it/s]

Eval CardiffRoBERTa:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.1182 | Val Loss: 0.1865 | Val Acc: 0.9394

Epoch 5/5 - CardiffRoBERTa


Training CardiffRoBERTa:   0%|          | 0/500 [00:00<?, ?it/s]

Eval CardiffRoBERTa:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.0879 | Val Loss: 0.1854 | Val Acc: 0.9389


Predict CardiffRoBERTa:   0%|          | 0/63 [00:00<?, ?it/s]


‚úÖ CardiffRoBERTa Results:
  Accuracy: 0.9404 (94.04%)
  F1-Score: 0.9417
  Inference: 0.28 ms
  Size: 475.51 MB

TRAINING: ModernBERT


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  üîì Full Fine-tuning: 149,609,478/149,609,478 (100.00%)

Epoch 1/5 - ModernBERT


Training ModernBERT:   0%|          | 0/500 [00:00<?, ?it/s]

W0118 10:27:57.969000 585 torch/_inductor/utils.py:1558] [1/0_1] Not enough SMs to use max_autotune_gemm mode


Eval ModernBERT:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.9496 | Val Loss: 0.2845 | Val Acc: 0.8914

Epoch 2/5 - ModernBERT


Training ModernBERT:   0%|          | 0/500 [00:00<?, ?it/s]

Eval ModernBERT:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.1966 | Val Loss: 0.1586 | Val Acc: 0.9314

Epoch 3/5 - ModernBERT


Training ModernBERT:   0%|          | 0/500 [00:00<?, ?it/s]

Eval ModernBERT:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.1212 | Val Loss: 0.1340 | Val Acc: 0.9424

Epoch 4/5 - ModernBERT


Training ModernBERT:   0%|          | 0/500 [00:00<?, ?it/s]

Eval ModernBERT:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.0757 | Val Loss: 0.3150 | Val Acc: 0.9384

Epoch 5/5 - ModernBERT


Training ModernBERT:   0%|          | 0/500 [00:00<?, ?it/s]

Eval ModernBERT:   0%|          | 0/63 [00:00<?, ?it/s]

Loss: 0.0196 | Val Loss: 0.4224 | Val Acc: 0.9394


Predict ModernBERT:   0%|          | 0/63 [00:00<?, ?it/s]


‚úÖ ModernBERT Results:
  Accuracy: 0.9424 (94.24%)
  F1-Score: 0.9435
  Inference: 0.97 ms
  Size: 570.72 MB

‚úÖ TRAINING COMPLETE!


In [12]:
# Cell 11: Results Summary
if models_results:
    print("\nüìä FINAL RESULTS\n")
    df = pd.DataFrame({
        'Model': list(models_results.keys()),
        'Accuracy': [models_results[m]['accuracy'] for m in models_results],
        'F1-Score': [models_results[m]['f1_score'] for m in models_results],
        'Inference (ms)': [models_results[m]['inference_time']*1000 for m in models_results],
        'Size (MB)': [models_results[m]['model_size'] for m in models_results],
    })
    print(df.to_string(index=False))

    best = max(models_results.keys(), key=lambda x: models_results[x]['accuracy'])
    print(f"\nüèÜ Best model: {best}")
    print(f"   Accuracy: {models_results[best]['accuracy']:.4f} ({models_results[best]['accuracy']*100:.2f}%)")

    report = {
        'best_model': best,
        'models_comparison': {
            m: {
                'accuracy': r['accuracy'],
                'f1_score': r['f1_score'],
                'inference_time_ms': r['inference_time']*1000,
                'model_size_mb': r['model_size'],
            }
            for m, r in models_results.items()
        }
    }
    with open('emotion_classification_report.json', 'w') as f:
        json.dump(report, f, indent=2)
    print("\n‚úì Report saved: emotion_classification_report.json")
else:
    print("No results to display ‚Äì check Cell 10 for errors.")



üìä FINAL RESULTS

         Model  Accuracy  F1-Score  Inference (ms)  Size (MB)
      BERTweet  0.941942  0.943284        0.295783 514.622124
CardiffRoBERTa  0.940440  0.941746        0.279590 475.510796
    ModernBERT  0.942442  0.943473        0.972409 570.717552

üèÜ Best model: ModernBERT
   Accuracy: 0.9424 (94.24%)

‚úì Report saved: emotion_classification_report.json
