In [24]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
from transformers import AutoTokenizer,AutoModel
from sklearn.metrics  import accuracy_score,f1_score,precision_score,recall_score
from typing import Dict,List,Optional
from dataclasses import dataclass
import warnings
warnings.filterwarnings('ignore')

In [27]:
from tqdm import tqdm

In [56]:
# Read the training CSV and the test CSV from the current working directory.
df=pd.read_csv('train (2).csv')
test=pd.read_csv('test (3).csv')
# Show both frame shapes as the cell output.
df.shape,test.shape


((6827, 8), (1707, 2))

In [29]:
import re
import html
# Characters replaced by a single space: straight and curly double quotes,
# apostrophe, backtick, tilde, dash, '=', '&', ';', '#', backslash, slash,
# angle/square/round brackets, pipe, underscore and the pilcrow sign.
# Non-ASCII members are written as \u escapes so the pattern does not depend
# on how this file happens to be encoded or decoded.
_SYMBOLS_RE = re.compile(r'["\u201c\u201d\'`~\-=&;#\\/<>|\[\]()_\u00b6]')
# Runs of two or more dots (ellipses); a single full stop is kept.
_MULTI_DOT_RE = re.compile(r'\.{2,}')

def preprocessing(text):
  """Normalise a raw text string before tokenisation.

  Steps, in order: decode HTML entities, lower-case, replace each listed
  symbol with a space, replace each run of 2+ dots with a space.
  Whitespace is not collapsed, so consecutive spaces may remain.

  Args:
    text: Input string.

  Returns:
    The cleaned string.
  """
  text = html.unescape(text).lower()
  text = _SYMBOLS_RE.sub(' ', text)
  return _MULTI_DOT_RE.sub(' ', text)

In [30]:
# Display the training frame as the cell output.
df

Unnamed: 0,id,text,anger,fear,joy,sadness,surprise,emotions
0,0,the dentist that did the work apparently did a...,1,0,0,1,0,['anger' 'sadness']
1,1,i'm gonna absolutely ~~suck~~ be terrible duri...,0,1,0,1,0,['fear' 'sadness']
2,2,"bridge: so leave me drowning calling houston, ...",0,1,0,1,0,['fear' 'sadness']
3,3,after that mess i went to see my now ex-girlfr...,1,1,0,1,0,['anger' 'fear' 'sadness']
4,4,"as he stumbled i ran off, afraid it might some...",0,1,0,0,0,['fear']
...,...,...,...,...,...,...,...,...
6822,6822,there is not a cloud in the sky and the sun is...,0,0,1,0,0,['joy']
6823,6823,&gt; the grave stomper,0,0,0,0,1,['surprise']
6824,6824,my ear was still freaking stuck.,1,1,0,0,0,['anger' 'fear']
6825,6825,i felt like there was an electric current flow...,0,1,0,1,0,['fear' 'sadness']


In [31]:
from sklearn.model_selection import  train_test_split
# Fraction of rows, taken in file order from the top, that form the training split.
TRAIN_FRACTION = 0.8
# Text column plus the five binary label columns, selected by name so the
# split does not silently pick the wrong columns if the CSV layout changes.
MODEL_COLUMNS = ['text', 'anger', 'fear', 'joy', 'sadness', 'surprise']

train_size = int(TRAIN_FRACTION * df.shape[0])
train_df = df.iloc[:train_size][MODEL_COLUMNS]
val_df = df.iloc[train_size:][MODEL_COLUMNS]

# The two splits must partition the frame exactly.
assert len(train_df) + len(val_df) == len(df)

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")


Training samples: 5461
Validation samples: 1366


In [57]:
# Peek at the first rows of the training split.
train_df.head()

Unnamed: 0,id,text
0,0,she wanted to fight over every single little t...
1,1,"anyway, back to tuesday."
2,2,she shrieked at the dog to go back.
3,3,yelling for everyone to get back or get inside...
4,4,still kind of freaky.


In [33]:
# Name of the pretrained checkpoint whose tokenizer is loaded below.
model_name='bert-base-uncased'
tokenizer=AutoTokenizer.from_pretrained(model_name)

In [64]:
class EmotionDataset(Dataset):
  """Tokenised text dataset usable for labelled and unlabelled frames.

  The frame must have a 'text' column. An 'id' column and the five label
  columns are optional: each is picked up when present and skipped otherwise,
  so the same class serves the train/validation splits (labels, no id) and
  the test frame (id, no labels).

  Args:
    dataframe: Source frame.
    tokenizer: Callable tokenizer returning 'input_ids' and 'attention_mask'.
    max_length: Sequence length every sample is truncated/padded to.
  """

  LABEL_COLUMNS = ['anger', 'fear', 'joy', 'sadness', 'surprise']

  def __init__(self,dataframe:pd.DataFrame,tokenizer:AutoTokenizer,max_length:int=128):
    self.texts = dataframe['text'].values
    # Labels are only available when every label column exists in the frame.
    has_labels = all(col in dataframe.columns for col in self.LABEL_COLUMNS)
    self.labels = dataframe[self.LABEL_COLUMNS].values if has_labels else None
    self.id = dataframe['id'].values if 'id' in dataframe.columns else None
    self.tokenizer = tokenizer
    self.max_length = max_length

  def __len__(self)->int:
    """Return the number of rows in the source frame."""
    return len(self.texts)

  def __getitem__(self,idx)->Dict[str,torch.Tensor]:
    """Return one sample.

    The dict always holds 'input_ids' and 'attention_mask' (1-D tensors of
    length max_length). 'id' is added when the frame had an 'id' column and
    'labels' (float tensor, one entry per label column) when it had labels.
    """
    text = preprocessing(str(self.texts[idx]))
    encoding = self.tokenizer(text,
                              truncation=True,
                              add_special_tokens=True,
                              padding='max_length',
                              max_length=self.max_length,
                              return_tensors='pt')
    item = {
        # return_tensors='pt' adds a leading batch axis of size 1; drop it.
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
    }
    if self.id is not None:
      item['id'] = self.id[idx]
    if self.labels is not None:
      # Float targets, as required by BCEWithLogitsLoss.
      item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
    return item





In [63]:
# Peek at the first rows of the test frame.
test.head()

Unnamed: 0,id,text
0,0,she wanted to fight over every single little t...
1,1,"anyway, back to tuesday."
2,2,she shrieked at the dog to go back.
3,3,yelling for everyone to get back or get inside...
4,4,still kind of freaky.


In [66]:
# Wrap the test frame; max_length is left at the class default.
test_dataset=EmotionDataset(test,tokenizer)

In [35]:
# Wrap both splits; tokenisation happens lazily when a sample is indexed.
train_dataset = EmotionDataset(train_df, tokenizer)
val_dataset = EmotionDataset(val_df, tokenizer)

print("Training dataset size:", len(train_dataset))
print("Validation dataset size:", len(val_dataset))

# Report the shape of every field of the first training sample.
print("\nSample from dataset:")
sample = train_dataset[0]
for key in sample:
    print(f"{key}: {sample[key].shape}")

Training dataset size: 5461
Validation dataset size: 1366

Sample from dataset:
input_ids: torch.Size([128])
attention_mask: torch.Size([128])
labels: torch.Size([5])


In [72]:
@dataclass
class DataCollator:
    """Stack per-sample dicts into one batch dict.

    'input_ids' and 'attention_mask' are always stacked into tensors with a
    leading batch axis. 'id' (kept as a plain list) and 'labels' (stacked
    tensor) are forwarded only when the samples carry them, so one collator
    serves the training/validation loaders and the test loader alike.
    """

    def __call__(self, batch: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        """Collate ``batch`` (a non-empty list of sample dicts).

        Optional keys are detected on the first sample; all samples in a
        batch are expected to share the same keys.
        """
        first = batch[0]
        collated = {}

        if 'id' in first:
            # Identifiers are not model inputs, so they stay a Python list.
            collated['id'] = [item['id'] for item in batch]

        collated['input_ids'] = torch.stack([item['input_ids'] for item in batch])
        collated['attention_mask'] = torch.stack([item['attention_mask'] for item in batch])

        if 'labels' in first:
            collated['labels'] = torch.stack([item['labels'] for item in batch])

        return collated

collator = DataCollator()
print("Data collator initialized successfully")

Data collator initialized successfully


In [37]:
BATCH_SIZE = 32

# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collator)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collator)

print("Number of training batches:", len(train_loader))
print("Number of validation batches:", len(val_loader))

# Pull a single batch and report the shape of each field.
print("\nSample batch from DataLoader:")
for batch in train_loader:
    for key in batch:
        print(f"{key}: {batch[key].shape}")
    break

Number of training batches: 171
Number of validation batches: 43

Sample batch from DataLoader:
input_ids: torch.Size([32, 128])
attention_mask: torch.Size([32, 128])
labels: torch.Size([32, 5])


In [73]:
# shuffle=False keeps batches in the row order of the test dataset.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collator)

In [38]:
class EmotionClassifier(nn.Module):
    """Pretrained encoder followed by dropout and one linear layer.

    Args:
        model_name: Checkpoint name passed to ``AutoModel.from_pretrained``.
        num_labels: Number of output logits.
        dropout: Dropout probability applied before the linear layer.
    """

    def __init__(self, model_name: str, num_labels: int = 5, dropout: float = 0.3):
        super().__init__()

        self.transformer = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        hidden_size = self.transformer.config.hidden_size
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Return raw logits (no sigmoid applied), one row per input sequence."""
        encoder_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state at sequence position 0 is used as the sequence summary.
        first_token_state = encoder_out.last_hidden_state[:, 0, :]
        return self.classifier(self.dropout(first_token_state))
MODEL_NAME = 'bert-base-uncased'
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EmotionClassifier(MODEL_NAME, num_labels=5).to(device)

print(f"Device: {device}")
print("\nModel architecture:")
print(model)

# (element count, requires_grad) for every parameter tensor of the model.
param_counts = [(p.numel(), p.requires_grad) for p in model.parameters()]
print(f"\nTotal parameters: {sum(count for count, _ in param_counts):,}")
print(f"Trainable parameters: {sum(count for count, trainable in param_counts if trainable):,}")

Device: cuda

Model architecture:
EmotionClassifier(
  (transformer): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNo

In [46]:
import torch
import os
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score

class Trainer:
    """Training loop with per-epoch checkpointing, best-model saving and early stopping.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``
            that returns logits.
        train_loader: Iterable of batch dicts with ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'``; must support ``len()``.
        val_loader: Same batch format, used for validation.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer stepped once per training batch.
        device: Device every batch tensor is moved to.
        checkpoint_path: File the full training state is written to after
            every epoch.
        patience: Number of consecutive epochs without a validation-accuracy
            improvement after which training stops.
        best_model_path: File the weights of the best epoch are written to.
        resume: When True (the default) and ``checkpoint_path`` exists, the
            saved model, optimizer, history and counters are loaded in the
            constructor. Pass False to start a fresh run even though a
            checkpoint from an earlier run is still on disk.
    """

    def __init__(self, model: torch.nn.Module, train_loader, val_loader,
                 criterion, optimizer, device: torch.device,
                 checkpoint_path="checkpoint.pth", patience=5,
                 best_model_path="best_model.pth", resume=True):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device
        self.history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
        self.checkpoint_path = checkpoint_path
        self.best_model_path = best_model_path
        self.start_epoch = 0
        self.best_val_acc = 0.0
        self.patience = patience
        self.no_improve_epochs = 0

        # Auto-load the last checkpoint unless the caller opted out.
        if resume and os.path.exists(self.checkpoint_path):
            print(f"üîÑ Found checkpoint. Resuming from {self.checkpoint_path} ...")
            self.load_checkpoint()

    def save_checkpoint(self, epoch, is_best=False):
        """Write the training state to ``checkpoint_path``.

        When ``is_best`` is true the model weights alone are additionally
        written to ``best_model_path``.
        """
        state = {
            'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'best_val_acc': self.best_val_acc,
            'no_improve_epochs': self.no_improve_epochs
        }
        torch.save(state, self.checkpoint_path)

        if is_best:
            torch.save(self.model.state_dict(), self.best_model_path)
            print(f"‚úÖ Best model saved (Val Acc: {self.best_val_acc:.4f})")

    def load_checkpoint(self):
        """Restore model, optimizer, history and counters from ``checkpoint_path``."""
        checkpoint = torch.load(self.checkpoint_path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.history = checkpoint['history']
        self.best_val_acc = checkpoint['best_val_acc']
        # Older checkpoints may lack this key; treat them as "no stall so far".
        self.no_improve_epochs = checkpoint.get('no_improve_epochs', 0)
        # The stored epoch is the last finished one, so continue with the next.
        self.start_epoch = checkpoint['epoch'] + 1
        print(f"‚úÖ Resumed from epoch {self.start_epoch}")

    def train_epoch(self) -> float:
        """Run one pass over ``train_loader`` and return the mean batch loss."""
        self.model.train()
        total_loss = 0

        for batch in tqdm(self.train_loader, desc="Training"):
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            labels = batch['labels'].to(self.device)

            self.optimizer.zero_grad()
            logits = self.model(input_ids=input_ids, attention_mask=attention_mask)
            loss = self.criterion(logits, labels)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()

        return total_loss / len(self.train_loader)

    def validate_epoch(self) -> tuple:
        """Evaluate on ``val_loader``.

        Returns:
            ``(mean batch loss, accuracy)``. Predictions are
            ``sigmoid(logits) > 0.5`` and accuracy is ``accuracy_score`` over
            the stacked label/prediction matrices.
        """
        self.model.eval()
        total_loss = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Validation"):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                logits = self.model(input_ids=input_ids, attention_mask=attention_mask)
                loss = self.criterion(logits, labels)
                total_loss += loss.item()

                preds = (torch.sigmoid(logits) > 0.5).cpu().numpy()
                all_preds.append(preds)
                all_labels.append(labels.cpu().numpy())

        avg_loss = total_loss / len(self.val_loader)
        all_preds = np.vstack(all_preds)
        all_labels = np.vstack(all_labels)
        # Plain Python float so that history and checkpoints hold only
        # primitive values rather than numpy scalars.
        accuracy = float(accuracy_score(all_labels, all_preds))
        return avg_loss, accuracy

    def train(self, num_epochs: int):
        """Train from ``start_epoch`` up to ``num_epochs`` (exclusive upper bound).

        After every epoch the metrics are appended to ``history`` and a
        checkpoint is written; the best-model file is refreshed whenever the
        validation accuracy exceeds the best value seen so far. The loop ends
        early once ``patience`` consecutive epochs brought no improvement.
        """
        print("üöÄ Starting training...")

        for epoch in range(self.start_epoch, num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")

            train_loss = float(self.train_epoch())
            val_loss, val_acc = self.validate_epoch()
            val_loss, val_acc = float(val_loss), float(val_acc)

            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['val_acc'].append(val_acc)

            print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

            # Check improvement
            is_best = val_acc > self.best_val_acc
            if is_best:
                self.best_val_acc = val_acc
                self.no_improve_epochs = 0
            else:
                self.no_improve_epochs += 1
                print(f"‚ö†Ô∏è No improvement for {self.no_improve_epochs} epoch(s)")

            # Save checkpoint (always) and best model (if improved)
            self.save_checkpoint(epoch, is_best=is_best)

            # Early stopping
            if self.no_improve_epochs >= self.patience:
                print(f"üõë Early stopping triggered after {self.patience} epochs without improvement.")
                break

        print("\nüéâ Training complete!")


In [47]:
# Loss applied to the raw logits returned by the model.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

# Upper bound on epochs; Trainer stops earlier once `patience` epochs pass
# without a validation-accuracy improvement.
NUM_EPOCHS = 30
# Trainer.__init__ loads checkpoint.pth automatically when that file exists.
trainer = Trainer(model, train_loader, val_loader, criterion, optimizer, device)

In [48]:
# Run the training loop; checkpoint.pth is rewritten after every epoch.
trainer.train(NUM_EPOCHS)

üöÄ Starting training...

Epoch 1/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.72it/s]


Train Loss: 0.1142 | Val Loss: 0.2629 | Val Acc: 0.6369
‚úÖ Best model saved (Val Acc: 0.6369)

Epoch 2/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.76it/s]


Train Loss: 0.0742 | Val Loss: 0.2666 | Val Acc: 0.6816
‚úÖ Best model saved (Val Acc: 0.6816)

Epoch 3/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.72it/s]


Train Loss: 0.0467 | Val Loss: 0.2900 | Val Acc: 0.6735
‚ö†Ô∏è No improvement for 1 epoch(s)

Epoch 4/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:46<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.74it/s]


Train Loss: 0.0344 | Val Loss: 0.2908 | Val Acc: 0.6977
‚úÖ Best model saved (Val Acc: 0.6977)

Epoch 5/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.72it/s]


Train Loss: 0.0242 | Val Loss: 0.3060 | Val Acc: 0.6874
‚ö†Ô∏è No improvement for 1 epoch(s)

Epoch 6/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.73it/s]


Train Loss: 0.0212 | Val Loss: 0.3256 | Val Acc: 0.6794
‚ö†Ô∏è No improvement for 2 epoch(s)

Epoch 7/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.73it/s]


Train Loss: 0.0166 | Val Loss: 0.3269 | Val Acc: 0.6794
‚ö†Ô∏è No improvement for 3 epoch(s)

Epoch 8/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:47<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.74it/s]


Train Loss: 0.0132 | Val Loss: 0.3354 | Val Acc: 0.6830
‚ö†Ô∏è No improvement for 4 epoch(s)

Epoch 9/30


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 171/171 [01:46<00:00,  1.60it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:09<00:00,  4.72it/s]


Train Loss: 0.0115 | Val Loss: 0.3377 | Val Acc: 0.6859
‚ö†Ô∏è No improvement for 5 epoch(s)
üõë Early stopping triggered after 5 epochs without improvement.

üéâ Training complete!


In [50]:
from torch.utils.data import ConcatDataset, DataLoader

# Merge the training and validation datasets into one.
full_dataset = ConcatDataset([train_loader.dataset, val_loader.dataset])

# Loader over the merged data. It uses the same collate function as the
# other loaders so every batch in the notebook has the same structure.
full_loader = DataLoader(
    full_dataset,
    batch_size=train_loader.batch_size,
    shuffle=True,
    collate_fn=collator
)


In [52]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

# Upper bound on the epoch index, not the number of additional epochs (see note below).
NUM_EPOCHS = 20

# Using full_loader for both (train and val won't really matter)
# NOTE(review): Trainer.__init__ loads checkpoint.pth when it exists, so this
# run inherits the previous run's model/optimizer state, history and epoch
# counter. The recorded output shows "Resumed from epoch 9", i.e. only
# epochs 10-20 were executed here.
# NOTE(review): validation here runs on the same data used for training, so
# best_model.pth written by this run is selected on training-set accuracy.
trainer = Trainer(model, full_loader, full_loader, criterion, optimizer, device)
trainer.train(NUM_EPOCHS)


üîÑ Found checkpoint. Resuming from checkpoint.pth ...
‚úÖ Resumed from epoch 9
üöÄ Starting training...

Epoch 10/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:44<00:00,  4.76it/s]


Train Loss: 0.0894 | Val Loss: 0.0292 | Val Acc: 0.9673
‚úÖ Best model saved (Val Acc: 0.9673)

Epoch 11/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:15<00:00,  1.58it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.74it/s]


Train Loss: 0.0317 | Val Loss: 0.0103 | Val Acc: 0.9924
‚úÖ Best model saved (Val Acc: 0.9924)

Epoch 12/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:15<00:00,  1.58it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.74it/s]


Train Loss: 0.0151 | Val Loss: 0.0058 | Val Acc: 0.9969
‚úÖ Best model saved (Val Acc: 0.9969)

Epoch 13/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.75it/s]


Train Loss: 0.0096 | Val Loss: 0.0046 | Val Acc: 0.9972
‚úÖ Best model saved (Val Acc: 0.9972)

Epoch 14/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.75it/s]


Train Loss: 0.0079 | Val Loss: 0.0038 | Val Acc: 0.9975
‚úÖ Best model saved (Val Acc: 0.9975)

Epoch 15/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.75it/s]


Train Loss: 0.0067 | Val Loss: 0.0034 | Val Acc: 0.9982
‚úÖ Best model saved (Val Acc: 0.9982)

Epoch 16/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.75it/s]


Train Loss: 0.0063 | Val Loss: 0.0039 | Val Acc: 0.9975
‚ö†Ô∏è No improvement for 1 epoch(s)

Epoch 17/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.69it/s]


Train Loss: 0.0064 | Val Loss: 0.0034 | Val Acc: 0.9975
‚ö†Ô∏è No improvement for 2 epoch(s)

Epoch 18/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.75it/s]


Train Loss: 0.0070 | Val Loss: 0.0038 | Val Acc: 0.9965
‚ö†Ô∏è No improvement for 3 epoch(s)

Epoch 19/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:44<00:00,  4.76it/s]


Train Loss: 0.0061 | Val Loss: 0.0029 | Val Acc: 0.9979
‚ö†Ô∏è No improvement for 4 epoch(s)

Epoch 20/20


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [02:14<00:00,  1.59it/s]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 214/214 [00:45<00:00,  4.75it/s]


Train Loss: 0.0055 | Val Loss: 0.0041 | Val Acc: 0.9962
‚ö†Ô∏è No improvement for 5 epoch(s)
üõë Early stopping triggered after 5 epochs without improvement.

üéâ Training complete!


In [49]:
class Evaluator:
    """Compute multi-label classification metrics for a model on one loader.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``.
        val_loader: Iterable of batch dicts with ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'``.
        device: Device every batch tensor is moved to.
    """

    def __init__(self, model: nn.Module, val_loader: DataLoader, device: torch.device):
        self.model, self.val_loader, self.device = model, val_loader, device

    def evaluate(self) -> dict:
        """Run the model over ``val_loader`` and score the thresholded outputs.

        Returns:
            Dict with ``'accuracy'``, precision/recall/F1 under both macro and
            micro averaging, plus the stacked ``'predictions'`` and ``'labels'``
            arrays the scores were computed from.
        """
        self.model.eval()
        pred_chunks, label_chunks = [], []

        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Evaluating"):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                targets = batch['labels'].to(self.device)

                logits = self.model(input_ids=input_ids, attention_mask=attention_mask)
                # A label counts as predicted when its sigmoid output exceeds 0.5.
                predicted = torch.sigmoid(logits) > 0.5

                pred_chunks.append(predicted.cpu().numpy())
                label_chunks.append(targets.cpu().numpy())

        all_preds = np.vstack(pred_chunks)
        all_labels = np.vstack(label_chunks)

        results = {'accuracy': accuracy_score(all_labels, all_preds)}
        scorers = (
            ('precision', precision_score),
            ('recall', recall_score),
            ('f1', f1_score),
        )
        for average in ('macro', 'micro'):
            for metric_name, scorer in scorers:
                results[f'{metric_name}_{average}'] = scorer(
                    all_labels, all_preds, average=average, zero_division=0
                )
        results['predictions'] = all_preds
        results['labels'] = all_labels
        return results

evaluator = Evaluator(model, val_loader, device)
results = evaluator.evaluate()

# Horizontal rule used above and below the report.
rule = "=" * 50
print(rule)
print("EVALUATION RESULTS")
print(rule)
print(f"Exact Match Accuracy: {results['accuracy']:.4f}")
# One section per averaging mode, each listing precision, recall and F1.
for average, heading in (
    ("macro", "\nMacro Metrics (average across labels):"),
    ("micro", "\nMicro Metrics (aggregate all labels):"),
):
    print(heading)
    print(f"  Precision: {results['precision_' + average]:.4f}")
    print(f"  Recall:    {results['recall_' + average]:.4f}")
    print(f"  F1 Score:  {results['f1_' + average]:.4f}")
print(rule)

Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [00:08<00:00,  4.80it/s]

EVALUATION RESULTS
Exact Match Accuracy: 0.6859

Macro Metrics (average across labels):
  Precision: 0.8679
  Recall:    0.8045
  F1 Score:  0.8341

Micro Metrics (aggregate all labels):
  Precision: 0.8772
  Recall:    0.8340
  F1 Score:  0.8551





In [74]:
import torch
import pandas as pd
import numpy as np
label_columns = ['anger', 'fear', 'joy', 'sadness', 'surprise']

# Restore the weights stored in best_model.pth and switch to inference mode.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.to(device)
model.eval()

all_ids, all_preds = [], []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Generating predictions"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        ids = batch['id']

        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        # Each label is thresholded independently at 0.5 and stored as 0/1.
        preds = (torch.sigmoid(logits) > 0.5).int().cpu().numpy()

        all_ids.extend(ids)
        all_preds.append(preds)

# One row per test sample, one column per label.
all_preds = np.vstack(all_preds)

# 'id' goes first, followed by the label columns.
pred_df = pd.DataFrame(all_preds, columns=label_columns)
pred_df.insert(0, 'id', all_ids)

pred_df.to_csv("predictions.csv", index=False)
print("‚úÖ predictions.csv saved successfully!")


Generating predictions: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 54/54 [00:11<00:00,  4.64it/s]

‚úÖ predictions.csv saved successfully!





In [75]:
# Display the prediction frame as the cell output.
pred_df

Unnamed: 0,id,anger,fear,joy,sadness,surprise
0,0,1,1,0,1,0
1,1,0,0,0,0,0
2,2,1,1,0,0,0
3,3,0,1,0,0,0
4,4,0,1,0,0,1
...,...,...,...,...,...,...
1702,1702,0,1,0,1,0
1703,1703,0,0,0,0,0
1704,1704,0,1,0,1,0
1705,1705,0,0,0,0,0


In [76]:
# Show the shape of the test frame.
test.shape

(1707, 2)