In [1]:
!nvidia-smi

Wed Apr 23 20:41:44 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 572.83         CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3080        On  |   00000000:01:00.0  On |                  N/A |
| 48%   58C    P2            115W /  288W |    8260MiB /  10240MiB |     17%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [1]:
# Block 1: Setup and Configuration
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from datasets import load_from_disk
from tqdm.notebook import tqdm  # Use notebook version of tqdm for better display
from sklearn.metrics import accuracy_score, f1_score, classification_report
from collections import Counter
import numpy as np
import os

# --- Configuration ---
precomputed_dataset_path = "./iemocap_precomputed" # Path where you saved the embeddings
model_save_path = "late_fusion_model.pth" # Path to save the trained model

# Reproducibility: seed the RNGs that drive weight initialisation, dropout and
# DataLoader shuffling so that a fresh "Restart & Run All" trains the same way.
random_seed = 42
torch.manual_seed(random_seed)
np.random.seed(random_seed)

# Model Hyperparameters (tune these)
audio_embedding_dim = 768  # Hubert-base last hidden state dimension
text_embedding_dim = 768   # RoBERTa-base last hidden state dimension
hidden_dim = 256           # Hidden dimension for the classifier
dropout_rate = 0.3         # Dropout rate for regularization
learning_rate = 1e-4       # Learning rate for the optimizer
batch_size = 32            # Number of samples per batch
num_epochs = 15            # Number of training epochs (adjust based on convergence)

# Emotion mapping (ensure this matches your preprocessing).
# The list position of each name is its integer class index.
emotion_labels = ['neutral', 'happy', 'sad', 'angry', 'frustrated', 'excited', 'fear', 'disgust', 'surprise', 'other']
num_classes = len(emotion_labels)
label_to_idx = {label: idx for idx, label in enumerate(emotion_labels)}
idx_to_label = {idx: label for label, idx in label_to_idx.items()} # For interpreting results

# --- Device Setup ---
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"Number of classes: {num_classes}")
print(f"Labels: {emotion_labels}")

Using device: cuda
Number of classes: 10
Labels: ['neutral', 'happy', 'sad', 'angry', 'frustrated', 'excited', 'fear', 'disgust', 'surprise', 'other']


In [2]:
# Block 2: Load Precomputed Data
# Leaves `processed_dataset` as the loaded dataset on success, or None when the
# directory is missing, loading raises, or a required column is absent.
print(f"Loading precomputed dataset from {precomputed_dataset_path}...")

if os.path.exists(precomputed_dataset_path):
    try:
        processed_dataset = load_from_disk(precomputed_dataset_path)
        print("Dataset loaded successfully.")
        print("\nDataset structure:")
        print(processed_dataset)

        # Columns the later cells read from every batch.
        required_columns = ['audio_embedding', 'text_embedding', 'label_id']
        # Only the first split is inspected.
        example_split = next(iter(processed_dataset.keys()))
        print(f"\nChecking columns in split: '{example_split}'")
        available_columns = processed_dataset[example_split].column_names
        missing_columns = [col for col in required_columns if col not in available_columns]
        if missing_columns:
            print(f"ERROR: Missing one or more required columns: {required_columns}")
            print(f"Available columns: {processed_dataset[example_split].column_names}")
            processed_dataset = None # Invalidate dataset if columns missing
        else:
            print("Required columns found.")
            print(f"Features in '{example_split}' split: {processed_dataset[example_split].features}")

    except Exception as e:
        # Best-effort: report the problem and let downstream cells skip themselves.
        print(f"An error occurred while loading the dataset: {e}")
        processed_dataset = None
else:
    print(f"ERROR: Dataset directory not found at '{precomputed_dataset_path}'.")
    print("Please ensure the path is correct and you have run the preprocessing script.")
    processed_dataset = None

# Optional: show one row, preferring 'train' and otherwise the first split.
if processed_dataset:
    if 'train' in processed_dataset:
        print("\nSample data item (from 'train' split):")
        print(processed_dataset['train'][0])
    else:
        example_split = next(iter(processed_dataset.keys()))
        print(f"\nSample data item (from '{example_split}' split):")
        print(processed_dataset[example_split][0])

Loading precomputed dataset from ./iemocap_precomputed...
Dataset loaded successfully.

Dataset structure:
DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'frustrated', 'angry', 'sad', 'disgust', 'excited', 'fear', 'neutral', 'surprise', 'happy', 'EmoAct', 'EmoVal', 'EmoDom', 'gender', 'transcription', 'major_emotion', 'speaking_rate', 'pitch_mean', 'pitch_std', 'rms', 'relative_db', 'audio_embedding', 'text_embedding', 'label_id'],
        num_rows: 10039
    })
})

Checking columns in split: 'train'
Required columns found.
Features in 'train' split: {'file': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=None, mono=True, decode=True, id=None), 'frustrated': Value(dtype='float32', id=None), 'angry': Value(dtype='float32', id=None), 'sad': Value(dtype='float32', id=None), 'disgust': Value(dtype='float32', id=None), 'excited': Value(dtype='float32', id=None), 'fear': Value(dtype='float32', id=None), 'neutral': Value(dtype='float32', id=None), 'surpri

In [3]:
# Block 3: Define the Late Fusion Model
class LateFusionModel(nn.Module):
    """Two-layer MLP classifier over concatenated audio and text embeddings.

    Args:
        audio_dim: Width of the audio embedding.
        text_dim: Width of the text embedding.
        num_classes: Number of output logits.
        hidden_dim: Width of the hidden layer.
        dropout_rate: Dropout probability applied after the hidden ReLU.
    """

    def __init__(self, audio_dim, text_dim, num_classes, hidden_dim, dropout_rate):
        super().__init__()
        self.audio_dim = audio_dim
        self.text_dim = text_dim
        # Width of the fused vector fed to the first linear layer.
        self.combined_dim = audio_dim + text_dim

        # Layer order fixes the state_dict keys (classifier.0 / classifier.3).
        # An nn.LayerNorm(self.combined_dim) in front is an option for stability.
        # No Softmax at the end: nn.CrossEntropyLoss expects raw logits.
        head_layers = [
            nn.Linear(self.combined_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, audio_embedding, text_embedding):
        """Concatenate both embeddings along dim=1 and return the class logits."""
        fused = torch.cat((audio_embedding, text_embedding), dim=1)
        return self.classifier(fused)

print("LateFusionModel class defined.")

LateFusionModel class defined.


In [4]:
# Block 4: Instantiate Model and Move to Device

# The embedding widths come from the configuration cell and are not checked
# against the data here; compare with
# len(processed_dataset['train'][0]['audio_embedding']) if in doubt.
if not processed_dataset:
    print("Skipping model instantiation because the dataset was not loaded.")
    model = None
else:
    try:
        model = LateFusionModel(
            audio_dim=audio_embedding_dim,
            text_dim=text_embedding_dim,
            num_classes=num_classes,
            hidden_dim=hidden_dim,
            dropout_rate=dropout_rate
        ).to(device)

        print("Model instantiated successfully.")
        print("\nModel Architecture:")
        print(model)

        # Tally only the parameters the optimizer will actually update.
        total_params = 0
        for param in model.parameters():
            if param.requires_grad:
                total_params += param.numel()
        print(f'\nTotal Trainable Parameters: {total_params:,}')

    except Exception as e:
        # Downstream cells test `model` for truthiness, so reset it on failure.
        print(f"Error during model instantiation: {e}")
        model = None

Model instantiated successfully.

Model Architecture:
LateFusionModel(
  (classifier): Sequential(
    (0): Linear(in_features=1536, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
)

Total Trainable Parameters: 396,042


In [5]:
# Block 5: Prepare DataLoaders and Handle Imbalance (Optional)

# --- Validation split ---
# Carve a validation set out of 'train' only when 'train' exists and neither
# 'validation' nor 'dev' is already present.
if processed_dataset and 'train' in processed_dataset and 'validation' not in processed_dataset and 'dev' not in processed_dataset:
    print("\nNo 'validation' or 'dev' split found. Splitting 'train' set...")
    # 90% train / 10% validation; the fixed seed keeps the split identical across runs.
    # Adjust test_size as needed (e.g., 0.1 for 10%, 0.2 for 20%)
    train_val_split = processed_dataset['train'].train_test_split(test_size=0.1, shuffle=True, seed=42)

    # Update the dataset dictionary
    processed_dataset['train'] = train_val_split['train']
    processed_dataset['validation'] = train_val_split['test'] # The 'test' part of the split becomes our validation set

    print("Created 'validation' split from 'train' set.")
    print("New dataset structure:")
    print(processed_dataset)
elif processed_dataset and ('validation' in processed_dataset or 'dev' in processed_dataset):
    print("\nValidation split already exists.")
elif not processed_dataset:
     print("\nCannot create validation split because dataset failed to load.")

# Defaults so later cells can test these names even if setup below is skipped or fails.
train_loader = None
val_loader = None
test_loader = None # Optional: if you have a test set
class_weights = None

if processed_dataset and model: # Proceed only if dataset and model are ready
    try:
        # Expose only the columns the model consumes, as torch tensors.
        processed_dataset.set_format(type='torch', columns=['audio_embedding', 'text_embedding', 'label_id'])
        print("Dataset format set to 'torch'.")

        # --- Calculate Class Weights (for handling imbalance) ---
        if 'train' in processed_dataset:
            print("\nCalculating class weights for the training set...")
            label_counts = Counter(processed_dataset['train']['label_id'].numpy())
            total_samples = len(processed_dataset['train'])
            weights = []
            # Weights must be listed in label-index order (0 .. num_classes-1)
            # because nn.CrossEntropyLoss indexes `weight` by class id.
            for i in range(num_classes):
                count = label_counts.get(i, 0) # Get count, default to 0 if class not present
                if count == 0:
                     # A class absent from the training data gets a neutral weight of 1.0
                     # (the inverse-frequency formula would divide by zero).
                     print(f"Warning: Class index {i} ('{idx_to_label.get(i)}') not found in training data. Assigning weight 1.0.")
                     weights.append(1.0)
                else:
                    # Inverse-frequency weighting: total_samples / (num_classes * count)
                    weight = total_samples / (num_classes * count)
                    weights.append(weight)
                    print(f"  Class {i} ('{idx_to_label.get(i)}'): Count={count}, Weight={weight:.4f}")

            class_weights = torch.tensor(weights, dtype=torch.float32).to(device)
            print("Class weights calculated and moved to device.")

        else:
            print("Skipping class weight calculation: 'train' split not found.")
            class_weights = None # No weights if no train set

        # --- Create DataLoaders ---
        # `device` is a torch.device, and comparing it with the string 'cuda' is
        # always False, so test the device *type* to decide on pinned memory.
        use_pinned_memory = torch.device(device).type == 'cuda'

        def _make_loader(dataset, shuffle):
            """Build a DataLoader with the batch/worker settings shared by all splits."""
            return DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=2,
                pin_memory=use_pinned_memory,
            )

        if 'train' in processed_dataset:
            train_dataset = processed_dataset['train']
            train_loader = _make_loader(train_dataset, shuffle=True)
            print(f"\nTrain DataLoader created with {len(train_loader)} batches.")

        if 'validation' in processed_dataset: # Common split name
            val_dataset = processed_dataset['validation']
            val_loader = _make_loader(val_dataset, shuffle=False)
            print(f"Validation DataLoader created with {len(val_loader)} batches.")
        elif 'dev' in processed_dataset: # Another common name
            val_dataset = processed_dataset['dev']
            val_loader = _make_loader(val_dataset, shuffle=False)
            print(f"Development (Validation) DataLoader created with {len(val_loader)} batches.")

        if 'test' in processed_dataset:
            test_dataset = processed_dataset['test']
            test_loader = _make_loader(test_dataset, shuffle=False)
            print(f"Test DataLoader created with {len(test_loader)} batches.")

        # Verify at least train and validation loaders are available for training
        if not train_loader or not val_loader:
            print("\nWarning: Training requires both train and validation DataLoaders.")

    except Exception as e:
        # Best-effort: report and leave the loaders unset so the training cell skips itself.
        print(f"\nError setting dataset format or creating DataLoaders: {e}")
        train_loader, val_loader, test_loader = None, None, None

else:
    print("Skipping DataLoader creation because dataset or model is not ready.")
    


# ===================================================================


No 'validation' or 'dev' split found. Splitting 'train' set...
Created 'validation' split from 'train' set.
New dataset structure:
DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'frustrated', 'angry', 'sad', 'disgust', 'excited', 'fear', 'neutral', 'surprise', 'happy', 'EmoAct', 'EmoVal', 'EmoDom', 'gender', 'transcription', 'major_emotion', 'speaking_rate', 'pitch_mean', 'pitch_std', 'rms', 'relative_db', 'audio_embedding', 'text_embedding', 'label_id'],
        num_rows: 9035
    })
    validation: Dataset({
        features: ['file', 'audio', 'frustrated', 'angry', 'sad', 'disgust', 'excited', 'fear', 'neutral', 'surprise', 'happy', 'EmoAct', 'EmoVal', 'EmoDom', 'gender', 'transcription', 'major_emotion', 'speaking_rate', 'pitch_mean', 'pitch_std', 'rms', 'relative_db', 'audio_embedding', 'text_embedding', 'label_id'],
        num_rows: 1004
    })
})
Dataset format set to 'torch'.

Calculating class weights for the training set...
  Class 0 ('neutral'): Cou

In [6]:
# Block 6: Define Loss Function and Optimizer

# Both stay None when there is no model, which makes the training cell skip itself.
criterion, optimizer = None, None

if not model:
    print("Skipping loss/optimizer definition because model is not instantiated.")
else:
    # Weight the loss per class only when the weights were computed earlier.
    if class_weights is None:
        criterion = nn.CrossEntropyLoss()
        print("Using standard CrossEntropyLoss (no class weights).")
    else:
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        print("Using CrossEntropyLoss with class weights.")

    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    # A learning-rate scheduler could be created here as well, e.g.
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

    print("Loss function and optimizer defined.")

Using CrossEntropyLoss with class weights.
Loss function and optimizer defined.


In [7]:
# Block 7: Define Training and Evaluation Functions

def train_epoch(model, data_loader, criterion, optimizer, device, scheduler=None):
    """Run one optimisation pass over `data_loader` and report training metrics.

    Args:
        model: Module called as `model(audio_embedding, text_embedding)`.
        data_loader: Iterable of batches with keys 'audio_embedding',
            'text_embedding' and 'label_id'.
        criterion: Loss called as `criterion(logits, labels)`.
        optimizer: Optimizer updating `model`'s parameters.
        device: Device the batch tensors are moved to.
        scheduler: Optional learning-rate scheduler. Per-epoch schedulers are
            stepped once at the end of the pass. A ReduceLROnPlateau scheduler
            is NOT stepped here because it needs a validation metric; the
            caller must step it after evaluation.

    Returns:
        Tuple `(avg_loss, accuracy, f1_macro, f1_weighted)`, where `avg_loss`
        is the mean of the per-batch losses.
    """
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    progress_bar = tqdm(data_loader, desc="Training", leave=False)
    for batch in progress_bar:
        audio_emb = batch['audio_embedding'].to(device)
        text_emb = batch['text_embedding'].to(device)
        labels = batch['label_id'].to(device, dtype=torch.long) # CrossEntropyLoss needs Long targets

        optimizer.zero_grad()
        logits = model(audio_emb, text_emb)
        loss = criterion(logits, labels)
        loss.backward()
        # Optional: Gradient Clipping
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        batch_loss = loss.item()  # read once; .item() forces a device sync
        total_loss += batch_loss
        preds = torch.argmax(logits, dim=1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

        progress_bar.set_postfix(loss=batch_loss)

    avg_loss = total_loss / len(data_loader)
    accuracy = accuracy_score(all_labels, all_preds)
    f1_macro = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    f1_weighted = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    # Honour the scheduler argument: step per-epoch schedulers here, and leave
    # metric-driven ReduceLROnPlateau to the caller (stepped after evaluation).
    if scheduler is not None and not isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step()

    return avg_loss, accuracy, f1_macro, f1_weighted


def evaluate(model, data_loader, criterion, device, split_name="Validation", print_report=True):
    """Score `model` on `data_loader` without updating it.

    Args:
        model: Module called as `model(audio_embedding, text_embedding)`.
        data_loader: Iterable of batches with keys 'audio_embedding',
            'text_embedding' and 'label_id'.
        criterion: Loss called as `criterion(logits, labels)`.
        device: Device the batch tensors are moved to.
        split_name: Name used in the report header (e.g. "Validation", "Test").
        print_report: When True, print a per-class classification report.

    Returns:
        Tuple `(avg_loss, accuracy, f1_macro, f1_weighted)`, where `avg_loss`
        is the mean of the per-batch losses.

    Class names in the report are looked up in the module-level `idx_to_label`.
    """
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []

    progress_bar = tqdm(data_loader, desc="Evaluating", leave=False)
    with torch.no_grad():
        for batch in progress_bar:
            audio_emb = batch['audio_embedding'].to(device)
            text_emb = batch['text_embedding'].to(device)
            labels = batch['label_id'].to(device, dtype=torch.long)

            logits = model(audio_emb, text_emb)
            loss = criterion(logits, labels)

            batch_loss = loss.item()  # read once; .item() forces a device sync
            total_loss += batch_loss
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
            progress_bar.set_postfix(loss=batch_loss)

    avg_loss = total_loss / len(data_loader)
    accuracy = accuracy_score(all_labels, all_preds)
    f1_macro = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    f1_weighted = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    if print_report:
        print(f"\n{split_name} Classification Report:")
        # Report only the classes that occur in the targets or predictions, so
        # the names passed to sklearn line up one-to-one with `labels`.
        present_labels = sorted(set(all_labels) | set(all_preds))
        filtered_target_names = [idx_to_label.get(i, f"Class_{i}") for i in present_labels]

        try:
            print(classification_report(all_labels, all_preds, target_names=filtered_target_names, labels=present_labels, zero_division=0))
        except ValueError as e:
            print(f"Could not generate classification report: {e}")
            print(f"Present labels: {present_labels}")
            print(f"Target names for report: {filtered_target_names}")

    return avg_loss, accuracy, f1_macro, f1_weighted

print("Training and evaluation functions defined.")

Training and evaluation functions defined.


In [8]:
# Block 8: Training Loop

# --- Early Stopping Parameters ---
early_stopping_patience = 3  # Stop after N epochs with no improvement in validation F1 macro
best_val_f1_macro = -1.0     # Initialize best validation F1 score
epochs_no_improve = 0       # Counter for epochs without improvement
best_model_state = None     # Snapshot (copy) of the weights from the best epoch

# Dump the training components; a None here explains why training is skipped below.
print(model)
print(criterion)
print(optimizer)
print(train_loader)
print(val_loader)

# --- Check if we can proceed ---
if model and criterion and optimizer and train_loader and val_loader:
    print("\nStarting Training...")
    print(f"Epochs: {num_epochs}")
    print(f"Batch size: {batch_size}")
    print(f"Learning rate: {learning_rate}")
    print(f"Device: {device}")
    print("-" * 50)

    for epoch in range(num_epochs):
        # --- Training ---
        train_loss, train_acc, train_f1_macro, train_f1_weighted = train_epoch(
            model, train_loader, criterion, optimizer, device #, scheduler # Pass scheduler if using one
        )

        # --- Evaluation ---
        val_loss, val_acc, val_f1_macro, val_f1_weighted = evaluate(
            model, val_loader, criterion, device
        )

        print(f"\nEpoch {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Train F1 (Macro): {train_f1_macro:.4f} | Train F1 (Weighted): {train_f1_weighted:.4f}")
        print(f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f} | Val F1 (Macro): {val_f1_macro:.4f} | Val F1 (Weighted): {val_f1_weighted:.4f}")

        # --- Optional: Learning Rate Scheduler Step (if based on validation metric) ---
        # if scheduler and isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        #     scheduler.step(val_loss) # or scheduler.step(val_f1_macro) if monitoring F1
        #     current_lr = optimizer.param_groups[0]['lr']
        #     print(f"  Current LR: {current_lr:.6f}")

        # --- Early Stopping Check ---
        if val_f1_macro > best_val_f1_macro:
            best_val_f1_macro = val_f1_macro
            epochs_no_improve = 0
            # state_dict() returns references to the live parameter tensors, which
            # later optimizer steps overwrite in place. Clone them so the snapshot
            # really holds this epoch's weights.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            print(f"  New best validation F1 Macro: {best_val_f1_macro:.4f}. Saving model state.")
            # Optionally save immediately: torch.save(model.state_dict(), model_save_path)
        else:
            epochs_no_improve += 1
            print(f"  Validation F1 Macro did not improve. ({epochs_no_improve}/{early_stopping_patience})")

        if epochs_no_improve >= early_stopping_patience:
            print(f"\nEarly stopping triggered after {epoch + 1} epochs.")
            break # Exit the training loop

        print("-" * 50)

    print("\nTraining finished.")

    # --- Load Best Model State ---
    # Restore the snapshot so `model` ends with its best-epoch weights, not the last-epoch ones.
    if best_model_state:
        print(f"Loading model state from epoch with best validation F1 Macro: {best_val_f1_macro:.4f}")
        model.load_state_dict(best_model_state)
    else:
        print("No best model state was saved (perhaps training stopped early or validation metric never improved).")

else:
    # The component dump printed above shows which prerequisite is None.
    print("\nSkipping training loop due to missing model, data, loss function, or optimizer.")

LateFusionModel(
  (classifier): Sequential(
    (0): Linear(in_features=1536, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
)
CrossEntropyLoss()
AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: True
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.0001
    maximize: False
    weight_decay: 0.01
)
<torch.utils.data.dataloader.DataLoader object at 0x7fe7a70e53d0>
<torch.utils.data.dataloader.DataLoader object at 0x7fe7a6bb9eb0>

Starting Training...
Epochs: 15
Batch size: 32
Learning rate: 0.0001
Device: cuda
--------------------------------------------------


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.25      0.25      0.25       170
       happy       0.12      0.29      0.17        63
         sad       0.35      0.68      0.46       129
       angry       0.22      0.76      0.34       127
  frustrated       0.00      0.00      0.00       285
     excited       0.00      0.00      0.00       197
        fear       0.00      0.00      0.00        14
    surprise       0.00      0.00      0.00        18
       other       0.00      0.00      0.00         1

    accuracy                           0.24      1004
   macro avg       0.10      0.22      0.14      1004
weighted avg       0.12      0.24      0.16      1004


Epoch 1/15:
  Train Loss: 2.1783 | Train Acc: 0.2069 | Train F1 (Macro): 0.1238 | Train F1 (Weighted): 0.1825
  Val Loss:   2.0674 | Val Acc:   0.2430 | Val F1 (Macro): 0.1358 | Val F1 (Weighted): 0.1556
  New best validation F1 Macro: 0.1358. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.42      0.03      0.05       170
       happy       0.13      0.06      0.09        63
         sad       0.36      0.67      0.47       129
       angry       0.30      0.59      0.40       127
  frustrated       0.39      0.29      0.33       285
     excited       0.39      0.41      0.40       197
        fear       0.00      0.00      0.00        14
    surprise       0.20      0.44      0.28        18
       other       0.00      0.00      0.00         1

    accuracy                           0.34      1004
   macro avg       0.24      0.28      0.22      1004
weighted avg       0.35      0.34      0.30      1004


Epoch 2/15:
  Train Loss: 2.0516 | Train Acc: 0.2874 | Train F1 (Macro): 0.1748 | Train F1 (Weighted): 0.2626
  Val Loss:   1.9455 | Val Acc:   0.3406 | Val F1 (Macro): 0.2240 | Val F1 (Weighted): 0.3030
  New best validation F1 Macro: 0.2240. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.43      0.13      0.20       170
       happy       0.14      0.17      0.16        63
         sad       0.37      0.69      0.48       129
       angry       0.28      0.70      0.40       127
  frustrated       0.36      0.12      0.18       285
     excited       0.42      0.32      0.36       197
        fear       0.00      0.00      0.00        14
    surprise       0.15      0.56      0.24        18
       other       0.00      0.00      0.00         1

    accuracy                           0.32      1004
   macro avg       0.24      0.30      0.22      1004
weighted avg       0.35      0.32      0.28      1004


Epoch 3/15:
  Train Loss: 1.9775 | Train Acc: 0.3061 | Train F1 (Macro): 0.1889 | Train F1 (Weighted): 0.2831
  Val Loss:   1.8784 | Val Acc:   0.3177 | Val F1 (Macro): 0.2244 | Val F1 (Weighted): 0.2837
  New best validation F1 Macro: 0.2244. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.34      0.15      0.20       170
       happy       0.17      0.06      0.09        63
         sad       0.42      0.60      0.49       129
       angry       0.30      0.70      0.42       127
  frustrated       0.37      0.23      0.28       285
     excited       0.41      0.40      0.40       197
        fear       0.17      0.07      0.10        14
    surprise       0.21      0.56      0.30        18
       other       0.00      0.00      0.00         1

    accuracy                           0.35      1004
   macro avg       0.26      0.31      0.26      1004
weighted avg       0.35      0.35      0.32      1004


Epoch 4/15:
  Train Loss: 1.9382 | Train Acc: 0.3184 | Train F1 (Macro): 0.2038 | Train F1 (Weighted): 0.2979
  Val Loss:   1.8532 | Val Acc:   0.3486 | Val F1 (Macro): 0.2552 | Val F1 (Weighted): 0.3226
  New best validation F1 Macro: 0.2552. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.47      0.11      0.17       170
       happy       0.12      0.21      0.15        63
         sad       0.40      0.72      0.51       129
       angry       0.30      0.72      0.42       127
  frustrated       0.35      0.16      0.22       285
     excited       0.43      0.24      0.31       197
        fear       0.07      0.21      0.10        14
    surprise       0.21      0.39      0.27        18
       other       0.00      0.00      0.00         1

    accuracy                           0.32      1004
   macro avg       0.26      0.31      0.24      1004
weighted avg       0.36      0.32      0.29      1004


Epoch 5/15:
  Train Loss: 1.8915 | Train Acc: 0.3339 | Train F1 (Macro): 0.2168 | Train F1 (Weighted): 0.3182
  Val Loss:   1.8067 | Val Acc:   0.3167 | Val F1 (Macro): 0.2404 | Val F1 (Weighted): 0.2873
  Validation F1 Macro did not improve. (1/3)
----------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.42      0.19      0.27       170
       happy       0.14      0.25      0.18        63
         sad       0.41      0.72      0.52       129
       angry       0.38      0.65      0.48       127
  frustrated       0.47      0.26      0.34       285
     excited       0.45      0.25      0.32       197
        fear       0.09      0.14      0.11        14
    surprise       0.17      0.67      0.27        18
       other       0.00      0.00      0.00         1

    accuracy                           0.36      1004
   macro avg       0.28      0.35      0.28      1004
weighted avg       0.41      0.36      0.35      1004


Epoch 6/15:
  Train Loss: 1.8605 | Train Acc: 0.3416 | Train F1 (Macro): 0.2239 | Train F1 (Weighted): 0.3299
  Val Loss:   1.7612 | Val Acc:   0.3616 | Val F1 (Macro): 0.2756 | Val F1 (Weighted): 0.3489
  New best validation F1 Macro: 0.2756. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.38      0.40      0.39       170
       happy       0.18      0.16      0.17        63
         sad       0.46      0.63      0.53       129
       angry       0.33      0.70      0.45       127
  frustrated       0.45      0.16      0.24       285
     excited       0.45      0.34      0.39       197
        fear       0.00      0.00      0.00        14
    surprise       0.18      0.67      0.29        18
       other       0.00      0.00      0.00         1

    accuracy                           0.37      1004
   macro avg       0.27      0.34      0.27      1004
weighted avg       0.40      0.37      0.35      1004


Epoch 7/15:
  Train Loss: 1.8324 | Train Acc: 0.3484 | Train F1 (Macro): 0.2344 | Train F1 (Weighted): 0.3403
  Val Loss:   1.7540 | Val Acc:   0.3725 | Val F1 (Macro): 0.2731 | Val F1 (Weighted): 0.3522
  Validation F1 Macro did not improve. (1/3)
----------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.48      0.31      0.37       170
       happy       0.16      0.29      0.20        63
         sad       0.43      0.73      0.54       129
       angry       0.39      0.62      0.48       127
  frustrated       0.52      0.24      0.32       285
     excited       0.45      0.35      0.39       197
        fear       0.12      0.14      0.13        14
    surprise       0.19      0.67      0.29        18
       other       0.00      0.00      0.00         1

    accuracy                           0.39      1004
   macro avg       0.30      0.37      0.30      1004
weighted avg       0.44      0.39      0.38      1004


Epoch 8/15:
  Train Loss: 1.7960 | Train Acc: 0.3668 | Train F1 (Macro): 0.2488 | Train F1 (Weighted): 0.3595
  Val Loss:   1.7213 | Val Acc:   0.3904 | Val F1 (Macro): 0.3041 | Val F1 (Weighted): 0.3818
  New best validation F1 Macro: 0.3041. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.41      0.43      0.42       170
       happy       0.19      0.16      0.17        63
         sad       0.43      0.72      0.54       129
       angry       0.40      0.58      0.47       127
  frustrated       0.51      0.21      0.30       285
     excited       0.45      0.38      0.41       197
        fear       0.08      0.29      0.12        14
    surprise       0.24      0.56      0.34        18
       other       0.00      0.00      0.00         1

    accuracy                           0.40      1004
   macro avg       0.30      0.37      0.31      1004
weighted avg       0.43      0.40      0.39      1004


Epoch 9/15:
  Train Loss: 1.7694 | Train Acc: 0.3641 | Train F1 (Macro): 0.2475 | Train F1 (Weighted): 0.3572
  Val Loss:   1.7149 | Val Acc:   0.3974 | Val F1 (Macro): 0.3093 | Val F1 (Weighted): 0.3858
  New best validation F1 Macro: 0.3093. Saving model st

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.49      0.23      0.31       170
       happy       0.17      0.27      0.20        63
         sad       0.45      0.70      0.55       129
       angry       0.35      0.72      0.47       127
  frustrated       0.45      0.21      0.29       285
     excited       0.47      0.35      0.40       197
        fear       0.13      0.14      0.14        14
    surprise       0.18      0.67      0.29        18
       other       0.00      0.00      0.00         1

    accuracy                           0.38      1004
   macro avg       0.30      0.36      0.29      1004
weighted avg       0.42      0.38      0.36      1004


Epoch 10/15:
  Train Loss: 1.7515 | Train Acc: 0.3744 | Train F1 (Macro): 0.2523 | Train F1 (Weighted): 0.3676
  Val Loss:   1.6863 | Val Acc:   0.3785 | Val F1 (Macro): 0.2943 | Val F1 (Weighted): 0.3630
  Validation F1 Macro did not improve. (1/3)
---------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.43      0.44      0.43       170
       happy       0.19      0.21      0.20        63
         sad       0.46      0.65      0.54       129
       angry       0.39      0.66      0.49       127
  frustrated       0.50      0.24      0.32       285
     excited       0.49      0.37      0.42       197
        fear       0.09      0.21      0.13        14
    surprise       0.23      0.67      0.34        18
       other       0.00      0.00      0.00         1

    accuracy                           0.41      1004
   macro avg       0.31      0.38      0.32      1004
weighted avg       0.44      0.41      0.40      1004


Epoch 11/15:
  Train Loss: 1.7243 | Train Acc: 0.3832 | Train F1 (Macro): 0.2613 | Train F1 (Weighted): 0.3777
  Val Loss:   1.6738 | Val Acc:   0.4074 | Val F1 (Macro): 0.3192 | Val F1 (Weighted): 0.3978
  New best validation F1 Macro: 0.3192. Saving model s

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.48      0.34      0.40       170
       happy       0.18      0.33      0.23        63
         sad       0.44      0.73      0.55       129
       angry       0.40      0.57      0.47       127
  frustrated       0.50      0.24      0.32       285
     excited       0.46      0.29      0.36       197
        fear       0.07      0.07      0.07        14
    surprise       0.13      0.67      0.22        18
       other       0.00      0.00      0.00         1

    accuracy                           0.38      1004
   macro avg       0.29      0.36      0.29      1004
weighted avg       0.43      0.38      0.38      1004


Epoch 12/15:
  Train Loss: 1.6987 | Train Acc: 0.3904 | Train F1 (Macro): 0.2662 | Train F1 (Weighted): 0.3843
  Val Loss:   1.6434 | Val Acc:   0.3825 | Val F1 (Macro): 0.2905 | Val F1 (Weighted): 0.3780
  Validation F1 Macro did not improve. (1/3)
---------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.45      0.49      0.47       170
       happy       0.20      0.27      0.23        63
         sad       0.47      0.61      0.53       129
       angry       0.36      0.68      0.47       127
  frustrated       0.52      0.22      0.31       285
     excited       0.54      0.31      0.39       197
        fear       0.07      0.21      0.10        14
    surprise       0.23      0.61      0.33        18
       other       0.00      0.00      0.00         1

    accuracy                           0.40      1004
   macro avg       0.32      0.38      0.32      1004
weighted avg       0.45      0.40      0.39      1004


Epoch 13/15:
  Train Loss: 1.6825 | Train Acc: 0.3969 | Train F1 (Macro): 0.2741 | Train F1 (Weighted): 0.3921
  Val Loss:   1.6360 | Val Acc:   0.4024 | Val F1 (Macro): 0.3160 | Val F1 (Weighted): 0.3950
  Validation F1 Macro did not improve. (2/3)
---------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.46      0.38      0.41       170
       happy       0.24      0.29      0.26        63
         sad       0.47      0.62      0.54       129
       angry       0.38      0.65      0.48       127
  frustrated       0.48      0.30      0.37       285
     excited       0.52      0.34      0.41       197
        fear       0.08      0.21      0.12        14
    surprise       0.21      0.67      0.32        18
       other       0.00      0.00      0.00         1

    accuracy                           0.41      1004
   macro avg       0.31      0.38      0.32      1004
weighted avg       0.44      0.41      0.41      1004


Epoch 14/15:
  Train Loss: 1.6584 | Train Acc: 0.4003 | Train F1 (Macro): 0.2769 | Train F1 (Weighted): 0.3955
  Val Loss:   1.6233 | Val Acc:   0.4094 | Val F1 (Macro): 0.3221 | Val F1 (Weighted): 0.4076
  New best validation F1 Macro: 0.3221. Saving model s

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


Validation Classification Report:
              precision    recall  f1-score   support

     neutral       0.47      0.46      0.47       170
       happy       0.21      0.35      0.27        63
         sad       0.45      0.70      0.55       129
       angry       0.35      0.71      0.47       127
  frustrated       0.50      0.17      0.26       285
     excited       0.51      0.21      0.29       197
        fear       0.05      0.14      0.07        14
    surprise       0.21      0.67      0.32        18
       other       0.00      0.00      0.00         1

    accuracy                           0.38      1004
   macro avg       0.31      0.38      0.30      1004
weighted avg       0.44      0.38      0.36      1004


Epoch 15/15:
  Train Loss: 1.6367 | Train Acc: 0.4101 | Train F1 (Macro): 0.2819 | Train F1 (Weighted): 0.4059
  Val Loss:   1.6250 | Val Acc:   0.3825 | Val F1 (Macro): 0.2988 | Val F1 (Weighted): 0.3620
  Validation F1 Macro did not improve. (1/3)
---------