In [2]:
!nvidia-smi

Wed Apr 23 23:05:12 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 572.83         CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3080        On  |   00000000:01:00.0  On |                  N/A |
| 45%   62C    P2            115W /  288W |    5782MiB /  10240MiB |     20%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [16]:
# Block 1: Setup and Configuration
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from datasets import load_from_disk, concatenate_datasets
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report
from collections import Counter
import numpy as np
import os
import copy # For saving best model state

# --- Configuration ---
# Directory holding the dataset with precomputed audio/text embeddings.
precomputed_dataset_path = "./iemocap_precomputed" # Path where you saved the embeddings
# Single checkpoint path: both modality pathways live inside one combined model.
model_save_path = "combined_decision_avg_model.pth"

# Reproducibility: weight initialisation, dropout masks and DataLoader shuffling
# all draw from the global RNGs, so seed them once, before any model or loader
# is created.
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Model Hyperparameters (can still have separate hidden dims if desired)
# The embedding dims are the input widths of the two pathways and therefore
# have to match the width of the precomputed embeddings.
audio_embedding_dim = 768
text_embedding_dim = 768
audio_hidden_dim = 128 # Hidden dim for the audio pathway inside the combined model
text_hidden_dim = 128  # Hidden dim for the text pathway inside the combined model
dropout_rate = 0.3

# Training Hyperparameters
learning_rate = 1e-4 # One learning rate shared by the whole combined model
batch_size = 32
num_epochs = 15 # Number of training epochs for the combined model

# Emotion mapping (ensure this matches your preprocessing)
# The position of a label in this list is its class index.
emotion_labels = ['neutral', 'happy', 'sad', 'angry', 'frustrated', 'excited', 'fear', 'disgust', 'surprise', 'other']
num_classes = len(emotion_labels)
label_to_idx = {label: idx for idx, label in enumerate(emotion_labels)}
idx_to_label = {idx: label for label, idx in label_to_idx.items()}

# --- Device Setup ---
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"Number of classes: {num_classes}")
print("Fusion strategy: Logit Averaging (within single model)")

Using device: cuda
Number of classes: 10
Fusion strategy: Logit Averaging (within single model)


In [17]:
# Block 2: Load Precomputed Data & Create Splits (If Necessary)
#
# Module-level names produced by this cell:
#   processed_dataset     - the loaded dataset with torch formatting applied,
#                           or None if loading/validation failed at any step.
#   validation_split_name - name of the split used for validation
#                           ('dev' when present, otherwise 'validation').

# Columns every split must provide; the same list selects the columns that are
# exposed as torch tensors further down.
required_columns = ['audio_embedding', 'text_embedding', 'label_id']

processed_dataset = None
print(f"Loading precomputed dataset from {precomputed_dataset_path}...")

if not os.path.exists(precomputed_dataset_path):
    print(f"ERROR: Dataset directory not found at '{precomputed_dataset_path}'.")
else:
    try:
        processed_dataset = load_from_disk(precomputed_dataset_path)
        print("Dataset loaded successfully.")
        print("\nInitial dataset structure:")
        print(processed_dataset)

        # --- Ensure Validation Split Exists ---
        validation_split_name = 'validation' # Default name
        if 'validation' not in processed_dataset and 'dev' not in processed_dataset:
            print("\nNo 'validation' or 'dev' split found. Attempting to split 'train' set...")
            if 'train' in processed_dataset:
                # Split the train set (e.g., 90% train, 10% validation).
                # The fixed seed keeps the split identical across runs.
                train_val_split = processed_dataset['train'].train_test_split(test_size=0.1, shuffle=True, seed=42)
                processed_dataset['train'] = train_val_split['train']
                processed_dataset['validation'] = train_val_split['test']
                print("Created 'validation' split from 'train' set.")
                print("Updated dataset structure:")
                print(processed_dataset)
            else:
                print("ERROR: Cannot create validation split because 'train' split is missing.")
                processed_dataset = None # Invalidate if no train set to split from
        elif 'dev' in processed_dataset:
            validation_split_name = 'dev' # Use 'dev' if it exists
            print("\nValidation split ('dev') found.")
        else:
            print("\nValidation split ('validation') found.")

        # --- Verify Columns ---
        # Check every split, not just one: a single split lacking a column
        # would otherwise only surface later as a formatting/training error.
        if processed_dataset:
            for split_name, split_data in processed_dataset.items():
                if not all(col in split_data.column_names for col in required_columns):
                    print(f"ERROR: Missing one or more required columns: {required_columns}")
                    print(f"Available columns: {split_data.column_names}")
                    print(f"Offending split: '{split_name}'")
                    processed_dataset = None
                    break
            else:
                # Loop finished without hitting `break`: all splits are complete.
                print("Required columns found.")

    except Exception as e:
        # Best effort: report the problem and leave the dataset invalidated so
        # the downstream cells skip themselves instead of crashing.
        print(f"An error occurred while loading or processing the dataset: {e}")
        processed_dataset = None

# --- Set Format for PyTorch ---
if processed_dataset:
    try:
        processed_dataset.set_format(type='torch', columns=required_columns)
        print("\nDataset format set to 'torch'.")
    except Exception as e:
        print(f"Error setting dataset format: {e}")
        processed_dataset = None
else:
    # Ensure validation_split_name is defined even if dataset loading fails, to avoid later errors
    validation_split_name = 'validation'

Loading precomputed dataset from ./iemocap_precomputed...
Dataset loaded successfully.

Initial dataset structure:
DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'frustrated', 'angry', 'sad', 'disgust', 'excited', 'fear', 'neutral', 'surprise', 'happy', 'EmoAct', 'EmoVal', 'EmoDom', 'gender', 'transcription', 'major_emotion', 'speaking_rate', 'pitch_mean', 'pitch_std', 'rms', 'relative_db', 'audio_embedding', 'text_embedding', 'label_id'],
        num_rows: 10039
    })
})

No 'validation' or 'dev' split found. Attempting to split 'train' set...
Created 'validation' split from 'train' set.
Updated dataset structure:
DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'frustrated', 'angry', 'sad', 'disgust', 'excited', 'fear', 'neutral', 'surprise', 'happy', 'EmoAct', 'EmoVal', 'EmoDom', 'gender', 'transcription', 'major_emotion', 'speaking_rate', 'pitch_mean', 'pitch_std', 'rms', 'relative_db', 'audio_embedding', 'text_embedding', 'label_id'],

In [18]:
# Block 3: Define the Combined Model

class CombinedDecisionAvgModel(nn.Module):
    """Decision-level fusion classifier for paired audio/text embeddings.

    Each modality gets its own small classifier head
    (Linear -> ReLU -> Dropout -> Linear). The two heads' logits are averaged
    element-wise to give the final prediction.

    Args:
        audio_dim: Size of the audio embedding fed to the audio head.
        text_dim: Size of the text embedding fed to the text head.
        num_classes: Number of output logits produced by each head.
        audio_hidden_dim: Hidden width of the audio head.
        text_hidden_dim: Hidden width of the text head.
        dropout_rate: Dropout probability used inside both heads.
    """

    def __init__(self, audio_dim, text_dim, num_classes,
                 audio_hidden_dim, text_hidden_dim, dropout_rate):
        super().__init__()
        # Audio head is built first, then the text head; the attribute names
        # are the prefixes of the state_dict keys.
        self.audio_classifier = self._build_head(
            audio_dim, audio_hidden_dim, num_classes, dropout_rate
        )
        self.text_classifier = self._build_head(
            text_dim, text_hidden_dim, num_classes, dropout_rate
        )

    @staticmethod
    def _build_head(in_features, hidden_features, out_features, drop_prob):
        """Return one Linear -> ReLU -> Dropout -> Linear classifier head."""
        layers = [
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Linear(hidden_features, out_features),
        ]
        return nn.Sequential(*layers)

    def forward(self, audio_embedding, text_embedding):
        """Return the mean of the audio-head and text-head logits."""
        # Each head yields one row of class logits per input row; both are
        # cast to float before being combined.
        logits_from_audio = self.audio_classifier(audio_embedding).float()
        logits_from_text = self.text_classifier(text_embedding).float()
        return (logits_from_audio + logits_from_text) / 2.0

print("CombinedDecisionAvgModel class defined.")

CombinedDecisionAvgModel class defined.


In [19]:
# Block 4: Instantiate Model, Define Loss, Optimizer

# Everything stays None unless the dataset from Block 2 is usable, so later
# cells can detect that setup was skipped.
model = criterion = optimizer = class_weights = None

if not processed_dataset:
    print("Skipping model instantiation, loss, and optimizer definition because the dataset is not ready.")
else:
    # --- One combined model holding both modality pathways ---
    model = CombinedDecisionAvgModel(
        audio_dim=audio_embedding_dim,
        text_dim=text_embedding_dim,
        num_classes=num_classes,
        audio_hidden_dim=audio_hidden_dim,
        text_hidden_dim=text_hidden_dim,
        dropout_rate=dropout_rate,
    )
    model = model.to(device)

    print("Combined model instantiated.")
    print("\nCombined Model Architecture:")
    print(model)

    # --- Loss: class-weighted when the training labels are available ---
    if 'train' not in processed_dataset:
        print("Warning: 'train' split not found. Cannot calculate class weights.")
        print("Using standard CrossEntropyLoss.")
        criterion = nn.CrossEntropyLoss()
    else:
        print("\nCalculating class weights for the training set...")
        label_counts = Counter(processed_dataset['train']['label_id'].numpy())
        total_samples = len(processed_dataset['train'])

        # Balanced weighting: weight_c = N / (num_classes * count_c).
        # Classes with no training samples fall back to a neutral 1.0.
        weights = []
        for i in range(num_classes):
            count = label_counts.get(i, 0)
            if count:
                weights.append(total_samples / (num_classes * count))
                continue
            print(f"Warning: Class index {i} ('{idx_to_label.get(i)}') not found in training data. Assigning weight 1.0.")
            weights.append(1.0)

        class_weights = torch.tensor(weights, dtype=torch.float32).to(device)
        print("Class weights calculated and moved to device.")
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        print("Using CrossEntropyLoss with class weights.")

    # --- One optimizer over all parameters of the combined model ---
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    print("Optimizer defined.")

Combined model instantiated.

Combined Model Architecture:
CombinedDecisionAvgModel(
  (audio_classifier): Sequential(
    (0): Linear(in_features=768, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
  (text_classifier): Sequential(
    (0): Linear(in_features=768, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)

Calculating class weights for the training set...
Class weights calculated and moved to device.
Using CrossEntropyLoss with class weights.
Optimizer defined.


In [20]:
# Block 5: Prepare DataLoaders

# Loaders stay None unless they can actually be built; Block 7 checks them
# before starting to train.
train_loader = None
val_loader = None
test_loader = None

# validation_split_name should be set in Block 2 based on dataset contents
print(f"Using '{validation_split_name}' as the validation split.")

if processed_dataset and model: # Check if dataset and the single model are ready
    # Options shared by all three loaders.
    # pin_memory must be decided from the device *type*: a torch.device object
    # never compares equal to the plain string 'cuda', so a direct
    # `device == 'cuda'` test is always False and pinning would never turn on.
    loader_options = dict(
        batch_size=batch_size,
        num_workers=2,
        pin_memory=torch.device(device).type == 'cuda',
    )

    if 'train' in processed_dataset:
        train_dataset = processed_dataset['train']
        # Only the training data is reshuffled each epoch.
        train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
        print(f"Train DataLoader created with {len(train_loader)} batches.")
    else:
        print("Warning: Train DataLoader cannot be created ('train' split missing).")

    if validation_split_name in processed_dataset:
        val_dataset = processed_dataset[validation_split_name]
        val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
        print(f"Validation DataLoader created using '{validation_split_name}' split with {len(val_loader)} batches.")
    else:
        print(f"Warning: Validation DataLoader cannot be created ('{validation_split_name}' split missing).")


    if 'test' in processed_dataset:
        test_dataset = processed_dataset['test']
        test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)
        print(f"Test DataLoader created with {len(test_loader)} batches.")
    else:
        print("Note: Test DataLoader not created ('test' split missing).")

    # Verify required loaders exist for training.
    # Truthiness is intentional: a loader with zero batches is as unusable as
    # a missing one.
    if not train_loader or not val_loader:
        print("\nERROR: Training cannot proceed without both train and validation DataLoaders.")
        # Invalidate model if loaders are missing
        model = None

else:
    print("Skipping DataLoader creation.")

Using 'validation' as the validation split.
Train DataLoader created with 283 batches.
Validation DataLoader created using 'validation' split with 32 batches.
Note: Test DataLoader not created ('test' split missing).


In [21]:
# Block 6: Define Unified Training & Evaluation Functions

def train_epoch(model, data_loader, criterion, optimizer, device):
    """Run one optimisation pass of the combined model over ``data_loader``.

    Each batch is a mapping with the keys 'audio_embedding', 'text_embedding'
    and 'label_id'. The model is put in training mode and updated once per
    batch.

    Returns:
        Tuple ``(avg_loss, accuracy, f1_macro)`` where ``avg_loss`` is the mean
        of the per-batch losses and the two scores are computed over all
        predictions made during the epoch.
    """
    model.train()

    running_loss = 0
    epoch_preds, epoch_targets = [], []

    batch_iter = tqdm(data_loader, desc="Training", leave=False)
    for batch in batch_iter:
        # Move the batch onto the target device; labels must be int64 for the loss.
        audio_batch = batch['audio_embedding'].to(device)
        text_batch = batch['text_embedding'].to(device)
        targets = batch['label_id'].to(device, dtype=torch.long)

        # Standard step: clear grads -> forward -> loss -> backward -> update.
        optimizer.zero_grad()
        fused_logits = model(audio_batch, text_batch)  # already averaged inside the model
        batch_loss = criterion(fused_logits, targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value

        # Keep predictions/targets on the CPU for the sklearn metrics below.
        epoch_preds.extend(torch.argmax(fused_logits, dim=1).cpu().numpy())
        epoch_targets.extend(targets.cpu().numpy())
        batch_iter.set_postfix(loss=loss_value)

    return (
        running_loss / len(data_loader),
        accuracy_score(epoch_targets, epoch_preds),
        f1_score(epoch_targets, epoch_preds, average='macro', zero_division=0),
    )


def evaluate(model, data_loader, criterion, device, print_report=True):
    """Evaluate the combined model on ``data_loader`` without updating it.

    Each batch is a mapping with the keys 'audio_embedding', 'text_embedding'
    and 'label_id'. The model is switched to eval mode and gradients are
    disabled for the whole pass.

    Args:
        model: Module called as ``model(audio_embedding, text_embedding)``.
        data_loader: Iterable of batches; must support ``len()``.
        criterion: Loss callable, or None to skip loss computation.
        device: Device the batch tensors are moved to.
        print_report: When True (default, the historical behaviour) print a
            per-class classification report. Pass False to keep per-epoch
            output short.

    Returns:
        Tuple ``(avg_loss, accuracy, f1_macro, f1_weighted)``. ``avg_loss`` is
        the mean of the per-batch losses, or 0.0 when no criterion was given
        or the loader yielded no batches.
    """
    model.eval()
    # Explicit None check: whether a loss is "provided" should not depend on
    # the truthiness of the loss object.
    compute_loss = criterion is not None
    total_loss = 0.0
    all_preds = []
    all_labels = []

    progress_bar = tqdm(data_loader, desc="Evaluating", leave=False)
    with torch.no_grad():
        for batch in progress_bar:
            audio_emb = batch['audio_embedding'].to(device)
            text_emb = batch['text_embedding'].to(device)
            labels = batch['label_id'].to(device, dtype=torch.long)

            # Get final logits (already averaged inside the model)
            final_logits = model(audio_emb, text_emb)
            if compute_loss:
                batch_loss = criterion(final_logits, labels).item()
                total_loss += batch_loss
                progress_bar.set_postfix(loss=batch_loss)

            # Plain Python ints on the CPU are all the sklearn metrics need.
            all_preds.extend(torch.argmax(final_logits, dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    num_batches = len(data_loader)
    avg_loss = total_loss / num_batches if compute_loss and num_batches > 0 else 0.0
    accuracy = accuracy_score(all_labels, all_preds)
    f1_macro = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    f1_weighted = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    if print_report:
        print("\n--- Combined Model Classification Report ---")
        # Report only the classes that occur in the true labels, so the row
        # names stay aligned with the rows sklearn produces.
        present_labels = sorted(set(all_labels))
        filtered_target_names = [idx_to_label.get(i, f"Class_{i}") for i in present_labels]
        try:
            print(classification_report(all_labels, all_preds, labels=present_labels, target_names=filtered_target_names, zero_division=0))
        except Exception as e:
            # The report is informational only; never let it abort evaluation.
            print(f"Could not generate classification report: {e}")

    return avg_loss, accuracy, f1_macro, f1_weighted

print("Unified training and evaluation functions defined.")

Unified training and evaluation functions defined.


In [22]:
# Block 7: Training Loop (Train the Combined Model)

# Early-stopping bookkeeping. Model selection is driven by validation macro-F1.
best_model_state = None        # deep copy of the best-scoring weights so far
best_val_f1_macro = -1.0       # below any attainable F1, so the first epoch always wins
epochs_no_improve = 0          # consecutive epochs without a new best
early_stopping_patience = 3    # stop once this many such epochs have accumulated

# --- Train only when every component from the previous blocks is available ---
if not (model and criterion and optimizer and train_loader and val_loader):
    print("\nSkipping training loop: One or more required components (model, criterion, optimizer, loaders) are missing.")
else:
    print("\n" + "="*20 + " Training Combined Model " + "="*20)

    for epoch in range(num_epochs):
        # One pass over the training data, then a full validation pass.
        train_loss, train_acc, train_f1 = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        val_loss, val_acc, val_f1_macro, val_f1_weighted = evaluate(
            model, val_loader, criterion, device
        )

        print(f"\nEpoch {epoch+1}/{num_epochs} [Combined]:")
        print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Train F1 (Macro): {train_f1:.4f}")
        print(f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f} | Val F1 (Macro): {val_f1_macro:.4f} | Val F1 (Weighted): {val_f1_weighted:.4f}")

        # Only a strict improvement counts; ties add to the patience counter.
        if val_f1_macro <= best_val_f1_macro:
            epochs_no_improve += 1
            print(f"  Validation F1 Macro did not improve. ({epochs_no_improve}/{early_stopping_patience})")
        else:
            best_val_f1_macro = val_f1_macro
            epochs_no_improve = 0
            # Deep copy: state_dict() hands back references to the live
            # tensors, which later epochs keep updating.
            best_model_state = copy.deepcopy(model.state_dict())
            print(f"  New best validation F1 Macro: {best_val_f1_macro:.4f}. Saving model state.")

        if epochs_no_improve >= early_stopping_patience:
            print(f"\nEarly stopping triggered after {epoch + 1} epochs.")
            break
        print("-" * 50)

    print("\nTraining finished.")

    # --- Restore the best weights seen during training ---
    if not best_model_state:
        print("Warning: No best model state was saved (perhaps training stopped early or validation metric never improved).")
    else:
        print(f"Loading model state from epoch with best validation F1 Macro: {best_val_f1_macro:.4f}")
        model.load_state_dict(best_model_state)




Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.32      0.23      0.27       170
       happy       0.50      0.02      0.03        63
         sad       0.30      0.78      0.43       129
       angry       0.25      0.69      0.37       127
  frustrated       0.32      0.18      0.23       285
     excited       0.53      0.08      0.14       197
        fear       0.00      0.00      0.00        14
    surprise       0.00      0.00      0.00        18
       other       0.00      0.00      0.00         1

    accuracy                           0.30      1004
   macro avg       0.25      0.22      0.16      1004
weighted avg       0.35      0.30      0.24      1004


Epoch 1/15 [Combined]:
  Train Loss: 2.2205 | Train Acc: 0.2341 | Train F1 (Macro): 0.1337
  Val Loss:   2.1434 | Val Acc:   0.2958 | Val F1 (Macro): 0.1638 | Val F1 (Weighted): 0.2431
  New best validation F1 Macro: 0.1638. Saving model state.
---

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.28      0.12      0.17       170
       happy       0.10      0.05      0.06        63
         sad       0.34      0.73      0.46       129
       angry       0.28      0.66      0.40       127
  frustrated       0.36      0.19      0.25       285
     excited       0.40      0.30      0.34       197
        fear       0.00      0.00      0.00        14
    surprise       0.22      0.39      0.28        18
       other       0.00      0.00      0.00         1

    accuracy                           0.32      1004
   macro avg       0.22      0.27      0.22      1004
weighted avg       0.32      0.32      0.28      1004


Epoch 2/15 [Combined]:
  Train Loss: 2.1231 | Train Acc: 0.2794 | Train F1 (Macro): 0.1546
  Val Loss:   2.0376 | Val Acc:   0.3197 | Val F1 (Macro): 0.2179 | Val F1 (Weighted): 0.2842
  New best validation F1 Macro: 0.2179. Saving model state.
---

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.36      0.09      0.15       170
       happy       0.00      0.00      0.00        63
         sad       0.36      0.71      0.48       129
       angry       0.28      0.69      0.39       127
  frustrated       0.37      0.23      0.28       285
     excited       0.41      0.33      0.36       197
        fear       0.00      0.00      0.00        14
    surprise       0.19      0.50      0.27        18
       other       0.00      0.00      0.00         1

    accuracy                           0.33      1004
   macro avg       0.22      0.28      0.22      1004
weighted avg       0.33      0.33      0.29      1004


Epoch 3/15 [Combined]:
  Train Loss: 2.0486 | Train Acc: 0.2884 | Train F1 (Macro): 0.1700
  Val Loss:   1.9695 | Val Acc:   0.3327 | Val F1 (Macro): 0.2157 | Val F1 (Weighted): 0.2931
  Validation F1 Macro did not improve. (1/3)
------------------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.34      0.10      0.15       170
       happy       0.11      0.05      0.07        63
         sad       0.36      0.74      0.49       129
       angry       0.27      0.68      0.39       127
  frustrated       0.38      0.15      0.22       285
     excited       0.37      0.35      0.36       197
        fear       0.00      0.00      0.00        14
    surprise       0.19      0.50      0.28        18
       other       0.00      0.00      0.00         1

    accuracy                           0.32      1004
   macro avg       0.22      0.28      0.22      1004
weighted avg       0.33      0.32      0.28      1004


Epoch 4/15 [Combined]:
  Train Loss: 2.0084 | Train Acc: 0.3065 | Train F1 (Macro): 0.1798
  Val Loss:   1.9234 | Val Acc:   0.3197 | Val F1 (Macro): 0.2159 | Val F1 (Weighted): 0.2778
  Validation F1 Macro did not improve. (2/3)
------------------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.38      0.07      0.12       170
       happy       0.15      0.10      0.12        63
         sad       0.39      0.71      0.50       129
       angry       0.32      0.63      0.42       127
  frustrated       0.38      0.21      0.27       285
     excited       0.39      0.38      0.38       197
        fear       0.03      0.07      0.04        14
    surprise       0.18      0.56      0.27        18
       other       0.00      0.00      0.00         1

    accuracy                           0.33      1004
   macro avg       0.25      0.30      0.24      1004
weighted avg       0.35      0.33      0.30      1004


Epoch 5/15 [Combined]:
  Train Loss: 1.9813 | Train Acc: 0.3027 | Train F1 (Macro): 0.1864
  Val Loss:   1.8836 | Val Acc:   0.3347 | Val F1 (Macro): 0.2366 | Val F1 (Weighted): 0.3037
  New best validation F1 Macro: 0.2366. Saving model state.
---

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.46      0.07      0.12       170
       happy       0.12      0.19      0.14        63
         sad       0.39      0.69      0.50       129
       angry       0.28      0.66      0.40       127
  frustrated       0.38      0.13      0.20       285
     excited       0.39      0.31      0.35       197
        fear       0.05      0.07      0.06        14
    surprise       0.14      0.56      0.23        18
       other       0.00      0.00      0.00         1

    accuracy                           0.31      1004
   macro avg       0.25      0.30      0.22      1004
weighted avg       0.36      0.31      0.27      1004


Epoch 6/15 [Combined]:
  Train Loss: 1.9560 | Train Acc: 0.3167 | Train F1 (Macro): 0.1964
  Val Loss:   1.8544 | Val Acc:   0.3068 | Val F1 (Macro): 0.2216 | Val F1 (Weighted): 0.2732
  Validation F1 Macro did not improve. (1/3)
------------------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.44      0.15      0.23       170
       happy       0.12      0.13      0.12        63
         sad       0.41      0.71      0.52       129
       angry       0.32      0.65      0.43       127
  frustrated       0.41      0.21      0.28       285
     excited       0.41      0.39      0.40       197
        fear       0.00      0.00      0.00        14
    surprise       0.15      0.56      0.24        18
       other       0.00      0.00      0.00         1

    accuracy                           0.35      1004
   macro avg       0.25      0.31      0.25      1004
weighted avg       0.38      0.35      0.33      1004


Epoch 7/15 [Combined]:
  Train Loss: 1.9229 | Train Acc: 0.3214 | Train F1 (Macro): 0.2049
  Val Loss:   1.8329 | Val Acc:   0.3516 | Val F1 (Macro): 0.2457 | Val F1 (Weighted): 0.3282
  New best validation F1 Macro: 0.2457. Saving model state.
---

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.44      0.14      0.21       170
       happy       0.13      0.16      0.14        63
         sad       0.40      0.71      0.52       129
       angry       0.35      0.65      0.45       127
  frustrated       0.44      0.24      0.31       285
     excited       0.42      0.35      0.38       197
        fear       0.09      0.14      0.11        14
    surprise       0.17      0.61      0.27        18
       other       0.00      0.00      0.00         1

    accuracy                           0.36      1004
   macro avg       0.27      0.33      0.27      1004
weighted avg       0.39      0.36      0.34      1004


Epoch 8/15 [Combined]:
  Train Loss: 1.9012 | Train Acc: 0.3300 | Train F1 (Macro): 0.2127
  Val Loss:   1.8062 | Val Acc:   0.3576 | Val F1 (Macro): 0.2660 | Val F1 (Weighted): 0.3381
  New best validation F1 Macro: 0.2660. Saving model state.
---

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.46      0.17      0.25       170
       happy       0.12      0.22      0.16        63
         sad       0.39      0.73      0.50       129
       angry       0.36      0.63      0.46       127
  frustrated       0.45      0.25      0.32       285
     excited       0.39      0.25      0.30       197
        fear       0.09      0.07      0.08        14
    surprise       0.18      0.61      0.28        18
       other       0.00      0.00      0.00         1

    accuracy                           0.35      1004
   macro avg       0.27      0.33      0.26      1004
weighted avg       0.39      0.35      0.33      1004


Epoch 9/15 [Combined]:
  Train Loss: 1.8829 | Train Acc: 0.3222 | Train F1 (Macro): 0.2087
  Val Loss:   1.7986 | Val Acc:   0.3476 | Val F1 (Macro): 0.2607 | Val F1 (Weighted): 0.3308
  Validation F1 Macro did not improve. (1/3)
------------------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.49      0.18      0.27       170
       happy       0.13      0.19      0.15        63
         sad       0.41      0.71      0.52       129
       angry       0.34      0.65      0.44       127
  frustrated       0.44      0.20      0.28       285
     excited       0.42      0.37      0.39       197
        fear       0.15      0.14      0.15        14
    surprise       0.19      0.67      0.29        18
       other       0.00      0.00      0.00         1

    accuracy                           0.36      1004
   macro avg       0.29      0.35      0.28      1004
weighted avg       0.40      0.36      0.34      1004


Epoch 10/15 [Combined]:
  Train Loss: 1.8695 | Train Acc: 0.3397 | Train F1 (Macro): 0.2235
  Val Loss:   1.7797 | Val Acc:   0.3596 | Val F1 (Macro): 0.2769 | Val F1 (Weighted): 0.3401
  New best validation F1 Macro: 0.2769. Saving model state.
--

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.49      0.20      0.28       170
       happy       0.12      0.22      0.16        63
         sad       0.41      0.74      0.52       129
       angry       0.34      0.65      0.45       127
  frustrated       0.46      0.22      0.30       285
     excited       0.40      0.26      0.32       197
        fear       0.11      0.21      0.15        14
    surprise       0.20      0.56      0.30        18
       other       0.00      0.00      0.00         1

    accuracy                           0.35      1004
   macro avg       0.28      0.34      0.28      1004
weighted avg       0.40      0.35      0.34      1004


Epoch 11/15 [Combined]:
  Train Loss: 1.8445 | Train Acc: 0.3414 | Train F1 (Macro): 0.2250
  Val Loss:   1.7698 | Val Acc:   0.3536 | Val F1 (Macro): 0.2757 | Val F1 (Weighted): 0.3378
  Validation F1 Macro did not improve. (1/3)
-----------------

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.45      0.24      0.31       170
       happy       0.14      0.16      0.15        63
         sad       0.41      0.74      0.53       129
       angry       0.36      0.65      0.46       127
  frustrated       0.45      0.24      0.31       285
     excited       0.42      0.34      0.38       197
        fear       0.17      0.14      0.15        14
    surprise       0.20      0.67      0.30        18
       other       0.00      0.00      0.00         1

    accuracy                           0.37      1004
   macro avg       0.29      0.35      0.29      1004
weighted avg       0.40      0.37      0.36      1004


Epoch 12/15 [Combined]:
  Train Loss: 1.8381 | Train Acc: 0.3409 | Train F1 (Macro): 0.2299
  Val Loss:   1.7576 | Val Acc:   0.3745 | Val F1 (Macro): 0.2878 | Val F1 (Weighted): 0.3569
  New best validation F1 Macro: 0.2878. Saving model state.
--

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.44      0.23      0.30       170
       happy       0.12      0.17      0.15        63
         sad       0.45      0.68      0.54       129
       angry       0.36      0.67      0.47       127
  frustrated       0.45      0.26      0.33       285
     excited       0.43      0.35      0.38       197
        fear       0.20      0.14      0.17        14
    surprise       0.19      0.67      0.30        18
       other       0.00      0.00      0.00         1

    accuracy                           0.38      1004
   macro avg       0.29      0.35      0.29      1004
weighted avg       0.40      0.38      0.36      1004


Epoch 13/15 [Combined]:
  Train Loss: 1.8148 | Train Acc: 0.3534 | Train F1 (Macro): 0.2357
  Val Loss:   1.7438 | Val Acc:   0.3765 | Val F1 (Macro): 0.2919 | Val F1 (Weighted): 0.3641
  New best validation F1 Macro: 0.2919. Saving model state.
--

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.45      0.31      0.37       170
       happy       0.14      0.22      0.17        63
         sad       0.44      0.70      0.54       129
       angry       0.37      0.66      0.47       127
  frustrated       0.47      0.26      0.33       285
     excited       0.42      0.26      0.32       197
        fear       0.12      0.14      0.13        14
    surprise       0.19      0.67      0.29        18
       other       0.00      0.00      0.00         1

    accuracy                           0.38      1004
   macro avg       0.29      0.36      0.29      1004
weighted avg       0.41      0.38      0.37      1004


Epoch 14/15 [Combined]:
  Train Loss: 1.8111 | Train Acc: 0.3613 | Train F1 (Macro): 0.2418
  Val Loss:   1.7280 | Val Acc:   0.3775 | Val F1 (Macro): 0.2924 | Val F1 (Weighted): 0.3671
  New best validation F1 Macro: 0.2924. Saving model state.
--

Training:   0%|          | 0/283 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]


--- Combined Model Classification Report ---
              precision    recall  f1-score   support

     neutral       0.49      0.23      0.31       170
       happy       0.13      0.24      0.17        63
         sad       0.40      0.73      0.52       129
       angry       0.38      0.65      0.48       127
  frustrated       0.53      0.23      0.32       285
     excited       0.39      0.29      0.33       197
        fear       0.09      0.21      0.12        14
    surprise       0.22      0.61      0.33        18
       other       0.00      0.00      0.00         1

    accuracy                           0.37      1004
   macro avg       0.29      0.36      0.29      1004
weighted avg       0.42      0.37      0.35      1004


Epoch 15/15 [Combined]:
  Train Loss: 1.7945 | Train Acc: 0.3603 | Train F1 (Macro): 0.2375
  Val Loss:   1.7254 | Val Acc:   0.3665 | Val F1 (Macro): 0.2871 | Val F1 (Weighted): 0.3546
  Validation F1 Macro did not improve. (1/3)
-----------------

In [23]:
# Block 8: Final Evaluation on Test Set
#
# Scores the best checkpoint (the state kept whenever validation macro-F1
# improved during training) once on the test loader and prints the summary.
# The cell is skipped with an explanatory message when a prerequisite is missing.
#
# NOTE(review): truthiness of a DataLoader goes through len(), so a loader with
# zero batches is reported the same way as a missing one — presumably intended.

if model and test_loader and best_model_state:
    print("\n" + "="*20 + " Evaluating Combined Model on Test Set " + "="*20)

    # Roll back to the best checkpoint: the live model may hold weights from a
    # later epoch that scored worse on validation.
    model.load_state_dict(best_model_state)

    # criterion is passed, so the test loss is computed alongside the metrics.
    test_loss, test_acc, test_f1_macro, test_f1_weighted = evaluate(
        model,
        test_loader,
        criterion,
        device
    )

    print("-" * 50)
    print("Final Test Set Performance (Combined Model - Logit Avg):")
    # The loss was previously computed and then dropped. Report it, guarded in
    # case evaluate() yields no loss (e.g. when it is called without a criterion
    # — TODO confirm against evaluate()'s definition).
    if test_loss is not None:
        print(f"  Test Loss:        {test_loss:.4f}")
    print(f"  Test Accuracy:    {test_acc:.4f}")
    print(f"  Test F1 (Macro):  {test_f1_macro:.4f}")
    print(f"  Test F1 (Weighted):{test_f1_weighted:.4f}")
    print("-" * 50)

elif not test_loader:
    print("\nSkipping final test set evaluation: Test DataLoader not available.")
elif not best_model_state:
    print("\nSkipping final test set evaluation: Best model state not available (training may have failed or not run).")
else:
    # Only reachable when `model` itself is falsy.
    print("\nSkipping final test set evaluation due to missing components.")


Skipping final test set evaluation: Test DataLoader not available.


In [24]:
# Block 9: Save Final Model
#
# Writes the best state dictionary (not the whole module) to `model_save_path`.
# A failed save is reported rather than raised, so the rest of the notebook
# keeps running.

if not (model and best_model_state):
    print("Skipping model saving: Model not trained or best state not available.")
else:
    try:
        torch.save(best_model_state, model_save_path)
        print(f"Best combined model state dictionary saved to: {model_save_path}")
    except Exception as save_error:
        print(f"Error saving combined model: {save_error}")

Best combined model state dictionary saved to: combined_decision_avg_model.pth
