# Training Music Classification Models

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datetime import datetime
from pathlib import Path
import random

def set_seed(seed=42):
    """Set seeds for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

## Training Configuration

In [13]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Training hyperparameters
BATCH_SIZE = 32
LEARNING_RATE = 0.001  # Increased back to 0.001 for OneCycleLR
NUM_EPOCHS = 50
EARLY_STOPPING_PATIENCE = 15 # Increased patience for OneCycleLR

Using device: cuda


In [14]:
# Setup run directory
run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = Path(f"../runs/{run_id}")
run_dir.mkdir(parents=True, exist_ok=True)
print(f"Run directory created at: {run_dir}")

# Create changes.md
changes_file = run_dir / "changes.md"
with open(changes_file, "w") as f:
    f.write(f"# Run {run_id}\n\n")
    f.write("## Configuration\n")
    f.write(f"- Batch Size: {BATCH_SIZE}\n")
    f.write(f"- Learning Rate: {LEARNING_RATE}\n")
    f.write(f"- Epochs: {NUM_EPOCHS}\n")
    f.write(f"- Device: {device}\n")
    f.write(f"- Data Strategy: Chunking (3s chunks, 50% overlap)\n")
    f.write(f"- Augmentation: Noise=0.01, Shift=0.3\n")
    f.write(f"- Optimization: In-memory caching + Mixed Precision (AMP)\n")
    f.write(f"- Stability: Seed=42, Weight Decay=1e-4 (Standard), Gradient Clipping=1.0\n")
    f.write(f"- Data Split: Stratified (Balanced Validation Set)\n\n")
    f.write("## Changes\n")
    f.write("- Increased Dropout in Residual Blocks from 0.2 to 0.3 to combat slight overfitting.\n\n")
    f.write("## Results\n")

Run directory created at: ..\runs\20251202_114502


## Training Function (Single-label Classification)

In [15]:
def mixup_data(x, y, alpha=1.0, device='cuda'):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Train for one epoch with Mixup."""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    # Use mixed precision training
    scaler = torch.amp.GradScaler()
    
    pbar = tqdm(train_loader, desc='Training')
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Apply Mixup
        inputs, targets_a, targets_b, lam = mixup_data(inputs, labels, alpha=0.4, device=device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass with mixed precision
        with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        
        # Backward pass and optimize
        scaler.scale(loss).backward()
        
        # Gradient clipping
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        scaler.step(optimizer)
        scaler.update()
        
        # Statistics
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        
        # Weighted accuracy for mixup
        correct += (lam * (predicted == targets_a).float() + (1 - lam) * (predicted == targets_b).float()).sum().item()
        
        pbar.set_postfix({'loss': loss.item(), 'acc': 100 * correct / total})
    
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc

In [16]:
def validate_epoch(model, val_loader, criterion, device):
    """Validate for one epoch."""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        pbar = tqdm(val_loader, desc='Validation')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            pbar.set_postfix({'loss': loss.item(), 'acc': 100 * correct / total})
    
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc, all_preds, all_labels

In [None]:
def train_model(model, train_loader, val_loader, num_epochs, learning_rate, device, 
                save_path='../models/best_model.pth', changes_file=None):
    
    model = model.to(device)
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01) # Increased weight decay for AdamW
    
    # OneCycleLR Scheduler
    # Steps per epoch is len(train_loader)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer, 
        max_lr=learning_rate, 
        steps_per_epoch=len(train_loader), 
        epochs=num_epochs,
        pct_start=0.3, # Warmup for 30% of training
        div_factor=25.0,
        final_div_factor=1000.0
    )
    
    # Training history
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }
    
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)
        
        # Train
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        scaler = torch.amp.GradScaler()
        
        pbar = tqdm(train_loader, desc='Training')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Apply Mixup
            inputs, targets_a, targets_b, lam = mixup_data(inputs, labels, alpha=0.4, device=device)
            
            optimizer.zero_grad()
            
            with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(inputs)
                loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
            
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            
            # Step scheduler every batch for OneCycleLR
            scheduler.step()
            
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (lam * (predicted == targets_a).float() + (1 - lam) * (predicted == targets_b).float()).sum().item()
            
            pbar.set_postfix({'loss': loss.item(), 'lr': scheduler.get_last_lr()[0]})
        
        train_loss = running_loss / total
        train_acc = 100 * correct / total
        
        # Validate
        val_loss, val_acc, _, _ = validate_epoch(model, val_loader, criterion, device)
        
        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        
        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Save best model
            torch.save(model.state_dict(), save_path)
            print(f"Model saved to {save_path}")
        else:
            patience_counter += 1
            if patience_counter >= EARLY_STOPPING_PATIENCE:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                break
    
    if changes_file:
        with open(changes_file, "a") as f:
            f.write(f"- Final Train Loss: {history['train_loss'][-1]:.4f}\n")
            f.write(f"- Final Val Loss: {history['val_loss'][-1]:.4f}\n")
            f.write(f"- Final Train Acc: {history['train_acc'][-1]:.2f}%\n")
            f.write(f"- Final Val Acc: {history['val_acc'][-1]:.2f}%\n")

    return history

## Plot Training History

In [None]:
def plot_training_history(history, multi_label=False, save_path=None):
    fig, axes = plt.subplots(1, 2 if not multi_label else 1, figsize=(15, 5))
    
    # Loss plot
    axes[0].plot(history['train_loss'], label='Train Loss')
    axes[0].plot(history['val_loss'], label='Val Loss')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training and Validation Loss')
    axes[0].legend()
    axes[0].grid(True)
    
    # Accuracy plot
    axes[1].plot(history['train_acc'], label='Train Accuracy')
    axes[1].plot(history['val_acc'], label='Val Accuracy')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy (%)')
    axes[1].set_title('Training and Validation Accuracy')
    axes[1].legend()
    axes[1].grid(True)
    
    plt.tight_layout()
    
    if save_path:
        plt.savefig(save_path)
        plt.close()
    else:
        plt.show()

### For Single-label Classification (GTZAN, FMA)

In [None]:
# Ensure repository root is on sys.path
import os
import sys
from pathlib import Path
repo_root = Path.cwd().parent
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

# Import model (prefer module; fallback to notebook)
try:
    from model_cnn import ComplexCNN
except ModuleNotFoundError:
    print("Model module not found; loading from notebook via %run ...")
    %run "./04_model_cnn.ipynb"

from utils.datasets_gtzan import GTZANDataset, create_dataloaders, GENRES, AudioAugmentation

# Create dataset with in-memory caching
gtzan_root = repo_root / "data" / "gtzan"
dataset = GTZANDataset(str(gtzan_root), cache_to_memory=True)
print(f"GTZAN files: {len(dataset)}")

# Define augmentation
train_transform = AudioAugmentation(noise_level=0.01, shift_max=0.3)

# Create loaders with Stratified Split and Chunking
# NOTE: With cache_to_memory=True, use num_workers=0 on Windows to avoid 
# pickling the entire cached dataset to worker processes, which causes hangs/OOM.
train_loader, val_loader, test_loader = create_dataloaders(
    dataset, 
    batch_size=BATCH_SIZE, 
    num_workers=0,
    train_transform=train_transform,
    chunk_length_sec=3.0, # Enable chunking
    test_split=0.1 # Create test split
)

# Create model
model = ComplexCNN(n_classes=10)

# Train
history = train_model(
    model, train_loader, val_loader,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    device=device,
    save_path=str(run_dir / 'gtzan_cnn.pth'),
    changes_file=changes_file
)

# Plot results
plot_training_history(history, save_path=str(run_dir / 'training_history.png'))

# Load best model for evaluation
print(f"Loading best model from {run_dir / 'gtzan_cnn.pth'}...")
model.load_state_dict(torch.load(str(run_dir / 'gtzan_cnn.pth')))

Model module not found; loading from notebook via %run ...
SimpleCNN:
SimpleCNN(
  (mel_spec): MelSpectrogram(
    (spectrogram): Spectrogram()
    (mel_scale): MelScale()
  )
  (amplitude_to_db): AmplitudeToDB()
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(128, 256, ke

  model.load_state_dict(torch.load(path))


Caching complete.
GTZAN files: 999
Created stratified split: 719 train, 180 val, 100 test songs
Applying chunking: 3.0s chunks with 50% overlap
Chunked dataset sizes: 13661 train, 3420 val, 1900 test chunks
Created stratified split: 719 train, 180 val, 100 test songs
Applying chunking: 3.0s chunks with 50% overlap
Chunked dataset sizes: 13661 train, 3420 val, 1900 test chunks

Epoch 1/50
--------------------------------------------------

Epoch 1/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:33<00:00, 12.80it/s, loss=1.79, lr=5.05e-5]
Training: 100%|██████████| 427/427 [00:33<00:00, 12.80it/s, loss=1.79, lr=5.05e-5]
Validation: 100%|██████████| 107/107 [00:03<00:00, 32.06it/s, loss=0.848, acc=46.4]



Train Loss: 2.0429, Train Acc: 29.72%
Val Loss: 1.8058, Val Acc: 46.43%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 2/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:33<00:00, 12.84it/s, loss=1.22, lr=8.15e-5]
Training: 100%|██████████| 427/427 [00:33<00:00, 12.84it/s, loss=1.22, lr=8.15e-5]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.76it/s, loss=0.718, acc=58.9]



Train Loss: 1.7524, Train Acc: 46.52%
Val Loss: 1.5547, Val Acc: 58.92%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 3/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.78it/s, loss=1.54, lr=0.000132] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.78it/s, loss=1.54, lr=0.000132]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.37it/s, loss=1.12, acc=53.3]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.37it/s, loss=1.12, acc=53.3]


Train Loss: 1.5791, Train Acc: 55.98%
Val Loss: 1.6244, Val Acc: 53.27%

Epoch 4/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.49it/s, loss=2.05, lr=0.000199] 
Training: 100%|██████████| 427/427 [00:31<00:00, 13.49it/s, loss=2.05, lr=0.000199]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.36it/s, loss=0.778, acc=67.2]



Train Loss: 1.5284, Train Acc: 58.70%
Val Loss: 1.2703, Val Acc: 67.22%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 5/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.59it/s, loss=1.63, lr=0.00028]  
Training: 100%|██████████| 427/427 [00:31<00:00, 13.59it/s, loss=1.63, lr=0.00028]
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.89it/s, loss=0.684, acc=67.7]



Train Loss: 1.4792, Train Acc: 61.38%
Val Loss: 1.2558, Val Acc: 67.72%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 6/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.79it/s, loss=1.28, lr=0.000372] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.79it/s, loss=1.28, lr=0.000372]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.20it/s, loss=0.84, acc=57.3]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.20it/s, loss=0.84, acc=57.3]


Train Loss: 1.4107, Train Acc: 64.28%
Val Loss: 1.6144, Val Acc: 57.25%

Epoch 7/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.81it/s, loss=1.77, lr=0.00047]  
Training: 100%|██████████| 427/427 [00:30<00:00, 13.81it/s, loss=1.77, lr=0.00047]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.69it/s, loss=0.627, acc=56.5]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.69it/s, loss=0.627, acc=56.5]


Train Loss: 1.3665, Train Acc: 66.42%
Val Loss: 1.5505, Val Acc: 56.55%

Epoch 8/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.76it/s, loss=1.45, lr=0.00057]  
Training: 100%|██████████| 427/427 [00:31<00:00, 13.76it/s, loss=1.45, lr=0.00057]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.31it/s, loss=0.856, acc=62.8]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.31it/s, loss=0.856, acc=62.8]


Train Loss: 1.3644, Train Acc: 67.09%
Val Loss: 1.4362, Val Acc: 62.84%

Epoch 9/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.94it/s, loss=1.7, lr=0.000668]  
Training: 100%|██████████| 427/427 [00:30<00:00, 13.94it/s, loss=1.7, lr=0.000668] 
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.87it/s, loss=0.601, acc=57.4]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.87it/s, loss=0.601, acc=57.4]


Train Loss: 1.3389, Train Acc: 68.20%
Val Loss: 1.5688, Val Acc: 57.37%

Epoch 10/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.85it/s, loss=1.61, lr=0.00076]  
Training: 100%|██████████| 427/427 [00:30<00:00, 13.85it/s, loss=1.61, lr=0.00076]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.87it/s, loss=0.573, acc=61.7]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.87it/s, loss=0.573, acc=61.7]


Train Loss: 1.3367, Train Acc: 68.39%
Val Loss: 1.3597, Val Acc: 61.70%

Epoch 11/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.54it/s, loss=1.45, lr=0.000841] 
Training: 100%|██████████| 427/427 [00:31<00:00, 13.54it/s, loss=1.45, lr=0.000841]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.64it/s, loss=0.687, acc=67]  
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.64it/s, loss=0.687, acc=67] 


Train Loss: 1.3039, Train Acc: 69.83%
Val Loss: 1.3082, Val Acc: 67.05%

Epoch 12/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.68it/s, loss=1.61, lr=0.000908] 
Training: 100%|██████████| 427/427 [00:31<00:00, 13.68it/s, loss=1.61, lr=0.000908]
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.22it/s, loss=0.772, acc=71]  
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.22it/s, loss=0.772, acc=71]  


Train Loss: 1.2487, Train Acc: 72.03%
Val Loss: 1.3029, Val Acc: 70.96%

Epoch 13/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.79it/s, loss=1.75, lr=0.000959] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.79it/s, loss=1.75, lr=0.000959]
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.18it/s, loss=0.744, acc=66.1]
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.18it/s, loss=0.744, acc=66.1]


Train Loss: 1.2461, Train Acc: 72.42%
Val Loss: 1.3462, Val Acc: 66.05%

Epoch 14/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:34<00:00, 12.54it/s, loss=0.719, lr=0.00099] 
Training: 100%|██████████| 427/427 [00:34<00:00, 12.54it/s, loss=0.719, lr=0.00099]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.02it/s, loss=0.599, acc=72.9]



Train Loss: 1.2139, Train Acc: 73.79%
Val Loss: 1.1986, Val Acc: 72.87%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 15/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [01:00<00:00,  7.11it/s, loss=0.814, lr=0.001]   
Validation:   0%|          | 0/107 [00:00<?, ?it/s]
Validation: 100%|██████████| 107/107 [00:03<00:00, 28.60it/s, loss=0.632, acc=71.8]
Validation: 100%|██████████| 107/107 [00:03<00:00, 28.60it/s, loss=0.632, acc=71.8]


Train Loss: 1.2098, Train Acc: 74.05%
Val Loss: 1.2043, Val Acc: 71.75%

Epoch 16/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 13.11it/s, loss=1.73, lr=0.000998] 
Training: 100%|██████████| 427/427 [00:32<00:00, 13.11it/s, loss=1.73, lr=0.000998] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.98it/s, loss=0.627, acc=70.2]
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.98it/s, loss=0.627, acc=70.2]


Train Loss: 1.1435, Train Acc: 77.02%
Val Loss: 1.2314, Val Acc: 70.18%

Epoch 17/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.86it/s, loss=0.617, lr=0.000992]
Training: 100%|██████████| 427/427 [00:30<00:00, 13.86it/s, loss=0.617, lr=0.000992]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.54it/s, loss=0.533, acc=75]  



Train Loss: 1.1672, Train Acc: 75.70%
Val Loss: 1.1272, Val Acc: 75.03%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 18/50
--------------------------------------------------
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 18/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 13.14it/s, loss=0.585, lr=0.000982]
Training: 100%|██████████| 427/427 [00:32<00:00, 13.14it/s, loss=0.585, lr=0.000982]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.18it/s, loss=0.663, acc=74.9]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.18it/s, loss=0.663, acc=74.9]


Train Loss: 1.1604, Train Acc: 75.92%
Val Loss: 1.1464, Val Acc: 74.88%

Epoch 19/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.74it/s, loss=0.706, lr=0.000968]
Training: 100%|██████████| 427/427 [00:31<00:00, 13.74it/s, loss=0.706, lr=0.000968]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.53it/s, loss=0.92, acc=72.5] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.53it/s, loss=0.92, acc=72.5]


Train Loss: 1.1093, Train Acc: 78.15%
Val Loss: 1.1685, Val Acc: 72.46%

Epoch 20/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 12.97it/s, loss=0.863, lr=0.00095] 
Training: 100%|██████████| 427/427 [00:32<00:00, 12.97it/s, loss=0.863, lr=0.00095]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.68it/s, loss=0.685, acc=74.9]



Train Loss: 1.1249, Train Acc: 77.55%
Val Loss: 1.1058, Val Acc: 74.88%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 21/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.39it/s, loss=1.55, lr=0.000929] 
Training: 100%|██████████| 427/427 [00:31<00:00, 13.39it/s, loss=1.55, lr=0.000929] 
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.67it/s, loss=0.544, acc=68.8]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.67it/s, loss=0.544, acc=68.8]


Train Loss: 1.0917, Train Acc: 78.99%
Val Loss: 1.2901, Val Acc: 68.77%

Epoch 22/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.85it/s, loss=0.626, lr=0.000904]
Training: 100%|██████████| 427/427 [00:30<00:00, 13.85it/s, loss=0.626, lr=0.000904]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.15it/s, loss=0.625, acc=74.8]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.15it/s, loss=0.625, acc=74.8]


Train Loss: 1.0652, Train Acc: 79.86%
Val Loss: 1.1295, Val Acc: 74.77%

Epoch 23/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.80it/s, loss=0.612, lr=0.000876]
Training: 100%|██████████| 427/427 [00:30<00:00, 13.80it/s, loss=0.612, lr=0.000876]
Validation: 100%|██████████| 107/107 [00:02<00:00, 36.18it/s, loss=0.624, acc=70]  
Validation: 100%|██████████| 107/107 [00:02<00:00, 36.18it/s, loss=0.624, acc=70]  


Train Loss: 1.0840, Train Acc: 79.21%
Val Loss: 1.2902, Val Acc: 69.97%

Epoch 24/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.84it/s, loss=1.44, lr=0.000845] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.84it/s, loss=1.44, lr=0.000845]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.91it/s, loss=0.795, acc=74]  
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.91it/s, loss=0.795, acc=74]  


Train Loss: 1.0569, Train Acc: 80.40%
Val Loss: 1.2033, Val Acc: 74.01%

Epoch 25/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.87it/s, loss=1.46, lr=0.000812] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.87it/s, loss=1.46, lr=0.000812]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.33it/s, loss=0.779, acc=77]  



Train Loss: 1.0506, Train Acc: 80.59%
Val Loss: 1.1007, Val Acc: 77.05%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 26/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.83it/s, loss=1.63, lr=0.000775] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.83it/s, loss=1.63, lr=0.000775]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.36it/s, loss=0.589, acc=76.1]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.36it/s, loss=0.589, acc=76.1]


Train Loss: 1.0767, Train Acc: 79.43%
Val Loss: 1.1106, Val Acc: 76.14%

Epoch 27/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:33<00:00, 12.63it/s, loss=1.58, lr=0.000737] 
Training: 100%|██████████| 427/427 [00:33<00:00, 12.63it/s, loss=1.58, lr=0.000737] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 31.67it/s, loss=0.605, acc=76.5]



Train Loss: 1.0272, Train Acc: 81.39%
Val Loss: 1.0982, Val Acc: 76.49%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 28/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:34<00:00, 12.48it/s, loss=1.62, lr=0.000696] 
Training: 100%|██████████| 427/427 [00:34<00:00, 12.48it/s, loss=1.62, lr=0.000696]
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.26it/s, loss=0.582, acc=76.8]



Train Loss: 1.0081, Train Acc: 82.28%
Val Loss: 1.0818, Val Acc: 76.81%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 29/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:33<00:00, 12.90it/s, loss=0.846, lr=0.000654]
Training: 100%|██████████| 427/427 [00:33<00:00, 12.90it/s, loss=0.846, lr=0.000654]
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.33it/s, loss=0.598, acc=76.3]
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.33it/s, loss=0.598, acc=76.3]


Train Loss: 1.0403, Train Acc: 81.11%
Val Loss: 1.0881, Val Acc: 76.32%

Epoch 30/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:37<00:00, 11.37it/s, loss=0.614, lr=0.000611]
Training: 100%|██████████| 427/427 [00:37<00:00, 11.37it/s, loss=0.614, lr=0.000611]
Validation: 100%|██████████| 107/107 [00:03<00:00, 32.54it/s, loss=0.664, acc=77.1]
Validation: 100%|██████████| 107/107 [00:03<00:00, 32.54it/s, loss=0.664, acc=77.1]


Train Loss: 1.0390, Train Acc: 80.81%
Val Loss: 1.1020, Val Acc: 77.11%

Epoch 31/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:37<00:00, 11.38it/s, loss=1.07, lr=0.000567] 
Training: 100%|██████████| 427/427 [00:37<00:00, 11.38it/s, loss=1.07, lr=0.000567]
Validation: 100%|██████████| 107/107 [00:03<00:00, 32.07it/s, loss=0.794, acc=76.4]
Validation: 100%|██████████| 107/107 [00:03<00:00, 32.07it/s, loss=0.794, acc=76.4]


Train Loss: 1.0042, Train Acc: 82.16%
Val Loss: 1.1344, Val Acc: 76.37%

Epoch 32/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:34<00:00, 12.42it/s, loss=0.864, lr=0.000522]
Training: 100%|██████████| 427/427 [00:34<00:00, 12.42it/s, loss=0.864, lr=0.000522]
Validation: 100%|██████████| 107/107 [00:03<00:00, 32.40it/s, loss=0.684, acc=73.6]



Train Loss: 0.9726, Train Acc: 83.69%
Val Loss: 1.2027, Val Acc: 73.63%

Epoch 33/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 13.04it/s, loss=0.536, lr=0.000477]
Training: 100%|██████████| 427/427 [00:32<00:00, 13.04it/s, loss=0.536, lr=0.000477]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.58it/s, loss=0.652, acc=75.7]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.58it/s, loss=0.652, acc=75.7]


Train Loss: 1.0079, Train Acc: 81.79%
Val Loss: 1.1114, Val Acc: 75.70%

Epoch 34/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.84it/s, loss=1.42, lr=0.000433] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.84it/s, loss=1.42, lr=0.000433] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.32it/s, loss=0.629, acc=78.3]



Train Loss: 0.9831, Train Acc: 82.97%
Val Loss: 1.0780, Val Acc: 78.27%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 35/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.72it/s, loss=1.15, lr=0.000389] 
Training: 100%|██████████| 427/427 [00:31<00:00, 13.72it/s, loss=1.15, lr=0.000389] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.35it/s, loss=0.592, acc=80.2]



Train Loss: 1.0158, Train Acc: 81.47%
Val Loss: 1.0136, Val Acc: 80.23%
✓ Model saved to ..\runs\20251202_114502\gtzan_cnn.pth

Epoch 36/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.83it/s, loss=1.43, lr=0.000345] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.83it/s, loss=1.43, lr=0.000345] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.12it/s, loss=0.634, acc=76.6]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.12it/s, loss=0.634, acc=76.6]


Train Loss: 0.9983, Train Acc: 82.21%
Val Loss: 1.1104, Val Acc: 76.61%

Epoch 37/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.46it/s, loss=0.707, lr=0.000303]
Training: 100%|██████████| 427/427 [00:31<00:00, 13.46it/s, loss=0.707, lr=0.000303]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.62it/s, loss=0.673, acc=76.3]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.62it/s, loss=0.673, acc=76.3]


Train Loss: 0.9723, Train Acc: 83.54%
Val Loss: 1.1311, Val Acc: 76.26%

Epoch 38/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:33<00:00, 12.85it/s, loss=0.52, lr=0.000263] 
Training: 100%|██████████| 427/427 [00:33<00:00, 12.85it/s, loss=0.52, lr=0.000263] 
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.70it/s, loss=0.721, acc=77.5]
Validation: 100%|██████████| 107/107 [00:03<00:00, 33.70it/s, loss=0.721, acc=77.5]


Train Loss: 0.9738, Train Acc: 83.07%
Val Loss: 1.0860, Val Acc: 77.46%

Epoch 39/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:34<00:00, 12.29it/s, loss=0.935, lr=0.000224]
Training: 100%|██████████| 427/427 [00:34<00:00, 12.29it/s, loss=0.935, lr=0.000224]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.94it/s, loss=0.601, acc=77.9]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.94it/s, loss=0.601, acc=77.9]


Train Loss: 0.9889, Train Acc: 82.35%
Val Loss: 1.0881, Val Acc: 77.89%

Epoch 40/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 13.10it/s, loss=0.546, lr=0.000188]
Training: 100%|██████████| 427/427 [00:32<00:00, 13.10it/s, loss=0.546, lr=0.000188]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.63it/s, loss=0.607, acc=77.8]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.63it/s, loss=0.607, acc=77.8]


Train Loss: 0.9341, Train Acc: 84.93%
Val Loss: 1.0735, Val Acc: 77.84%

Epoch 41/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.90it/s, loss=0.976, lr=0.000154]
Training: 100%|██████████| 427/427 [00:30<00:00, 13.90it/s, loss=0.976, lr=0.000154]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.01it/s, loss=0.599, acc=76.6]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.01it/s, loss=0.599, acc=76.6]


Train Loss: 0.9668, Train Acc: 83.10%
Val Loss: 1.1109, Val Acc: 76.58%

Epoch 42/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.87it/s, loss=0.906, lr=0.000123]
Training: 100%|██████████| 427/427 [00:30<00:00, 13.87it/s, loss=0.906, lr=0.000123]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.89it/s, loss=0.631, acc=78.7]
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.89it/s, loss=0.631, acc=78.7]


Train Loss: 0.9581, Train Acc: 83.63%
Val Loss: 1.0657, Val Acc: 78.68%

Epoch 43/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 13.06it/s, loss=1.13, lr=9.55e-5]  
Training: 100%|██████████| 427/427 [00:32<00:00, 13.06it/s, loss=1.13, lr=9.55e-5]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.15it/s, loss=0.641, acc=79]  



Train Loss: 1.0071, Train Acc: 81.09%
Val Loss: 1.0583, Val Acc: 79.04%

Epoch 44/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:33<00:00, 12.68it/s, loss=1.36, lr=7.08e-5] 
Training: 100%|██████████| 427/427 [00:33<00:00, 12.68it/s, loss=1.36, lr=7.08e-5]
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.46it/s, loss=0.623, acc=79.2]
Validation: 100%|██████████| 107/107 [00:03<00:00, 34.46it/s, loss=0.623, acc=79.2]


Train Loss: 0.9315, Train Acc: 84.59%
Val Loss: 1.0632, Val Acc: 79.18%

Epoch 45/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:32<00:00, 13.15it/s, loss=1, lr=4.95e-5]    
Training: 100%|██████████| 427/427 [00:32<00:00, 13.15it/s, loss=1, lr=4.95e-5]    
Validation: 100%|██████████| 107/107 [00:03<00:00, 30.74it/s, loss=0.605, acc=79.2]
Validation: 100%|██████████| 107/107 [00:03<00:00, 30.74it/s, loss=0.605, acc=79.2]


Train Loss: 0.9555, Train Acc: 83.62%
Val Loss: 1.0563, Val Acc: 79.24%

Epoch 46/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:34<00:00, 12.40it/s, loss=0.68, lr=3.19e-5] 
Training: 100%|██████████| 427/427 [00:34<00:00, 12.40it/s, loss=0.68, lr=3.19e-5]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.28it/s, loss=0.61, acc=79]   
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.28it/s, loss=0.61, acc=79]   


Train Loss: 0.9666, Train Acc: 82.97%
Val Loss: 1.0740, Val Acc: 79.04%

Epoch 47/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.80it/s, loss=0.551, lr=1.8e-5] 
Training: 100%|██████████| 427/427 [00:30<00:00, 13.80it/s, loss=0.551, lr=1.8e-5] 
Validation: 100%|██████████| 107/107 [00:02<00:00, 36.84it/s, loss=0.561, acc=78.7]
Validation: 100%|██████████| 107/107 [00:02<00:00, 36.84it/s, loss=0.561, acc=78.7]


Train Loss: 0.9381, Train Acc: 84.19%
Val Loss: 1.0597, Val Acc: 78.68%

Epoch 48/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:30<00:00, 13.98it/s, loss=0.689, lr=8.06e-6]
Training: 100%|██████████| 427/427 [00:30<00:00, 13.98it/s, loss=0.689, lr=8.06e-6]
Validation: 100%|██████████| 107/107 [00:02<00:00, 37.21it/s, loss=0.578, acc=78.8]
Validation: 100%|██████████| 107/107 [00:02<00:00, 37.21it/s, loss=0.578, acc=78.8]


Train Loss: 0.9148, Train Acc: 85.44%
Val Loss: 1.0555, Val Acc: 78.83%

Epoch 49/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.68it/s, loss=0.534, lr=2.04e-6]
Training: 100%|██████████| 427/427 [00:31<00:00, 13.68it/s, loss=0.534, lr=2.04e-6]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.61it/s, loss=0.637, acc=78.9]
Validation: 100%|██████████| 107/107 [00:03<00:00, 35.61it/s, loss=0.637, acc=78.9]


Train Loss: 0.9684, Train Acc: 82.89%
Val Loss: 1.0628, Val Acc: 78.95%

Epoch 50/50
--------------------------------------------------


Training: 100%|██████████| 427/427 [00:31<00:00, 13.63it/s, loss=1.57, lr=4e-8]    
Training: 100%|██████████| 427/427 [00:31<00:00, 13.63it/s, loss=1.57, lr=4e-8] 
Validation: 100%|██████████| 107/107 [00:02<00:00, 35.73it/s, loss=0.645, acc=78.8]



Train Loss: 0.9397, Train Acc: 84.04%
Val Loss: 1.0682, Val Acc: 78.80%

Early stopping triggered after 50 epochs
Loading best model from ..\runs\20251202_114502\gtzan_cnn.pth...
Loading best model from ..\runs\20251202_114502\gtzan_cnn.pth...


  model.load_state_dict(torch.load(str(run_dir / 'gtzan_cnn.pth')))


<All keys matched successfully>

## Evaluation Metrics

In [None]:
def evaluate_model(model, test_loader, device, genre_names=None, changes_file=None, split_name="Test"):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc=f'Evaluating {split_name}'):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')
    
    print(f"\n{split_name} Metrics (Chunk-Level):")
    print(f"Accuracy: {accuracy*100:.2f}%")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    
    if changes_file:
        with open(changes_file, "a") as f:
            f.write(f"- {split_name} Accuracy (Chunk): {accuracy*100:.2f}%\n")
            f.write(f"- {split_name} Precision: {precision:.4f}\n")
            f.write(f"- {split_name} Recall: {recall:.4f}\n")
            f.write(f"- {split_name} F1-Score: {f1:.4f}\n")
    
    return all_preds, all_labels

def evaluate_by_song(model, val_dataset, device, changes_file=None, split_name="Test"):
    """
    Evaluate accuracy by aggregating chunk predictions for each song.
    Val_dataset is ordered by song.
    """
    model.eval()
    correct_songs = 0
    
    # Check if dataset has num_chunks (ChunkedDataset)
    if not hasattr(val_dataset, 'num_chunks'):
        print("Dataset does not appear to be a ChunkedDataset. Skipping song-level evaluation.")
        return 0.0

    num_chunks = val_dataset.num_chunks
    total_songs = len(val_dataset) // num_chunks
    
    print(f"\nEvaluating on {total_songs} songs ({split_name}) (aggregating {num_chunks} chunks each)...")
    
    with torch.no_grad():
        for i in tqdm(range(total_songs), desc='Song Eval'):
            # Get all chunks for this song
            chunks = []
            label = None
            
            # Indices for this song's chunks
            start_idx = i * num_chunks
            
            for j in range(num_chunks):
                c, l = val_dataset[start_idx + j]
                chunks.append(c)
                label = l 
            
            # Stack: (num_chunks, channels, time)
            chunks_tensor = torch.stack(chunks).to(device)
            
            # Predict
            outputs = model(chunks_tensor) # (num_chunks, n_classes)
            
            # Soft Voting: Average probabilities
            avg_output = torch.mean(outputs, dim=0)
            pred_label = torch.argmax(avg_output).item()
            
            if pred_label == label:
                correct_songs += 1
                
    song_acc = 100 * correct_songs / total_songs
    print(f"{split_name} Song-Level Accuracy: {song_acc:.2f}%")
    
    if changes_file:
        with open(changes_file, "a") as f:
            f.write(f"- {split_name} Song-Level Accuracy: {song_acc:.2f}%\n")
            
    return song_acc

# Run evaluations on Validation Set
with open(changes_file, "a") as f:
    f.write("\n--- Validation Set ---\n")
print("\n--- Validation Set Evaluation ---")
evaluate_model(
    model, val_loader, device, genre_names=GENRES, changes_file=changes_file, split_name="Validation"
)
evaluate_by_song(model, val_loader.dataset, device, changes_file=changes_file, split_name="Validation")

# Run evaluations on Test Set
with open(changes_file, "a") as f:
    f.write("\n--- Test Set ---\n")
print("\n--- Test Set Evaluation ---")
evaluate_model(
    model, test_loader, device, genre_names=GENRES, changes_file=changes_file, split_name="Test"
)

evaluate_by_song(model, test_loader.dataset, device, changes_file=changes_file, split_name="Test")



--- Validation Set Evaluation ---


Evaluating Validation: 100%|██████████| 107/107 [00:03<00:00, 33.23it/s]
Evaluating Validation: 100%|██████████| 107/107 [00:03<00:00, 33.23it/s]



Validation Metrics (Chunk-Level):
Accuracy: 80.23%
Precision: 0.8043
Recall: 0.8023
F1-Score: 0.7947

Evaluating on 180 songs (Validation) (aggregating 19 chunks each)...


Song Eval: 100%|██████████| 180/180 [00:03<00:00, 57.61it/s]
Song Eval: 100%|██████████| 180/180 [00:03<00:00, 57.61it/s]


Validation Song-Level Accuracy: 85.56%

--- Test Set Evaluation ---


Evaluating Test: 100%|██████████| 60/60 [00:02<00:00, 20.69it/s]
Evaluating Test: 100%|██████████| 60/60 [00:02<00:00, 20.69it/s]



Test Metrics (Chunk-Level):
Accuracy: 77.16%
Precision: 0.7823
Recall: 0.7716
F1-Score: 0.7657

Evaluating on 100 songs (Test) (aggregating 19 chunks each)...


Song Eval: 100%|██████████| 100/100 [00:01<00:00, 60.83it/s]

Test Song-Level Accuracy: 83.00%





83.0