# Training Music Classification Models

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datetime import datetime
from pathlib import Path
import random

def set_seed(seed=42):
    """Seed the random number generators used in this notebook.

    Applies ``seed`` to Python's ``random`` module, NumPy's global generator,
    and PyTorch (``torch.manual_seed`` and ``torch.cuda.manual_seed``), then
    puts cuDNN into deterministic mode with benchmarking turned off.

    Args:
        seed: Integer seed handed to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN benchmarking and request deterministic algorithms.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

set_seed(42)

## Training Configuration

In [2]:
# Device configuration: use the GPU when one is visible, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Training hyperparameters
BATCH_SIZE = 32
NUM_EPOCHS = 50
LEARNING_RATE = 0.001  # Raised back to 0.001 for OneCycleLR (used as its max_lr)
EARLY_STOPPING_PATIENCE = 15  # Epochs without val-loss improvement before stopping

Using device: cuda


In [None]:
# Setup run directory: one timestamped folder per execution of this cell
run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = Path(f"../runs/{run_id}")
run_dir.mkdir(parents=True, exist_ok=True)
print(f"Run directory created at: {run_dir}")

# Create changes.md describing this run. train_model() later appends the
# final metrics under the "## Results" heading.
changes_file = run_dir / "changes.md"
changes_lines = [
    f"# Run {run_id}\n\n",
    "## Configuration\n",
    f"- Batch Size: {BATCH_SIZE}\n",
    f"- Learning Rate: {LEARNING_RATE}\n",
    f"- Epochs: {NUM_EPOCHS}\n",
    f"- Device: {device}\n",
    "- Data Strategy: Chunking (3s chunks, 50% overlap)\n",
    "- Augmentation: Noise=0.01, Shift=0.3\n",
    "- Optimization: In-memory caching + Mixed Precision (AMP)\n",
    # Weight decay must match the AdamW setting used in train_model (0.01);
    # the previous text still said 1e-4.
    "- Stability: Seed=42, Weight Decay=0.01 (AdamW), Gradient Clipping=1.0\n",
    "- Data Split: Stratified (Balanced Validation Set)\n\n",
    "## Changes\n",
    "- Increased model capacity by restoring the 4th residual layer and double the channel depth (up to 512 channels)\n",
    "- Updated scheduler to use OneCycleLR (SOTA for CNN models). It starts with a low learning rate, ramps up to a high one, and then anneals down to near zero.\n",
    "- Switched from Adam optimizer to AdamW with higher weight decay (0.01). AdamW decouples weight decay from the gradient update, which usually leads to better generalization.\n\n",
    "## Results\n",
]
with open(changes_file, "w", encoding="utf-8") as f:
    f.writelines(changes_lines)

Run directory created at: ..\runs\20251129_115258


## Training Function (Single-label Classification)

In [4]:
def mixup_data(x, y, alpha=1.0, device='cuda'):
    '''Blend every sample of a batch with a randomly chosen partner (Mixup).

    Args:
        x: Batch of inputs; dimension 0 is the batch dimension.
        y: Batch of targets aligned with ``x`` along dimension 0.
        alpha: Concentration of the Beta distribution. When ``alpha > 0`` the
            mixing weight ``lam`` is drawn from ``Beta(alpha, alpha)``;
            otherwise ``lam`` is 1 and the inputs come back unmixed.
        device: Kept for backward compatibility only. The permutation index is
            placed on ``x.device`` so CPU batches work even when the default
            ``'cuda'`` is left untouched.

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[index]``, ``y_a`` is ``y``,
        ``y_b`` is ``y[index]`` and ``index`` is a random permutation of the
        batch.
    '''
    if alpha > 0:
        # Plain Python float so the scalar multiplies tensors on any device.
        lam = float(np.random.beta(alpha, alpha))
    else:
        lam = 1

    batch_size = x.size(0)
    # Follow the data's device instead of the (possibly wrong) `device` argument.
    index = torch.randperm(batch_size).to(x.device)

    # Index only dimension 0 so inputs of any rank (including 1-D) are accepted.
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Combine the losses against both Mixup targets, weighted by ``lam``.

    Args:
        criterion: Callable ``criterion(pred, target)`` returning a loss.
        pred: Model output for the mixed batch.
        y_a: Targets of the original samples (weight ``lam``).
        y_b: Targets of the mixed-in partner samples (weight ``1 - lam``).
        lam: Mixing weight.

    Returns:
        ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

def train_epoch(model, train_loader, criterion, optimizer, device, scaler=None, scheduler=None):
    """Train for one epoch with Mixup, mixed precision and gradient clipping.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device (string or ``torch.device``) the batches are moved to.
            Mixed precision is enabled only when this is a CUDA device.
        scaler: Optional ``torch.amp.GradScaler`` to reuse across epochs so
            its loss scale is not reset on every call. When omitted, a new
            scaler is created for this call.
        scheduler: Optional learning-rate scheduler that must be stepped once
            per batch (such as OneCycleLR). Stepped after each optimizer step.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the Mixup-weighted accuracy in percent.
    """
    # Derive the autocast device from `device` instead of hard-coding CUDA.
    device_type = torch.device(device).type
    use_amp = device_type == 'cuda'
    if scaler is None:
        scaler = torch.amp.GradScaler(device_type, enabled=use_amp)

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(train_loader, desc='Training')
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        # Apply Mixup
        inputs, targets_a, targets_b, lam = mixup_data(inputs, labels, alpha=0.4, device=device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass with mixed precision (a no-op context when AMP is off)
        with torch.amp.autocast(device_type=device_type, dtype=torch.float16, enabled=use_amp):
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

        # Backward pass on the scaled loss
        scaler.scale(loss).backward()

        # Unscale before clipping so the norm threshold applies to true gradients
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        scaler.step(optimizer)
        scaler.update()

        if scheduler is not None:
            scheduler.step()

        # Statistics
        batch_loss = loss.item()
        running_loss += batch_loss * inputs.size(0)
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)

        # Weighted accuracy for mixup: a hit on either target counts with its mixing weight
        hits_a = (predicted == targets_a).float()
        hits_b = (predicted == targets_b).float()
        correct += (lam * hits_a + (1 - lam) * hits_b).sum().item()

        pbar.set_postfix({'loss': batch_loss, 'acc': 100 * correct / total})

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    return epoch_loss, epoch_acc

In [5]:
def validate_epoch(model, val_loader, criterion, device):
    """Run one evaluation pass over ``val_loader`` with gradients disabled.

    Args:
        model: Network to evaluate; switched to eval mode here.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, all_preds, all_labels)``: the
        sample-weighted mean loss, accuracy in percent, and the per-sample
        predicted and true class indices collected as lists.
    """
    model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    pred_log, label_log = [], []

    progress = tqdm(val_loader, desc='Validation')
    with torch.no_grad():
        for batch_x, batch_y in progress:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # Forward pass
            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            # Running statistics
            loss_sum += batch_loss.item() * batch_x.size(0)
            guesses = torch.max(logits.data, 1)[1]
            n_seen += batch_y.size(0)
            n_correct += (guesses == batch_y).sum().item()

            pred_log.extend(guesses.cpu().numpy())
            label_log.extend(batch_y.cpu().numpy())

            progress.set_postfix({'loss': batch_loss.item(), 'acc': 100 * n_correct / n_seen})

    return loss_sum / n_seen, 100 * n_correct / n_seen, pred_log, label_log

In [None]:
def train_model(model, train_loader, val_loader, num_epochs, learning_rate, device, 
                save_path='../models/best_model.pth', changes_file=None,
                early_stopping_patience=None):
    """Complete training loop with OneCycleLR, Mixup, AMP and early stopping.

    Args:
        model: Network to train; moved to ``device``.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. Its
            length is used as the scheduler's ``steps_per_epoch``.
        val_loader: Batches handed to ``validate_epoch`` after every epoch.
        num_epochs: Maximum number of epochs.
        learning_rate: Learning rate given to AdamW and used as the
            OneCycleLR ``max_lr``.
        device: Device (string or ``torch.device``) to train on. Mixed
            precision is enabled only when this is a CUDA device.
        save_path: File receiving the ``state_dict`` each time the validation
            loss improves. Its parent directory is created if missing.
        changes_file: Optional path; when given, the metrics of the last
            completed epoch are appended to it.
        early_stopping_patience: Number of consecutive epochs without a
            validation-loss improvement after which training stops. Defaults
            to the notebook-level ``EARLY_STOPPING_PATIENCE``.

    Returns:
        Dict with the lists ``'train_loss'``, ``'train_acc'``, ``'val_loss'``
        and ``'val_acc'``, one entry per completed epoch.
    """
    if early_stopping_patience is None:
        early_stopping_patience = EARLY_STOPPING_PATIENCE

    model = model.to(device)

    # torch.save does not create directories, so make sure the target exists.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01) # Increased weight decay for AdamW

    # OneCycleLR Scheduler
    # Steps per epoch is len(train_loader)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer, 
        max_lr=learning_rate, 
        steps_per_epoch=len(train_loader), 
        epochs=num_epochs,
        pct_start=0.3, # Warmup for 30% of training
        div_factor=25.0,
        final_div_factor=1000.0
    )

    # Mixed precision setup. The scaler is created once so its dynamic loss
    # scale carries over between epochs instead of being reset every epoch.
    device_type = torch.device(device).type
    use_amp = device_type == 'cuda'
    scaler = torch.amp.GradScaler(device_type, enabled=use_amp)

    # Training history
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)

        # Train (validate_epoch leaves the model in eval mode, so reset it)
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        pbar = tqdm(train_loader, desc='Training')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            # Apply Mixup
            inputs, targets_a, targets_b, lam = mixup_data(inputs, labels, alpha=0.4, device=device)

            optimizer.zero_grad()

            with torch.amp.autocast(device_type=device_type, dtype=torch.float16, enabled=use_amp):
                outputs = model(inputs)
                loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

            scaler.scale(loss).backward()
            # Unscale before clipping so the threshold applies to true gradients
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            # Step scheduler every batch for OneCycleLR
            scheduler.step()

            batch_loss = loss.item()
            running_loss += batch_loss * inputs.size(0)
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            # Mixup-weighted accuracy: a hit on either target counts with its mixing weight
            correct += (lam * (predicted == targets_a).float() + (1 - lam) * (predicted == targets_b).float()).sum().item()

            pbar.set_postfix({'loss': batch_loss, 'lr': scheduler.get_last_lr()[0]})

        train_loss = running_loss / total
        train_acc = 100 * correct / total

        # Validate
        val_loss, val_acc, _, _ = validate_epoch(model, val_loader, criterion, device)

        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Save best model
            torch.save(model.state_dict(), save_path)
            print(f"✓ Model saved to {save_path}")
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                break

    # Only report when at least one epoch completed; these are the metrics of
    # the last epoch, which need not be the epoch whose weights were saved.
    if changes_file and history['train_loss']:
        with open(changes_file, "a") as f:
            f.write(f"- Final Train Loss: {history['train_loss'][-1]:.4f}\n")
            f.write(f"- Final Val Loss: {history['val_loss'][-1]:.4f}\n")
            f.write(f"- Final Train Acc: {history['train_acc'][-1]:.2f}%\n")
            f.write(f"- Final Val Acc: {history['val_acc'][-1]:.2f}%\n")

    return history

## Training Function (Multi-label Classification)

In [7]:
def train_multilabel(model, train_loader, val_loader, num_epochs, learning_rate, device,
                     save_path='../models/best_model_multilabel.pth',
                     early_stopping_patience=None):
    """Training loop for multi-label classification.

    Uses ``BCEWithLogitsLoss``, Adam, and a ``ReduceLROnPlateau`` scheduler
    driven by the validation loss.

    Args:
        model: Network to train; moved to ``device``.
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        val_loader: Iterable yielding ``(inputs, labels)`` validation batches.
        num_epochs: Maximum number of epochs.
        learning_rate: Initial learning rate for Adam.
        device: Device the model and batches are moved to.
        save_path: File receiving the ``state_dict`` each time the validation
            loss improves. Its parent directory is created if missing.
        early_stopping_patience: Number of consecutive epochs without a
            validation-loss improvement after which training stops. Defaults
            to the notebook-level ``EARLY_STOPPING_PATIENCE``.

    Returns:
        Dict with the lists ``'train_loss'`` and ``'val_loss'``, one entry per
        completed epoch. Both are sample-weighted means, so a short final
        batch does not count as much as a full one.
    """
    if early_stopping_patience is None:
        early_stopping_patience = EARLY_STOPPING_PATIENCE

    model = model.to(device)

    # torch.save does not create directories, so make sure the target exists.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    # Loss and optimizer (BCE for multi-label)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
                                                       patience=5)

    history = {
        'train_loss': [],
        'val_loss': []
    }

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)

        # Training
        model.train()
        train_loss_sum = 0.0
        train_samples = 0

        pbar = tqdm(train_loader, desc='Training')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight each batch mean by its size, matching the single-label path
            batch_loss = loss.item()
            batch_size = inputs.size(0)
            train_loss_sum += batch_loss * batch_size
            train_samples += batch_size
            pbar.set_postfix({'loss': batch_loss})

        train_loss = train_loss_sum / train_samples

        # Validation
        model.eval()
        val_loss_sum = 0.0
        val_samples = 0

        with torch.no_grad():
            pbar = tqdm(val_loader, desc='Validation')
            for inputs, labels in pbar:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                batch_loss = loss.item()
                batch_size = inputs.size(0)
                val_loss_sum += batch_loss * batch_size
                val_samples += batch_size
                pbar.set_postfix({'loss': batch_loss})

        val_loss = val_loss_sum / val_samples

        # ReduceLROnPlateau is stepped once per epoch with the monitored metric
        scheduler.step(val_loss)

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)

        print(f"Train Loss: {train_loss:.4f}")
        print(f"Val Loss: {val_loss:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), save_path)
            print(f"✓ Model saved to {save_path}")
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                break

    return history

## Plot Training History

In [8]:
def plot_training_history(history, multi_label=False, save_path=None):
    """Draw the loss (and, for single-label runs, accuracy) curves.

    Args:
        history: Dict of per-epoch lists. ``'train_loss'`` and ``'val_loss'``
            are always read; ``'train_acc'`` and ``'val_acc'`` are read only
            when ``multi_label`` is false.
        multi_label: When true, draw a single loss panel; otherwise draw a
            loss panel and an accuracy panel side by side.
        save_path: When truthy, the figure is written to this path and closed;
            otherwise it is shown.
    """
    def _draw_panel(ax, series, y_label, title):
        # One line per (history key, legend label) pair, then shared styling.
        for key, legend_label in series:
            ax.plot(history[key], label=legend_label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    loss_series = (('train_loss', 'Train Loss'), ('val_loss', 'Val Loss'))
    acc_series = (('train_acc', 'Train Accuracy'), ('val_acc', 'Val Accuracy'))

    n_panels = 1 if multi_label else 2
    fig, axes = plt.subplots(1, n_panels, figsize=(15, 5))

    if multi_label:
        # Loss plot only for multi-label; `axes` is a single Axes here
        _draw_panel(axes, loss_series, 'Loss', 'Training and Validation Loss')
    else:
        _draw_panel(axes[0], loss_series, 'Loss', 'Training and Validation Loss')
        _draw_panel(axes[1], acc_series, 'Accuracy (%)', 'Training and Validation Accuracy')

    plt.tight_layout()

    if not save_path:
        plt.show()
        return

    plt.savefig(save_path)
    plt.close()

### For Single-label Classification (GTZAN, FMA)

In [None]:
# Train ImprovedCNN on GTZAN

# Ensure repository root is on sys.path.
# The root is taken as the parent of the current working directory, so this
# assumes the kernel was started one level below the repository root.
import os
import sys
from pathlib import Path
repo_root = Path.cwd().parent
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

# Import model (prefer module; fallback to running the model notebook)
try:
    from model_cnn import ImprovedCNN
except ModuleNotFoundError:
    print("Model module not found; loading from notebook via %run ...")
    %run "./04_model_cnn.ipynb"

# Import dataset from stable utils module (Windows-safe);
# fallback to running the data-loading notebook
try:
    from utils.datasets_gtzan import GTZANDataset, create_dataloaders, GENRES, AudioAugmentation
except ModuleNotFoundError:
    print("Dataset module not found; loading from notebook via %run ...")
    %run "./01_data_loading_gtzan.ipynb"

# Create dataset with in-memory caching
gtzan_root = repo_root / "data" / "gtzan"
dataset = GTZANDataset(str(gtzan_root), cache_to_memory=True)
print(f"GTZAN files: {len(dataset)}")

# Define augmentation (same values as recorded in changes.md)
train_transform = AudioAugmentation(noise_level=0.01, shift_max=0.3)

# Create loaders with Stratified Split AND Chunking
# NOTE: With cache_to_memory=True, we must use num_workers=0 on Windows to avoid
# pickling the entire cached dataset to worker processes, which causes hangs/OOM.
# test_loader is returned here but not used in this cell.
train_loader, val_loader, test_loader = create_dataloaders(
    dataset, 
    batch_size=BATCH_SIZE, 
    num_workers=0,
    train_transform=train_transform,
    chunk_length_sec=3.0, # Enable chunking
    test_split=0.1 # Create test split
)

# Create model
model = ImprovedCNN(n_classes=10)

# Train; the best checkpoint and the final metrics go into this run's directory
history = train_model(
    model, train_loader, val_loader,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    device=device,
    save_path=str(run_dir / 'gtzan_cnn.pth'),
    changes_file=changes_file
)

# Plot results (saved to the run directory instead of being shown inline)
plot_training_history(history, save_path=str(run_dir / 'training_history.png'))

Model module not found; loading from notebook via %run ...
SimpleCNN:
SimpleCNN(
  (mel_spec): MelSpectrogram(
    (spectrogram): Spectrogram()
    (mel_scale): MelScale()
  )
  (amplitude_to_db): AmplitudeToDB()
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(128, 256, ke

  model.load_state_dict(torch.load(path))


Model loaded from ../models/simple_cnn.pth
Caching 999 audio files to memory...
Caching 999 audio files to memory...
Caching complete.
GTZAN files: 999
Caching complete.
GTZAN files: 999
Created stratified split: 799 train songs, 200 val songs
Applying chunking: 3.0s chunks with 50% overlap
Chunked dataset sizes: 15181 train chunks, 3800 val chunks
Created stratified split: 799 train songs, 200 val songs
Applying chunking: 3.0s chunks with 50% overlap
Chunked dataset sizes: 15181 train chunks, 3800 val chunks

Epoch 1/50
--------------------------------------------------

Epoch 1/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:45<00:00, 10.47it/s, loss=1.37, acc=34.5]
Training: 100%|██████████| 475/475 [00:45<00:00, 10.47it/s, loss=1.37, acc=34.5]
Validation: 100%|██████████| 119/119 [00:04<00:00, 24.33it/s, loss=0.898, acc=52.7]



Train Loss: 1.9576, Train Acc: 34.48%
Val Loss: 1.6428, Val Acc: 52.68%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 2/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:37<00:00, 12.75it/s, loss=1.92, acc=48.4]
Training: 100%|██████████| 475/475 [00:37<00:00, 12.75it/s, loss=1.92, acc=48.4]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.80it/s, loss=1.18, acc=57.3] 



Train Loss: 1.7140, Train Acc: 48.36%
Val Loss: 1.6281, Val Acc: 57.32%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 3/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:33<00:00, 14.33it/s, loss=1.47, acc=54.3]
Training: 100%|██████████| 475/475 [00:33<00:00, 14.33it/s, loss=1.47, acc=54.3]
Validation: 100%|██████████| 119/119 [00:02<00:00, 44.35it/s, loss=0.921, acc=59.6]
Validation: 100%|██████████| 119/119 [00:02<00:00, 44.35it/s, loss=0.921, acc=59.6]


Train Loss: 1.6109, Train Acc: 54.29%
Val Loss: 1.5184, Val Acc: 59.58%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 4/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:34<00:00, 13.90it/s, loss=1.15, acc=57.1]
Training: 100%|██████████| 475/475 [00:34<00:00, 13.90it/s, loss=1.15, acc=57.1]
Validation: 100%|██████████| 119/119 [00:02<00:00, 39.90it/s, loss=1.15, acc=60.8] 



Train Loss: 1.5664, Train Acc: 57.05%
Val Loss: 1.5163, Val Acc: 60.82%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 5/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:33<00:00, 14.07it/s, loss=0.991, acc=59.4]
Training: 100%|██████████| 475/475 [00:33<00:00, 14.07it/s, loss=0.991, acc=59.4]
Validation: 100%|██████████| 119/119 [00:02<00:00, 42.42it/s, loss=0.816, acc=61.5]
Validation: 100%|██████████| 119/119 [00:02<00:00, 42.42it/s, loss=0.816, acc=61.5]


Train Loss: 1.5234, Train Acc: 59.42%
Val Loss: 1.4741, Val Acc: 61.47%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 6/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.43it/s, loss=1.87, acc=61.8] 
Training: 100%|██████████| 475/475 [00:32<00:00, 14.43it/s, loss=1.87, acc=61.8]
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.37it/s, loss=1.05, acc=68]   



Train Loss: 1.4792, Train Acc: 61.79%
Val Loss: 1.2929, Val Acc: 67.97%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 7/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:42<00:00, 11.29it/s, loss=1.88, acc=62.7] 
Training: 100%|██████████| 475/475 [00:42<00:00, 11.29it/s, loss=1.88, acc=62.7]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.36it/s, loss=0.904, acc=66.1]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.36it/s, loss=0.904, acc=66.1]


Train Loss: 1.4636, Train Acc: 62.67%
Val Loss: 1.4380, Val Acc: 66.05%

Epoch 8/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:37<00:00, 12.70it/s, loss=1.4, acc=64]    
Training: 100%|██████████| 475/475 [00:37<00:00, 12.70it/s, loss=1.4, acc=64] 
Validation: 100%|██████████| 119/119 [00:03<00:00, 36.72it/s, loss=1.05, acc=65.4] 
Validation: 100%|██████████| 119/119 [00:03<00:00, 36.72it/s, loss=1.05, acc=65.4]


Train Loss: 1.4344, Train Acc: 64.05%
Val Loss: 1.3535, Val Acc: 65.42%

Epoch 9/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:33<00:00, 14.24it/s, loss=1.81, acc=64]   
Training: 100%|██████████| 475/475 [00:33<00:00, 14.24it/s, loss=1.81, acc=64]
Validation: 100%|██████████| 119/119 [00:03<00:00, 35.38it/s, loss=0.851, acc=72.2]



Train Loss: 1.4428, Train Acc: 64.01%
Val Loss: 1.2138, Val Acc: 72.24%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 10/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:31<00:00, 14.94it/s, loss=1.18, acc=65.1] 
Training: 100%|██████████| 475/475 [00:31<00:00, 14.94it/s, loss=1.18, acc=65.1]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.21it/s, loss=0.849, acc=67.5]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.21it/s, loss=0.849, acc=67.5]


Train Loss: 1.4249, Train Acc: 65.06%
Val Loss: 1.2954, Val Acc: 67.53%

Epoch 11/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:35<00:00, 13.41it/s, loss=1.57, acc=67.3] 
Training: 100%|██████████| 475/475 [00:35<00:00, 13.41it/s, loss=1.57, acc=67.3]
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.21it/s, loss=0.702, acc=65.5]
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.21it/s, loss=0.702, acc=65.5]


Train Loss: 1.3738, Train Acc: 67.27%
Val Loss: 1.3326, Val Acc: 65.47%

Epoch 12/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.42it/s, loss=1.4, acc=67.9]  
Training: 100%|██████████| 475/475 [00:32<00:00, 14.42it/s, loss=1.4, acc=67.9] 
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.78it/s, loss=0.843, acc=73.7]



Train Loss: 1.3690, Train Acc: 67.95%
Val Loss: 1.1839, Val Acc: 73.68%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 13/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:34<00:00, 13.96it/s, loss=1.6, acc=68.1]  
Training: 100%|██████████| 475/475 [00:34<00:00, 13.96it/s, loss=1.6, acc=68.1]
Validation: 100%|██████████| 119/119 [00:03<00:00, 39.02it/s, loss=0.866, acc=66]  
Validation: 100%|██████████| 119/119 [00:03<00:00, 39.02it/s, loss=0.866, acc=66]


Train Loss: 1.3582, Train Acc: 68.15%
Val Loss: 1.3186, Val Acc: 65.97%

Epoch 14/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.74it/s, loss=1.39, acc=68.6] 
Training: 100%|██████████| 475/475 [00:32<00:00, 14.74it/s, loss=1.39, acc=68.6]
Validation: 100%|██████████| 119/119 [00:02<00:00, 42.69it/s, loss=0.839, acc=73.2]
Validation: 100%|██████████| 119/119 [00:02<00:00, 42.69it/s, loss=0.839, acc=73.2]


Train Loss: 1.3558, Train Acc: 68.58%
Val Loss: 1.1900, Val Acc: 73.16%

Epoch 15/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.80it/s, loss=1.07, acc=69.6] 
Training: 100%|██████████| 475/475 [00:32<00:00, 14.80it/s, loss=1.07, acc=69.6]
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.27it/s, loss=0.892, acc=72.2]
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.27it/s, loss=0.892, acc=72.2]


Train Loss: 1.3334, Train Acc: 69.63%
Val Loss: 1.2131, Val Acc: 72.18%

Epoch 16/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.40it/s, loss=1.46, acc=68.7] 
Training: 100%|██████████| 475/475 [00:32<00:00, 14.40it/s, loss=1.46, acc=68.7]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.60it/s, loss=1.3, acc=67.7]  
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.60it/s, loss=1.3, acc=67.7]  


Train Loss: 1.3557, Train Acc: 68.66%
Val Loss: 1.3148, Val Acc: 67.66%

Epoch 17/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.71it/s, loss=1.89, acc=70.7] 
Training: 100%|██████████| 475/475 [00:32<00:00, 14.71it/s, loss=1.89, acc=70.7]
Validation: 100%|██████████| 119/119 [00:02<00:00, 43.43it/s, loss=0.75, acc=73.6] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 43.43it/s, loss=0.75, acc=73.6]


Train Loss: 1.3096, Train Acc: 70.67%
Val Loss: 1.2068, Val Acc: 73.58%

Epoch 18/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:34<00:00, 13.85it/s, loss=0.954, acc=70.7]
Training: 100%|██████████| 475/475 [00:34<00:00, 13.85it/s, loss=0.954, acc=70.7]
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.54it/s, loss=1.18, acc=69.4] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 41.54it/s, loss=1.18, acc=69.4]


Train Loss: 1.3154, Train Acc: 70.72%
Val Loss: 1.3183, Val Acc: 69.39%

Epoch 19/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:30<00:00, 15.36it/s, loss=1.99, acc=72.2] 
Training: 100%|██████████| 475/475 [00:30<00:00, 15.36it/s, loss=1.99, acc=72.2]
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.32it/s, loss=1.04, acc=74]   
Validation: 100%|██████████| 119/119 [00:03<00:00, 37.32it/s, loss=1.04, acc=74]


Train Loss: 1.2840, Train Acc: 72.24%
Val Loss: 1.1894, Val Acc: 74.03%

Epoch 20/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.48it/s, loss=0.694, acc=73.2]
Training: 100%|██████████| 475/475 [00:32<00:00, 14.48it/s, loss=0.694, acc=73.2]
Validation: 100%|██████████| 119/119 [00:03<00:00, 39.08it/s, loss=0.838, acc=72.3]



Train Loss: 1.2589, Train Acc: 73.18%
Val Loss: 1.1738, Val Acc: 72.32%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 21/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:36<00:00, 12.98it/s, loss=1.07, acc=72.8] 
Training: 100%|██████████| 475/475 [00:36<00:00, 12.98it/s, loss=1.07, acc=72.8]
Validation: 100%|██████████| 119/119 [00:03<00:00, 32.74it/s, loss=0.711, acc=76.1]



Train Loss: 1.2723, Train Acc: 72.79%
Val Loss: 1.1449, Val Acc: 76.11%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 22/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:30<00:00, 15.32it/s, loss=1.01, acc=73.8] 
Training: 100%|██████████| 475/475 [00:30<00:00, 15.32it/s, loss=1.01, acc=73.8] 
Validation: 100%|██████████| 119/119 [00:03<00:00, 36.73it/s, loss=0.834, acc=75]  
Validation: 100%|██████████| 119/119 [00:03<00:00, 36.73it/s, loss=0.834, acc=75]


Train Loss: 1.2507, Train Acc: 73.80%
Val Loss: 1.1747, Val Acc: 74.97%

Epoch 23/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.70it/s, loss=0.723, acc=71.5]
Training: 100%|██████████| 475/475 [00:32<00:00, 14.70it/s, loss=0.723, acc=71.5]
Validation: 100%|██████████| 119/119 [00:03<00:00, 33.76it/s, loss=0.891, acc=76.6]
Validation: 100%|██████████| 119/119 [00:03<00:00, 33.76it/s, loss=0.891, acc=76.6]


Train Loss: 1.2960, Train Acc: 71.50%
Val Loss: 1.1460, Val Acc: 76.55%

Epoch 24/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:39<00:00, 12.14it/s, loss=0.945, acc=74.4]
Training: 100%|██████████| 475/475 [00:39<00:00, 12.14it/s, loss=0.945, acc=74.4]
Validation: 100%|██████████| 119/119 [00:03<00:00, 33.72it/s, loss=0.795, acc=76.7]
Validation: 100%|██████████| 119/119 [00:03<00:00, 33.72it/s, loss=0.795, acc=76.7]


Train Loss: 1.2404, Train Acc: 74.38%
Val Loss: 1.1503, Val Acc: 76.66%

Epoch 25/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:33<00:00, 14.31it/s, loss=1.04, acc=74.6] 
Training: 100%|██████████| 475/475 [00:33<00:00, 14.31it/s, loss=1.04, acc=74.6] 
Validation: 100%|██████████| 119/119 [00:03<00:00, 39.41it/s, loss=0.875, acc=75.4]
Validation: 100%|██████████| 119/119 [00:03<00:00, 39.41it/s, loss=0.875, acc=75.4]


Train Loss: 1.2374, Train Acc: 74.62%
Val Loss: 1.1703, Val Acc: 75.39%

Epoch 26/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:35<00:00, 13.28it/s, loss=1.42, acc=73.9] 
Training: 100%|██████████| 475/475 [00:35<00:00, 13.28it/s, loss=1.42, acc=73.9]
Validation: 100%|██████████| 119/119 [00:03<00:00, 38.96it/s, loss=0.835, acc=76.1]
Validation: 100%|██████████| 119/119 [00:03<00:00, 38.96it/s, loss=0.835, acc=76.1]


Train Loss: 1.2500, Train Acc: 73.90%
Val Loss: 1.1527, Val Acc: 76.13%

Epoch 27/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:32<00:00, 14.81it/s, loss=0.894, acc=73.6]
Training: 100%|██████████| 475/475 [00:32<00:00, 14.81it/s, loss=0.894, acc=73.6]
Validation: 100%|██████████| 119/119 [00:03<00:00, 30.01it/s, loss=0.782, acc=75.9]
Validation: 100%|██████████| 119/119 [00:03<00:00, 30.01it/s, loss=0.782, acc=75.9]


Train Loss: 1.2555, Train Acc: 73.61%
Val Loss: 1.1834, Val Acc: 75.87%

Epoch 28/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:31<00:00, 14.89it/s, loss=1.29, acc=76]   
Training: 100%|██████████| 475/475 [00:31<00:00, 14.89it/s, loss=1.29, acc=76] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 53.52it/s, loss=0.851, acc=76.3]
Validation: 100%|██████████| 119/119 [00:02<00:00, 53.52it/s, loss=0.851, acc=76.3]


Train Loss: 1.2056, Train Acc: 76.03%
Val Loss: 1.1269, Val Acc: 76.34%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 29/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:20<00:00, 22.64it/s, loss=1.28, acc=76.9] 
Training: 100%|██████████| 475/475 [00:20<00:00, 22.64it/s, loss=1.28, acc=76.9] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 56.65it/s, loss=0.8, acc=78.1]  
Validation: 100%|██████████| 119/119 [00:02<00:00, 56.65it/s, loss=0.8, acc=78.1]


Train Loss: 1.1888, Train Acc: 76.87%
Val Loss: 1.1169, Val Acc: 78.11%
✓ Model saved to ..\runs\20251129_115258\gtzan_cnn.pth

Epoch 30/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:19<00:00, 24.13it/s, loss=0.745, acc=75.4]
Training: 100%|██████████| 475/475 [00:19<00:00, 24.13it/s, loss=0.745, acc=75.4]
Validation: 100%|██████████| 119/119 [00:02<00:00, 56.83it/s, loss=0.742, acc=76.1]
Validation: 100%|██████████| 119/119 [00:02<00:00, 56.83it/s, loss=0.742, acc=76.1]


Train Loss: 1.2188, Train Acc: 75.37%
Val Loss: 1.1660, Val Acc: 76.13%

Epoch 31/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:19<00:00, 24.18it/s, loss=1.31, acc=75.2] 
Training: 100%|██████████| 475/475 [00:19<00:00, 24.18it/s, loss=1.31, acc=75.2]
Validation: 100%|██████████| 119/119 [00:02<00:00, 56.61it/s, loss=0.765, acc=77.4]
Validation: 100%|██████████| 119/119 [00:02<00:00, 56.61it/s, loss=0.765, acc=77.4]


Train Loss: 1.2189, Train Acc: 75.18%
Val Loss: 1.1298, Val Acc: 77.45%

Epoch 32/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:19<00:00, 24.38it/s, loss=0.975, acc=74.8]
Training: 100%|██████████| 475/475 [00:19<00:00, 24.38it/s, loss=0.975, acc=74.8]
Validation: 100%|██████████| 119/119 [00:02<00:00, 57.08it/s, loss=0.744, acc=77.8]
Validation: 100%|██████████| 119/119 [00:02<00:00, 57.08it/s, loss=0.744, acc=77.8]


Train Loss: 1.2299, Train Acc: 74.83%
Val Loss: 1.1285, Val Acc: 77.84%

Epoch 33/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:19<00:00, 24.31it/s, loss=0.843, acc=75.8]
Training: 100%|██████████| 475/475 [00:19<00:00, 24.31it/s, loss=0.843, acc=75.8]
Validation: 100%|██████████| 119/119 [00:02<00:00, 55.80it/s, loss=0.847, acc=76.6]
Validation: 100%|██████████| 119/119 [00:02<00:00, 55.80it/s, loss=0.847, acc=76.6]


Train Loss: 1.2076, Train Acc: 75.84%
Val Loss: 1.1526, Val Acc: 76.61%

Epoch 34/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:19<00:00, 24.25it/s, loss=1.65, acc=74.9] 
Training: 100%|██████████| 475/475 [00:19<00:00, 24.25it/s, loss=1.65, acc=74.9] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 57.65it/s, loss=0.859, acc=77.2]
Validation: 100%|██████████| 119/119 [00:02<00:00, 57.65it/s, loss=0.859, acc=77.2]


Train Loss: 1.2214, Train Acc: 74.92%
Val Loss: 1.1303, Val Acc: 77.18%

Epoch 35/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:20<00:00, 23.55it/s, loss=1.53, acc=76]   
Training: 100%|██████████| 475/475 [00:20<00:00, 23.55it/s, loss=1.53, acc=76]
Validation: 100%|██████████| 119/119 [00:02<00:00, 48.08it/s, loss=0.778, acc=76.5]
Validation: 100%|██████████| 119/119 [00:02<00:00, 48.08it/s, loss=0.778, acc=76.5]


Train Loss: 1.2100, Train Acc: 76.00%
Val Loss: 1.1483, Val Acc: 76.50%

Epoch 36/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:20<00:00, 22.85it/s, loss=1.1, acc=77.5]  
Training: 100%|██████████| 475/475 [00:20<00:00, 22.85it/s, loss=1.1, acc=77.5]
Validation: 100%|██████████| 119/119 [00:02<00:00, 49.78it/s, loss=0.813, acc=77.1]
Validation: 100%|██████████| 119/119 [00:02<00:00, 49.78it/s, loss=0.813, acc=77.1]


Train Loss: 1.1752, Train Acc: 77.53%
Val Loss: 1.1382, Val Acc: 77.13%

Epoch 37/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:23<00:00, 20.17it/s, loss=0.8, acc=76]    
Training: 100%|██████████| 475/475 [00:23<00:00, 20.17it/s, loss=0.8, acc=76] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 48.04it/s, loss=0.839, acc=76.6]
Validation: 100%|██████████| 119/119 [00:02<00:00, 48.04it/s, loss=0.839, acc=76.6]


Train Loss: 1.2015, Train Acc: 75.96%
Val Loss: 1.1351, Val Acc: 76.63%

Epoch 38/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:22<00:00, 21.27it/s, loss=1.37, acc=75.7] 
Training: 100%|██████████| 475/475 [00:22<00:00, 21.27it/s, loss=1.37, acc=75.7]
Validation: 100%|██████████| 119/119 [00:02<00:00, 51.42it/s, loss=0.74, acc=77.3] 
Validation: 100%|██████████| 119/119 [00:02<00:00, 51.42it/s, loss=0.74, acc=77.3]


Train Loss: 1.2058, Train Acc: 75.71%
Val Loss: 1.1461, Val Acc: 77.29%

Epoch 39/50
--------------------------------------------------


Training: 100%|██████████| 475/475 [00:25<00:00, 18.42it/s, loss=1.43, acc=75.3] 
Training: 100%|██████████| 475/475 [00:25<00:00, 18.42it/s, loss=1.43, acc=75.3]
Validation: 100%|██████████| 119/119 [00:02<00:00, 48.59it/s, loss=0.869, acc=76]  



Train Loss: 1.2174, Train Acc: 75.31%
Val Loss: 1.1539, Val Acc: 76.03%

Early stopping triggered after 39 epochs


### For Multi-label Classification (MTAT)

In [10]:
# Example: Train DeepCNN on MTAT
# Uncomment and adapt to your dataset

# from notebooks.model_cnn import DeepCNN
# from notebooks.data_loading_mtat import MTATDataset, create_dataloaders

# # Create dataset
# dataset = MTATDataset(MTAT_AUDIO_PATH, MTAT_ANNOTATIONS_PATH, top_tags=50)
# train_loader, val_loader = create_dataloaders(dataset, batch_size=BATCH_SIZE)

# # Create model
# model = DeepCNN(n_classes=50)

# # Train
# history = train_multilabel(
#     model, train_loader, val_loader,
#     num_epochs=NUM_EPOCHS,
#     learning_rate=LEARNING_RATE,
#     device=device,
#     save_path='../models/mtat_cnn.pth'
# )

# # Plot results
# plot_training_history(history, multi_label=True)

## Evaluation Metrics

In [None]:
def evaluate_model(model, test_loader, device, genre_names=None, changes_file=None, split_name="Test"):
    """Evaluate a classifier batch by batch and report weighted metrics.

    Switches ``model`` to eval mode, runs every batch from ``test_loader``
    under ``torch.no_grad()``, takes the argmax over dimension 1 of the model
    output as the predicted class, and computes accuracy plus weighted
    precision / recall / F1 with scikit-learn.

    Args:
        model: Module to evaluate. It is left in eval mode on return.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the inputs are moved to before the forward pass.
        genre_names: Accepted for interface compatibility; not used here.
        changes_file: Optional path. When truthy, the metrics are appended to it.
        split_name: Label used in the progress bar, the printed report and
            the lines appended to ``changes_file``.

    Returns:
        Tuple ``(all_preds, all_labels)``: flat lists of per-sample predicted
        and true labels. Both are empty if the loader yielded no samples.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc=f'Evaluating {split_name}'):
            inputs = inputs.to(device)
            outputs = model(inputs)
            # Gradients are already disabled, so the legacy `.data` detour is unnecessary.
            predicted = outputs.argmax(dim=1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Metrics are undefined without samples; bail out instead of logging garbage.
    if not all_labels:
        print(f"\nNo samples found for {split_name}; skipping metric computation.")
        return all_preds, all_labels

    # Calculate metrics. zero_division=0 keeps the value scikit-learn already
    # uses for classes that are never predicted, without the warning.
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    print(f"\n{split_name} Metrics (Chunk-Level):")
    print(f"Accuracy: {accuracy*100:.2f}%")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    if changes_file:
        # Append so that earlier sections of the run log are preserved.
        with open(changes_file, "a") as f:
            f.write(f"- {split_name} Accuracy (Chunk): {accuracy*100:.2f}%\n")
            f.write(f"- {split_name} Precision: {precision:.4f}\n")
            f.write(f"- {split_name} Recall: {recall:.4f}\n")
            f.write(f"- {split_name} F1-Score: {f1:.4f}\n")

    return all_preds, all_labels

def evaluate_by_song(model, val_dataset, device, changes_file=None, split_name="Test"):
    """
    Evaluate accuracy by aggregating chunk predictions for each song.

    The dataset is read in consecutive groups of ``val_dataset.num_chunks``
    items; each group is treated as one song, so the dataset must be ordered
    by song. Items left over after the last complete group are ignored. The
    label of the last chunk in a group is used as the song label.

    Args:
        model: Module to evaluate. It is left in eval mode on return.
        val_dataset: Indexable dataset returning ``(chunk, label)`` pairs and
            exposing a ``num_chunks`` attribute.
        device: Device the stacked chunks are moved to.
        changes_file: Optional path. When truthy, the accuracy is appended to it.
        split_name: Label used in printed output and in the log line.

    Returns:
        Song-level accuracy as a percentage. Returns 0.0 without evaluating
        when the dataset has no ``num_chunks`` attribute or contains no
        complete song.
    """
    model.eval()

    # Check if dataset has num_chunks (ChunkedDataset)
    if not hasattr(val_dataset, 'num_chunks'):
        print("Dataset does not appear to be a ChunkedDataset. Skipping song-level evaluation.")
        return 0.0

    num_chunks = val_dataset.num_chunks
    total_songs = len(val_dataset) // num_chunks if num_chunks > 0 else 0

    # Without at least one complete song the accuracy ratio would divide by zero.
    if total_songs == 0:
        print(f"No complete songs found for {split_name}. Skipping song-level evaluation.")
        return 0.0

    print(f"\nEvaluating on {total_songs} songs ({split_name}) (aggregating {num_chunks} chunks each)...")

    correct_songs = 0

    with torch.no_grad():
        for song_idx in tqdm(range(total_songs), desc='Song Eval'):
            # Indices for this song's chunks
            start_idx = song_idx * num_chunks
            samples = [val_dataset[start_idx + offset] for offset in range(num_chunks)]

            chunks = [chunk for chunk, _ in samples]
            label = samples[-1][1]

            # Stack the chunks along a new leading dimension of size num_chunks
            chunks_tensor = torch.stack(chunks).to(device)

            # Predict
            outputs = model(chunks_tensor)

            # Voting: average the raw model outputs over the chunks, then argmax.
            # NOTE(review): if the model returns logits, this is not guaranteed to
            # pick the same class as averaging softmax probabilities would.
            avg_output = torch.mean(outputs, dim=0)
            pred_label = torch.argmax(avg_output).item()

            if pred_label == label:
                correct_songs += 1

    song_acc = 100 * correct_songs / total_songs
    print(f"{split_name} Song-Level Accuracy: {song_acc:.2f}%")

    if changes_file:
        # Append so that earlier sections of the run log are preserved.
        with open(changes_file, "a") as f:
            f.write(f"- {split_name} Song-Level Accuracy: {song_acc:.2f}%\n")

    return song_acc

# Validation split: write a section header to the run log, then report
# chunk-level and song-level metrics.
with open(changes_file, "a") as log:
    log.write("\n--- Validation Set ---\n")
print("\n--- Validation Set Evaluation ---")
evaluate_model(model, val_loader, device, GENRES, changes_file, "Validation")
evaluate_by_song(model, val_loader.dataset, device, changes_file, "Validation")

# Test split: same sequence, run against test_loader.
with open(changes_file, "a") as log:
    log.write("\n--- Test Set ---\n")
print("\n--- Test Set Evaluation ---")
evaluate_model(model, test_loader, device, GENRES, changes_file, "Test")

# Kept as the cell's final bare expression so the notebook displays the returned accuracy.
evaluate_by_song(model, test_loader.dataset, device, changes_file, "Test")


Evaluating: 100%|██████████| 119/119 [00:02<00:00, 54.62it/s]




Test Metrics (Chunk-Level):
Accuracy: 76.03%
Precision: 0.7805
Recall: 0.7603
F1-Score: 0.7583

Evaluating on 200 songs (aggregating 19 chunks each)...


Song Eval: 100%|██████████| 200/200 [00:02<00:00, 93.67it/s] 

Song-Level Accuracy: 80.50%





80.5