# Improved Hybrid CNN+MLP Training (V3) with Context Windows and Probabilities

Training enhanced version of Hybrid CNN+MLP model with probability outputs using extended context windows:
- **Uses data with ±100ms context windows** (from `02.1_d-t_dl_data_preparation_with_context.ipynb`)
- Enhanced CNN: 64→128→256→512 channels with channel attention
- Enhanced MLP: 512→512→256→128 neurons
- Residual connections with attention in CNN branch
- Improved fusion layers: 512+128→512→256→128→64→2
- Better training: 100-120 epochs, warmup, cosine annealing, gradient clipping
- **Saves probabilities for each phoneme for error analysis**


In [1]:
import sys
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
import pandas as pd
import numpy as np
import h5py
import joblib
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
from tqdm import tqdm
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
import math

# Project root
# Determine project root (parent of notebooks directory)
PROJECT_ROOT = Path.cwd().parent if Path.cwd().name == 'notebooks' else Path.cwd()

# Data directory (with context)
DATA_DIR = PROJECT_ROOT / 'artifacts' / 'd-t_dl_models_with_context'
FEATURES_DIR = DATA_DIR / 'features'

# Device setup
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print(f"Using MPS device")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using CUDA device")
else:
    device = torch.device("cpu")
    print(f"Using CPU device")

print(f"Data directory: {DATA_DIR}")
print(f"Features directory: {FEATURES_DIR}")


Using MPS device
Data directory: /Volumes/SSanDisk/SpeechRec-German/artifacts/d-t_dl_models_with_context
Features directory: /Volumes/SSanDisk/SpeechRec-German/artifacts/d-t_dl_models_with_context/features


## Load Data with Context Windows


In [2]:
# Load feature columns
with open(DATA_DIR / 'feature_cols.json', 'r') as f:
    feature_cols = json.load(f)

# Load feature scaler
feature_scaler = joblib.load(DATA_DIR / 'feature_scaler.joblib')

# Load class weights
with open(DATA_DIR / 'class_weights.json', 'r') as f:
    class_weights_dict = json.load(f)

# Load features DataFrame (already contains all phoneme metadata with context windows)
# This file was created in 02.1 and includes phoneme_id, class, duration_ms, and all extracted features
df = pd.read_parquet(FEATURES_DIR / 'features.parquet')
print(f"Dataset shape: {df.shape}")
print(f"Feature columns: {len(feature_cols)}")

# Check what metadata columns we have
metadata_cols = ['phoneme_id', 'class', 'duration_ms', 'phoneme', 'utterance_id']
present_metadata = [col for col in metadata_cols if col in df.columns]
print(f"\nMetadata columns present: {present_metadata}")

# Note: features.parquet from 02.1 contains phoneme_id and class, but may not have utterance_id and phoneme
# We need to check if we need to load additional metadata or if it's already there
# For now, we'll work with what we have - phoneme_id and class should be sufficient

# Handle class column - it should be in features.parquet from 02.1
if 'class' not in df.columns:
    if 'phoneme' in df.columns:
        df['class'] = df['phoneme']
        print("Created 'class' column from 'phoneme'")
    else:
        raise ValueError("Neither 'class' nor 'phoneme' column found in features.parquet. Make sure 02.1 was run successfully.")

# Filter to only d and t classes
if 'd' in df['class'].values or 't' in df['class'].values:
    df = df[df['class'].isin(['d', 't'])].copy()
    print(f"Dataset after filtering to d/t: {len(df)} samples")

# Encode target
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['class_encoded'] = le.fit_transform(df['class'])  # d=0, t=1
print(f"\nClass encoding: {dict(zip(le.classes_, le.transform(le.classes_)))}")
print(f"Class distribution:\n{df['class'].value_counts()}")

# Load split indices (these are DataFrame indices from 02.1)
# After merge, indices may have changed, so we need to reset index first
with open(DATA_DIR / 'split_indices.json', 'r') as f:
    split_indices = json.load(f)

# Reset index to ensure we can use the indices from split_indices
df = df.reset_index(drop=True)

# Create split column based on indices
df['split'] = 'train'
if len(df) > max(split_indices['val'] + split_indices['test']):
    # Indices are valid, use them directly
    df.loc[split_indices['val'], 'split'] = 'val'
    df.loc[split_indices['test'], 'split'] = 'test'
else:
    # If indices don't match (e.g., after filtering), use phoneme_id matching
    # This is a fallback - normally indices should match
    print("Warning: Split indices may not match DataFrame indices. Using phoneme_id matching...")
    val_ids = set(df.loc[split_indices['val'], 'phoneme_id'].values) if len(df) > max(split_indices['val']) else set()
    test_ids = set(df.loc[split_indices['test'], 'phoneme_id'].values) if len(df) > max(split_indices['test']) else set()
    df.loc[df['phoneme_id'].isin(val_ids), 'split'] = 'val'
    df.loc[df['phoneme_id'].isin(test_ids), 'split'] = 'test'

print(f"\nSplit distribution:")
print(df['split'].value_counts())

# Load spectrograms
# Note: phoneme_id in h5 file might be string, so we keep it as string for matching
spectrograms_dict = {}
with h5py.File(FEATURES_DIR / 'spectrograms.h5', 'r') as f:
    phoneme_ids = list(f.keys())
    for phoneme_id in tqdm(phoneme_ids, desc="Loading spectrograms"):
        # Keep phoneme_id as string to match with DataFrame
        spectrograms_dict[phoneme_id] = f[phoneme_id][:]

print(f"\nLoaded {len(spectrograms_dict):,} spectrograms")
if spectrograms_dict:
    print(f"Spectrogram shape: {list(spectrograms_dict.values())[0].shape}")

# Filter to only phonemes with spectrograms
# Convert phoneme_id to string for matching if needed
df['phoneme_id_str'] = df['phoneme_id'].astype(str)
df['has_spectrogram'] = df['phoneme_id_str'].isin(spectrograms_dict.keys())
df = df[df['has_spectrogram']].copy()
print(f"\nDataset after filtering for spectrograms: {len(df)} samples")


Dataset shape: (132992, 112)
Feature columns: 109

Metadata columns present: ['phoneme_id', 'class', 'duration_ms']
Dataset after filtering to d/t: 132992 samples

Class encoding: {'d': np.int64(0), 't': np.int64(1)}
Class distribution:
class
t    74454
d    58538
Name: count, dtype: int64

Split distribution:
split
train    93147
test     19949
val      19896
Name: count, dtype: int64


Loading spectrograms: 100%|██████████| 132992/132992 [00:09<00:00, 14617.31it/s]



Loaded 132,992 spectrograms
Spectrogram shape: (128, 7)

Dataset after filtering for spectrograms: 132992 samples


## Create Dataset Classes and DataLoaders


In [3]:
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler

class HybridDataset(Dataset):
    """Dataset for hybrid models using both spectrograms and features"""
    def __init__(self, df, spectrograms_dict, feature_cols, scaler=None, split='train', fit_scaler=False, transform=None):
        self.df = df[df['split'] == split].reset_index(drop=True)
        self.spectrograms_dict = spectrograms_dict
        self.feature_cols = feature_cols
        self.transform = transform
        
        # Extract and scale features
        X_features = self.df[feature_cols].values.astype(np.float32)
        X_features = np.nan_to_num(X_features, nan=0.0, posinf=0.0, neginf=0.0)
        
        if fit_scaler:
            self.scaler = StandardScaler()
            X_features = self.scaler.fit_transform(X_features)
        elif scaler is not None:
            self.scaler = scaler
            X_features = self.scaler.transform(X_features)
        else:
            self.scaler = None
        
        self.X_features = torch.from_numpy(X_features)
        self.y = torch.from_numpy(self.df['class_encoded'].values).long()
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        phoneme_id = row['phoneme_id']
        
        # Get spectrogram
        spectrogram = self.spectrograms_dict[phoneme_id].astype(np.float32)
        if len(spectrogram.shape) == 2:
            spectrogram = np.expand_dims(spectrogram, axis=0)
        spectrogram = (spectrogram - spectrogram.min()) / (spectrogram.max() - spectrogram.min() + 1e-8)
        
        if self.transform:
            spectrogram = self.transform(spectrogram)
        
        features = self.X_features[idx]
        label = self.y[idx]
        
        return (torch.from_numpy(spectrogram), features), label

# Create datasets
train_hybrid_ds = HybridDataset(df, spectrograms_dict, feature_cols, scaler=feature_scaler, split='train')
val_hybrid_ds = HybridDataset(df, spectrograms_dict, feature_cols, scaler=feature_scaler, split='val')
test_hybrid_ds = HybridDataset(df, spectrograms_dict, feature_cols, scaler=feature_scaler, split='test')

print(f"Train dataset: {len(train_hybrid_ds)} samples")
print(f"Val dataset: {len(val_hybrid_ds)} samples")
print(f"Test dataset: {len(test_hybrid_ds)} samples")

# Create weighted sampler for training
train_labels = df[df['split'] == 'train']['class_encoded'].values
class_weights_array = np.array([class_weights_dict.get(str(i), class_weights_dict.get(i, 1.0)) for i in range(2)])
sample_weights = np.array([class_weights_array[label] for label in train_labels])
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# Create DataLoaders
BATCH_SIZE = 64
train_hybrid_loader = DataLoader(train_hybrid_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=0)
val_hybrid_loader = DataLoader(val_hybrid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_hybrid_loader = DataLoader(test_hybrid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f"\nTrain batches: {len(train_hybrid_loader)}")
print(f"Val batches: {len(val_hybrid_loader)}")
print(f"Test batches: {len(test_hybrid_loader)}")

# Test a batch
sample_batch = next(iter(train_hybrid_loader))
print(f"\nSample batch - Spectrogram shape: {sample_batch[0][0].shape}, Features shape: {sample_batch[0][1].shape}, Labels shape: {sample_batch[1].shape}")


Feature count mismatch detected: 108 features in DataFrame vs 109 in scaler
Retraining scaler on train split with current features...
Scaler retrained on 108 features
Train dataset: 93147 samples
Val dataset: 19896 samples
Test dataset: 19949 samples

Train batches: 1456
Val batches: 311
Test batches: 312

Sample batch - Spectrogram shape: torch.Size([64, 1, 128, 7]), Features shape: torch.Size([64, 108]), Labels shape: torch.Size([64])


In [4]:
# Define Residual Block for CNN
class ResidualBlock2D(nn.Module):
    """Residual block for CNN branch"""
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock2D, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        # Shortcut connection
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


# Define Channel Attention Module
class ChannelAttention(nn.Module):
    """Channel attention module"""
    def __init__(self, channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction, bias=False),
            nn.ReLU(),
            nn.Linear(channels // reduction, channels, bias=False),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        b, c, _, _ = x.size()
        avg_out = self.fc(self.avg_pool(x).view(b, c))
        max_out = self.fc(self.max_pool(x).view(b, c))
        out = avg_out + max_out
        return x * out.view(b, c, 1, 1)


# Define Hybrid CNN+MLP Model V3
class HybridCNNMLP_V3(nn.Module):
    """
    Enhanced Hybrid model: CNN for spectrograms + MLP for features
    Version 3: 
    - Deeper CNN: 64->128->256->512 channels with attention
    - Enhanced MLP: 512->512->256->128 neurons
    - Attention mechanism in CNN branch
    - Improved fusion with attention
    Input: (spectrogram: batch, 1, 128, 7), (features: batch, n_features)
    Output: (batch, 2) - binary classification logits
    """
    
    def __init__(self, n_features=109, num_classes=2, dropout=0.3):
        super(HybridCNNMLP_V3, self).__init__()
        
        # Enhanced CNN branch with attention
        self.cnn_branch = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # (64, 64, 3)
            
            ResidualBlock2D(64, 128),
            ChannelAttention(128),
            nn.MaxPool2d(2, 2),  # (128, 32, 1)
            
            ResidualBlock2D(128, 256),
            ChannelAttention(256),
            ResidualBlock2D(256, 512),
            ChannelAttention(512),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )
        
        # Enhanced MLP branch
        self.mlp_branch = nn.Sequential(
            nn.Linear(n_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout),
            
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout * 0.75),
            
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5),
            
            nn.Linear(256, 128)
        )
        
        # Enhanced Fusion layer
        self.fusion = nn.Sequential(
            nn.Linear(512 + 128, 512),  # CNN output (512) + MLP output (128)
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout),
            
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout * 0.75),
            
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5),
            
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout * 0.25),
            
            nn.Linear(64, num_classes)
        )
        
    def forward(self, x):
        spectrogram, features = x
        
        # CNN branch
        cnn_out = self.cnn_branch(spectrogram)  # (batch, 512)
        
        # MLP branch
        mlp_out = self.mlp_branch(features)  # (batch, 128)
        
        # Concatenate
        fused = torch.cat([cnn_out, mlp_out], dim=1)  # (batch, 640)
        
        # Final classification
        out = self.fusion(fused)  # (batch, 2)
        
        return out
    
    def get_config(self):
        """Return model configuration"""
        return {
            'model_type': 'HybridCNNMLP_V3',
            'num_classes': 2,
            'n_features': 109,
            'input_shapes': {
                'spectrogram': (1, 128, 7),
                'features': (109,)
            },
            'version': '3.0'
        }

print("Model architecture defined successfully!")


Model architecture defined successfully!


In [5]:
# Training utilities
def train_epoch(model, dataloader, criterion, optimizer, device, max_grad_norm=None):
    """Train for one epoch with optional gradient clipping"""
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    for batch in tqdm(dataloader, desc="Training", leave=False):
        # Handle hybrid model inputs: ((spectrograms, features), labels)
        if isinstance(batch[0], (tuple, list)) and len(batch[0]) == 2:
            inputs = tuple(x.to(device) for x in batch[0])
        else:
            inputs = batch[0].to(device)
        
        labels = batch[1].to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # Gradient clipping
        if max_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        optimizer.step()
        
        running_loss += loss.item()
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())
    
    avg_loss = running_loss / len(dataloader)
    accuracy = accuracy_score(all_labels, all_preds)
    
    return avg_loss, accuracy


def validate(model, dataloader, criterion, device):
    """Validate model"""
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Validating", leave=False):
            # Handle hybrid model inputs: ((spectrograms, features), labels)
            if isinstance(batch[0], (tuple, list)) and len(batch[0]) == 2:
                inputs = tuple(x.to(device) for x in batch[0])
            else:
                inputs = batch[0].to(device)
            
            labels = batch[1].to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            probs = torch.softmax(outputs, dim=1).cpu().numpy()
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs)
    
    avg_loss = running_loss / len(dataloader)
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    
    try:
        roc_auc = roc_auc_score(all_labels, np.array(all_probs)[:, 1])
    except:
        roc_auc = 0.0
    
    metrics = {
        'loss': avg_loss,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': roc_auc
    }
    
    return metrics, all_preds, all_labels, all_probs


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                device, num_epochs, save_dir, model_name, early_stopping_patience=10, max_grad_norm=None):
    """Train model with early stopping and optional gradient clipping"""
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)
    
    best_val_f1 = 0.0
    best_epoch = 0
    patience_counter = 0
    training_history = []
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)
        
        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, max_grad_norm)
        
        # Validate
        val_metrics, _, _, _ = validate(model, val_loader, criterion, device)
        
        # Update learning rate
        if scheduler is not None:
            scheduler.step()
        
        # Log metrics
        current_lr = optimizer.param_groups[0]['lr']
        epoch_metrics = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_acc,
            'val_loss': val_metrics['loss'],
            'val_accuracy': val_metrics['accuracy'],
            'val_precision': val_metrics['precision'],
            'val_recall': val_metrics['recall'],
            'val_f1': val_metrics['f1'],
            'val_roc_auc': val_metrics['roc_auc'],
            'learning_rate': current_lr
        }
        training_history.append(epoch_metrics)
        
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_metrics['loss']:.4f}, Val Acc: {val_metrics['accuracy']:.4f}")
        print(f"Val F1: {val_metrics['f1']:.4f}, Val ROC-AUC: {val_metrics['roc_auc']:.4f}")
        print(f"Learning Rate: {current_lr:.6f}")
        
        # Save best model
        if val_metrics['f1'] > best_val_f1:
            best_val_f1 = val_metrics['f1']
            best_epoch = epoch + 1
            patience_counter = 0
            
            # Save model
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_f1': best_val_f1,
                'val_metrics': val_metrics
            }, save_dir / 'best_model.pt')
            
            print(f"✓ New best model saved! (F1: {best_val_f1:.4f})")
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print(f"\nEarly stopping at epoch {epoch+1}")
                print(f"Best F1: {best_val_f1:.4f} at epoch {best_epoch}")
                break
    
    # Save training history
    with open(save_dir / 'training_history.json', 'w') as f:
        json.dump(training_history, f, indent=2)
    
    # Save config
    config = model.get_config() if hasattr(model, 'get_config') else {}
    config.update({
        'best_epoch': best_epoch,
        'best_val_f1': best_val_f1,
        'num_epochs': num_epochs
    })
    with open(save_dir / 'config.json', 'w') as f:
        json.dump(config, f, indent=2)
    
    return training_history, best_epoch


def evaluate_model(model, test_loader, criterion, device):
    """Evaluate model on test set"""
    metrics, preds, labels, probs = validate(model, test_loader, criterion, device)
    
    # Per-class metrics
    precision_per_class = precision_score(labels, preds, average=None, zero_division=0)
    recall_per_class = recall_score(labels, preds, average=None, zero_division=0)
    f1_per_class = f1_score(labels, preds, average=None, zero_division=0)
    
    metrics['precision_d'] = float(precision_per_class[0])
    metrics['precision_t'] = float(precision_per_class[1])
    metrics['recall_d'] = float(recall_per_class[0])
    metrics['recall_t'] = float(recall_per_class[1])
    metrics['f1_d'] = float(f1_per_class[0])
    metrics['f1_t'] = float(f1_per_class[1])
    metrics['confusion_matrix'] = confusion_matrix(labels, preds).tolist()
    
    return metrics, preds, labels, probs


class LabelSmoothingCrossEntropy(nn.Module):
    """Label smoothing cross entropy loss"""
    def __init__(self, smoothing=0.1, weight=None):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smoothing
        self.weight = weight
    
    def forward(self, pred, target):
        log_prob = F.log_softmax(pred, dim=1)
        nll_loss = -log_prob.gather(dim=1, index=target.unsqueeze(1)).squeeze(1)
        
        if self.weight is not None:
            nll_loss = nll_loss * self.weight[target]
        
        smooth_loss = -log_prob.mean(dim=1)
        if self.weight is not None:
            smooth_loss = smooth_loss * self.weight.mean()
        
        loss = (1.0 - self.smoothing) * nll_loss + self.smoothing * smooth_loss
        return loss.mean()


class WarmupCosineScheduler:
    """Learning rate scheduler with warmup and cosine annealing"""
    def __init__(self, optimizer, warmup_epochs, total_epochs, min_lr=1e-6):
        self.optimizer = optimizer
        self.warmup_epochs = warmup_epochs
        self.total_epochs = total_epochs
        self.min_lr = min_lr
        self.base_lr = optimizer.param_groups[0]['lr']
        self.current_epoch = 0
    
    def step(self):
        self.current_epoch += 1
        
        if self.current_epoch <= self.warmup_epochs:
            # Warmup phase: linear increase
            lr = self.base_lr * (self.current_epoch / self.warmup_epochs)
        else:
            # Cosine annealing phase
            progress = (self.current_epoch - self.warmup_epochs) / (self.total_epochs - self.warmup_epochs)
            lr = self.min_lr + (self.base_lr - self.min_lr) * 0.5 * (1 + math.cos(math.pi * progress))
        
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
    
    def get_last_lr(self):
        return [self.optimizer.param_groups[0]['lr']]

print("Training utilities defined successfully!")


Training utilities defined successfully!


In [6]:
# Create model
model = HybridCNNMLP_V3(n_features=len(feature_cols), num_classes=2, dropout=0.3).to(device)

# Print model info
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Model: {model.get_config()['model_type']}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Prepare class weights for loss function
class_weights = torch.tensor([
    class_weights_dict.get('0', class_weights_dict.get(0, 1.0)), 
    class_weights_dict.get('1', class_weights_dict.get(1, 1.0))
], dtype=torch.float32).to(device)

# Loss function with label smoothing
criterion = LabelSmoothingCrossEntropy(smoothing=0.1, weight=class_weights)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

# Learning rate scheduler with warmup and cosine annealing
num_epochs = 100
warmup_epochs = 5
scheduler = WarmupCosineScheduler(optimizer, warmup_epochs=warmup_epochs, total_epochs=num_epochs, min_lr=1e-6)

# Output directory
OUTPUT_DIR = DATA_DIR / 'improved_models'
save_dir = OUTPUT_DIR / 'hybrid_cnn_mlp_v3_with_context'
save_dir.mkdir(parents=True, exist_ok=True)

print(f"\nTraining configuration:")
print(f"- Epochs: {num_epochs}")
print(f"- Warmup epochs: {warmup_epochs}")
print(f"- Initial LR: {optimizer.param_groups[0]['lr']}")
print(f"- Label smoothing: 0.1")
print(f"- Gradient clipping: 1.0")
print(f"- Early stopping patience: 15")
print(f"- Dropout: 0.3")
print(f"- Context windows: ±100ms")
print(f"- Save directory: {save_dir}")


Model: HybridCNNMLP_V3
Total parameters: 5,856,706
Trainable parameters: 5,856,706

Training configuration:
- Epochs: 100
- Warmup epochs: 5
- Initial LR: 0.001
- Label smoothing: 0.1
- Gradient clipping: 1.0
- Early stopping patience: 15
- Dropout: 0.3
- Context windows: ±100ms
- Save directory: /Volumes/SSanDisk/SpeechRec-German/artifacts/d-t_dl_models_with_context/improved_models/hybrid_cnn_mlp_v3_with_context


## Train Model


In [7]:
# Train model
history, best_epoch = train_model(
    model, train_hybrid_loader, val_hybrid_loader, criterion, optimizer, scheduler,
    device, num_epochs=num_epochs, save_dir=save_dir, model_name='hybrid_cnn_mlp_v3_with_context', 
    early_stopping_patience=30, max_grad_norm=1.0
)

# Load best model and evaluate on test set
checkpoint = torch.load(save_dir / 'best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
test_metrics, test_preds, test_labels, test_probs = evaluate_model(model, test_hybrid_loader, criterion, device)

# Save test metrics
with open(save_dir / 'test_metrics.json', 'w') as f:
    json.dump(test_metrics, f, indent=2)

print(f"\n{'='*60}")
print(f"Final Test Results:")
print(f"{'='*60}")
print(f"Accuracy: {test_metrics['accuracy']:.4f}")
print(f"F1-score: {test_metrics['f1']:.4f}")
print(f"ROC-AUC: {test_metrics['roc_auc']:.4f}")
print(f"Precision: {test_metrics['precision']:.4f}")
print(f"Recall: {test_metrics['recall']:.4f}")
print(f"Best epoch: {best_epoch}")



Epoch 1/100
--------------------------------------------------


                                                             

Train Loss: 0.3388, Train Acc: 0.9158
Val Loss: 0.3267, Val Acc: 0.9282
Val F1: 0.9280, Val ROC-AUC: 0.9792
Learning Rate: 0.000200
✓ New best model saved! (F1: 0.9280)

Epoch 2/100
--------------------------------------------------


                                                             

Train Loss: 0.2993, Train Acc: 0.9423
Val Loss: 0.2935, Val Acc: 0.9432
Val F1: 0.9433, Val ROC-AUC: 0.9868
Learning Rate: 0.000400
✓ New best model saved! (F1: 0.9433)

Epoch 3/100
--------------------------------------------------


                                                             

Train Loss: 0.2972, Train Acc: 0.9435
Val Loss: 0.2849, Val Acc: 0.9494
Val F1: 0.9494, Val ROC-AUC: 0.9879
Learning Rate: 0.000600
✓ New best model saved! (F1: 0.9494)

Epoch 4/100
--------------------------------------------------


                                                             

Train Loss: 0.2928, Train Acc: 0.9464
Val Loss: 0.2900, Val Acc: 0.9466
Val F1: 0.9467, Val ROC-AUC: 0.9874
Learning Rate: 0.000800

Epoch 5/100
--------------------------------------------------


                                                             

Train Loss: 0.2929, Train Acc: 0.9471
Val Loss: 0.2890, Val Acc: 0.9467
Val F1: 0.9468, Val ROC-AUC: 0.9874
Learning Rate: 0.001000

Epoch 6/100
--------------------------------------------------


                                                             

Train Loss: 0.2900, Train Acc: 0.9477
Val Loss: 0.2859, Val Acc: 0.9483
Val F1: 0.9484, Val ROC-AUC: 0.9875
Learning Rate: 0.001000

Epoch 7/100
--------------------------------------------------


                                                             

Train Loss: 0.2874, Train Acc: 0.9495
Val Loss: 0.2847, Val Acc: 0.9478
Val F1: 0.9478, Val ROC-AUC: 0.9885
Learning Rate: 0.000999

Epoch 8/100
--------------------------------------------------


                                                             

Train Loss: 0.2830, Train Acc: 0.9523
Val Loss: 0.2807, Val Acc: 0.9533
Val F1: 0.9533, Val ROC-AUC: 0.9890
Learning Rate: 0.000998
✓ New best model saved! (F1: 0.9533)

Epoch 9/100
--------------------------------------------------


                                                             

Train Loss: 0.2786, Train Acc: 0.9552
Val Loss: 0.2938, Val Acc: 0.9401
Val F1: 0.9403, Val ROC-AUC: 0.9896
Learning Rate: 0.000996

Epoch 10/100
--------------------------------------------------


                                                             

Train Loss: 0.2754, Train Acc: 0.9570
Val Loss: 0.2916, Val Acc: 0.9431
Val F1: 0.9432, Val ROC-AUC: 0.9876
Learning Rate: 0.000993

Epoch 11/100
--------------------------------------------------


                                                             

Train Loss: 0.2734, Train Acc: 0.9582
Val Loss: 0.2732, Val Acc: 0.9573
Val F1: 0.9574, Val ROC-AUC: 0.9907
Learning Rate: 0.000990
✓ New best model saved! (F1: 0.9574)

Epoch 12/100
--------------------------------------------------


                                                             

Train Loss: 0.2699, Train Acc: 0.9600
Val Loss: 0.2877, Val Acc: 0.9512
Val F1: 0.9511, Val ROC-AUC: 0.9902
Learning Rate: 0.000987

Epoch 13/100
--------------------------------------------------


                                                             

Train Loss: 0.2696, Train Acc: 0.9606
Val Loss: 0.2743, Val Acc: 0.9566
Val F1: 0.9566, Val ROC-AUC: 0.9910
Learning Rate: 0.000983

Epoch 14/100
--------------------------------------------------


                                                             

Train Loss: 0.2659, Train Acc: 0.9626
Val Loss: 0.2766, Val Acc: 0.9529
Val F1: 0.9530, Val ROC-AUC: 0.9908
Learning Rate: 0.000978

Epoch 15/100
--------------------------------------------------


                                                             

Train Loss: 0.2627, Train Acc: 0.9648
Val Loss: 0.2782, Val Acc: 0.9547
Val F1: 0.9546, Val ROC-AUC: 0.9906
Learning Rate: 0.000973

Epoch 16/100
--------------------------------------------------


                                                             

Train Loss: 0.2629, Train Acc: 0.9643
Val Loss: 0.2790, Val Acc: 0.9526
Val F1: 0.9526, Val ROC-AUC: 0.9907
Learning Rate: 0.000967

Epoch 17/100
--------------------------------------------------


                                                             

Train Loss: 0.2588, Train Acc: 0.9673
Val Loss: 0.2697, Val Acc: 0.9593
Val F1: 0.9594, Val ROC-AUC: 0.9919
Learning Rate: 0.000961
✓ New best model saved! (F1: 0.9594)

Epoch 18/100
--------------------------------------------------


                                                             

Train Loss: 0.2567, Train Acc: 0.9682
Val Loss: 0.2736, Val Acc: 0.9565
Val F1: 0.9566, Val ROC-AUC: 0.9911
Learning Rate: 0.000955

Epoch 19/100
--------------------------------------------------


                                                             

Train Loss: 0.2562, Train Acc: 0.9686
Val Loss: 0.2748, Val Acc: 0.9558
Val F1: 0.9558, Val ROC-AUC: 0.9905
Learning Rate: 0.000947

Epoch 20/100
--------------------------------------------------


                                                             

Train Loss: 0.2549, Train Acc: 0.9696
Val Loss: 0.2723, Val Acc: 0.9585
Val F1: 0.9585, Val ROC-AUC: 0.9912
Learning Rate: 0.000940

Epoch 21/100
--------------------------------------------------


                                                             

Train Loss: 0.2538, Train Acc: 0.9701
Val Loss: 0.2725, Val Acc: 0.9585
Val F1: 0.9586, Val ROC-AUC: 0.9905
Learning Rate: 0.000932

Epoch 22/100
--------------------------------------------------


                                                             

Train Loss: 0.2526, Train Acc: 0.9710
Val Loss: 0.2711, Val Acc: 0.9591
Val F1: 0.9591, Val ROC-AUC: 0.9914
Learning Rate: 0.000923

Epoch 23/100
--------------------------------------------------


                                                             

Train Loss: 0.2501, Train Acc: 0.9717
Val Loss: 0.2723, Val Acc: 0.9566
Val F1: 0.9566, Val ROC-AUC: 0.9917
Learning Rate: 0.000914

Epoch 24/100
--------------------------------------------------


                                                             

Train Loss: 0.2488, Train Acc: 0.9730
Val Loss: 0.2691, Val Acc: 0.9589
Val F1: 0.9590, Val ROC-AUC: 0.9924
Learning Rate: 0.000905

Epoch 25/100
--------------------------------------------------


                                                             

Train Loss: 0.2469, Train Acc: 0.9738
Val Loss: 0.2715, Val Acc: 0.9597
Val F1: 0.9597, Val ROC-AUC: 0.9914
Learning Rate: 0.000895
✓ New best model saved! (F1: 0.9597)

Epoch 26/100
--------------------------------------------------


                                                             

Train Loss: 0.2457, Train Acc: 0.9749
Val Loss: 0.2776, Val Acc: 0.9561
Val F1: 0.9561, Val ROC-AUC: 0.9871
Learning Rate: 0.000884

Epoch 27/100
--------------------------------------------------


                                                             

Train Loss: 0.2461, Train Acc: 0.9749
Val Loss: 0.2706, Val Acc: 0.9609
Val F1: 0.9610, Val ROC-AUC: 0.9914
Learning Rate: 0.000874
✓ New best model saved! (F1: 0.9610)

Epoch 28/100
--------------------------------------------------


                                                             

Train Loss: 0.2430, Train Acc: 0.9764
Val Loss: 0.2701, Val Acc: 0.9603
Val F1: 0.9603, Val ROC-AUC: 0.9912
Learning Rate: 0.000862

Epoch 29/100
--------------------------------------------------


                                                             

Train Loss: 0.2429, Train Acc: 0.9767
Val Loss: 0.2705, Val Acc: 0.9599
Val F1: 0.9600, Val ROC-AUC: 0.9913
Learning Rate: 0.000851

Epoch 30/100
--------------------------------------------------


                                                             

Train Loss: 0.2425, Train Acc: 0.9771
Val Loss: 0.2687, Val Acc: 0.9602
Val F1: 0.9602, Val ROC-AUC: 0.9920
Learning Rate: 0.000839

Epoch 31/100
--------------------------------------------------


                                                             

Train Loss: 0.2410, Train Acc: 0.9779
Val Loss: 0.2710, Val Acc: 0.9612
Val F1: 0.9612, Val ROC-AUC: 0.9912
Learning Rate: 0.000826
✓ New best model saved! (F1: 0.9612)

Epoch 32/100
--------------------------------------------------


                                                             

Train Loss: 0.2397, Train Acc: 0.9778
Val Loss: 0.2699, Val Acc: 0.9601
Val F1: 0.9602, Val ROC-AUC: 0.9915
Learning Rate: 0.000814

Epoch 33/100
--------------------------------------------------


                                                             

Train Loss: 0.2371, Train Acc: 0.9805
Val Loss: 0.2730, Val Acc: 0.9605
Val F1: 0.9605, Val ROC-AUC: 0.9914
Learning Rate: 0.000801

Epoch 34/100
--------------------------------------------------


                                                             

Train Loss: 0.2368, Train Acc: 0.9801
Val Loss: 0.2813, Val Acc: 0.9521
Val F1: 0.9522, Val ROC-AUC: 0.9898
Learning Rate: 0.000787

Epoch 35/100
--------------------------------------------------


                                                             

Train Loss: 0.2369, Train Acc: 0.9802
Val Loss: 0.2744, Val Acc: 0.9601
Val F1: 0.9601, Val ROC-AUC: 0.9912
Learning Rate: 0.000774

Epoch 36/100
--------------------------------------------------


                                                             

Train Loss: 0.2357, Train Acc: 0.9810
Val Loss: 0.2714, Val Acc: 0.9604
Val F1: 0.9604, Val ROC-AUC: 0.9916
Learning Rate: 0.000760

Epoch 37/100
--------------------------------------------------


                                                             

Train Loss: 0.2337, Train Acc: 0.9823
Val Loss: 0.2729, Val Acc: 0.9603
Val F1: 0.9603, Val ROC-AUC: 0.9907
Learning Rate: 0.000745

Epoch 38/100
--------------------------------------------------


                                                             

Train Loss: 0.2335, Train Acc: 0.9819
Val Loss: 0.2746, Val Acc: 0.9600
Val F1: 0.9600, Val ROC-AUC: 0.9914
Learning Rate: 0.000731

Epoch 39/100
--------------------------------------------------


                                                             

Train Loss: 0.2324, Train Acc: 0.9825
Val Loss: 0.2717, Val Acc: 0.9596
Val F1: 0.9597, Val ROC-AUC: 0.9913
Learning Rate: 0.000716

Epoch 40/100
--------------------------------------------------


                                                             

Train Loss: 0.2297, Train Acc: 0.9846
Val Loss: 0.2726, Val Acc: 0.9616
Val F1: 0.9615, Val ROC-AUC: 0.9914
Learning Rate: 0.000701
✓ New best model saved! (F1: 0.9615)

Epoch 41/100
--------------------------------------------------


                                                             

Train Loss: 0.2304, Train Acc: 0.9838
Val Loss: 0.2772, Val Acc: 0.9571
Val F1: 0.9572, Val ROC-AUC: 0.9905
Learning Rate: 0.000686

Epoch 42/100
--------------------------------------------------


                                                             

Train Loss: 0.2272, Train Acc: 0.9861
Val Loss: 0.2725, Val Acc: 0.9621
Val F1: 0.9621, Val ROC-AUC: 0.9909
Learning Rate: 0.000670
✓ New best model saved! (F1: 0.9621)

Epoch 43/100
--------------------------------------------------


                                                             

Train Loss: 0.2284, Train Acc: 0.9852
Val Loss: 0.2755, Val Acc: 0.9610
Val F1: 0.9610, Val ROC-AUC: 0.9913
Learning Rate: 0.000655

Epoch 44/100
--------------------------------------------------


                                                               

Train Loss: 0.2265, Train Acc: 0.9865
Val Loss: 0.2882, Val Acc: 0.9547
Val F1: 0.9546, Val ROC-AUC: 0.9901
Learning Rate: 0.000639

Epoch 45/100
--------------------------------------------------


                                                             

Train Loss: 0.2267, Train Acc: 0.9866
Val Loss: 0.2754, Val Acc: 0.9601
Val F1: 0.9601, Val ROC-AUC: 0.9908
Learning Rate: 0.000623

Epoch 46/100
--------------------------------------------------


                                                             

Train Loss: 0.2248, Train Acc: 0.9874
Val Loss: 0.2771, Val Acc: 0.9587
Val F1: 0.9587, Val ROC-AUC: 0.9907
Learning Rate: 0.000607

Epoch 47/100
--------------------------------------------------


                                                             

Train Loss: 0.2249, Train Acc: 0.9872
Val Loss: 0.2733, Val Acc: 0.9586
Val F1: 0.9586, Val ROC-AUC: 0.9909
Learning Rate: 0.000591

Epoch 48/100
--------------------------------------------------


                                                             

Train Loss: 0.2228, Train Acc: 0.9885
Val Loss: 0.2746, Val Acc: 0.9603
Val F1: 0.9603, Val ROC-AUC: 0.9897
Learning Rate: 0.000575

Epoch 49/100
--------------------------------------------------


                                                             

Train Loss: 0.2210, Train Acc: 0.9897
Val Loss: 0.2758, Val Acc: 0.9603
Val F1: 0.9604, Val ROC-AUC: 0.9873
Learning Rate: 0.000558

Epoch 50/100
--------------------------------------------------


                                                             

Train Loss: 0.2217, Train Acc: 0.9892
Val Loss: 0.2808, Val Acc: 0.9590
Val F1: 0.9589, Val ROC-AUC: 0.9901
Learning Rate: 0.000542

Epoch 51/100
--------------------------------------------------


                                                             

Train Loss: 0.2204, Train Acc: 0.9898
Val Loss: 0.2828, Val Acc: 0.9556
Val F1: 0.9557, Val ROC-AUC: 0.9893
Learning Rate: 0.000525

Epoch 52/100
--------------------------------------------------


                                                             

Train Loss: 0.2204, Train Acc: 0.9900
Val Loss: 0.2748, Val Acc: 0.9599
Val F1: 0.9600, Val ROC-AUC: 0.9890
Learning Rate: 0.000509

Epoch 53/100
--------------------------------------------------


                                                             

Train Loss: 0.2191, Train Acc: 0.9907
Val Loss: 0.2829, Val Acc: 0.9575
Val F1: 0.9574, Val ROC-AUC: 0.9881
Learning Rate: 0.000492

Epoch 54/100
--------------------------------------------------


                                                             

Train Loss: 0.2196, Train Acc: 0.9906
Val Loss: 0.2813, Val Acc: 0.9582
Val F1: 0.9582, Val ROC-AUC: 0.9892
Learning Rate: 0.000476

Epoch 55/100
--------------------------------------------------


                                                             

Train Loss: 0.2180, Train Acc: 0.9914
Val Loss: 0.2799, Val Acc: 0.9601
Val F1: 0.9600, Val ROC-AUC: 0.9827
Learning Rate: 0.000459

Epoch 56/100
--------------------------------------------------


                                                             

Train Loss: 0.2172, Train Acc: 0.9918
Val Loss: 0.2756, Val Acc: 0.9611
Val F1: 0.9611, Val ROC-AUC: 0.9867
Learning Rate: 0.000443

Epoch 57/100
--------------------------------------------------


                                                             

Train Loss: 0.2170, Train Acc: 0.9919
Val Loss: 0.2796, Val Acc: 0.9594
Val F1: 0.9594, Val ROC-AUC: 0.9871
Learning Rate: 0.000426

Epoch 58/100
--------------------------------------------------


                                                             

Train Loss: 0.2154, Train Acc: 0.9929
Val Loss: 0.2802, Val Acc: 0.9604
Val F1: 0.9604, Val ROC-AUC: 0.9816
Learning Rate: 0.000410

Epoch 59/100
--------------------------------------------------


                                                             

Train Loss: 0.2163, Train Acc: 0.9924
Val Loss: 0.2753, Val Acc: 0.9605
Val F1: 0.9605, Val ROC-AUC: 0.9900
Learning Rate: 0.000394

Epoch 60/100
--------------------------------------------------


                                                             

Train Loss: 0.2145, Train Acc: 0.9933
Val Loss: 0.2808, Val Acc: 0.9588
Val F1: 0.9588, Val ROC-AUC: 0.9828
Learning Rate: 0.000378

Epoch 61/100
--------------------------------------------------


                                                             

Train Loss: 0.2146, Train Acc: 0.9932
Val Loss: 0.2818, Val Acc: 0.9594
Val F1: 0.9593, Val ROC-AUC: 0.9856
Learning Rate: 0.000362

Epoch 62/100
--------------------------------------------------


                                                             

Train Loss: 0.2132, Train Acc: 0.9941
Val Loss: 0.2792, Val Acc: 0.9578
Val F1: 0.9578, Val ROC-AUC: 0.9862
Learning Rate: 0.000346

Epoch 63/100
--------------------------------------------------


                                                             

Train Loss: 0.2139, Train Acc: 0.9937
Val Loss: 0.2910, Val Acc: 0.9562
Val F1: 0.9561, Val ROC-AUC: 0.9736
Learning Rate: 0.000331

Epoch 64/100
--------------------------------------------------


                                                             

Train Loss: 0.2121, Train Acc: 0.9946
Val Loss: 0.2853, Val Acc: 0.9573
Val F1: 0.9573, Val ROC-AUC: 0.9792
Learning Rate: 0.000315

Epoch 65/100
--------------------------------------------------


                                                             

Train Loss: 0.2119, Train Acc: 0.9949
Val Loss: 0.2826, Val Acc: 0.9584
Val F1: 0.9584, Val ROC-AUC: 0.9761
Learning Rate: 0.000300

Epoch 66/100
--------------------------------------------------


                                                             

Train Loss: 0.2125, Train Acc: 0.9948
Val Loss: 0.2803, Val Acc: 0.9589
Val F1: 0.9589, Val ROC-AUC: 0.9860
Learning Rate: 0.000285

Epoch 67/100
--------------------------------------------------


                                                             

Train Loss: 0.2116, Train Acc: 0.9953
Val Loss: 0.2794, Val Acc: 0.9592
Val F1: 0.9592, Val ROC-AUC: 0.9848
Learning Rate: 0.000270

Epoch 68/100
--------------------------------------------------


                                                             

Train Loss: 0.2104, Train Acc: 0.9958
Val Loss: 0.2811, Val Acc: 0.9601
Val F1: 0.9600, Val ROC-AUC: 0.9814
Learning Rate: 0.000256

Epoch 69/100
--------------------------------------------------


                                                             

Train Loss: 0.2104, Train Acc: 0.9958
Val Loss: 0.2815, Val Acc: 0.9586
Val F1: 0.9586, Val ROC-AUC: 0.9816
Learning Rate: 0.000241

Epoch 70/100
--------------------------------------------------


                                                             

Train Loss: 0.2105, Train Acc: 0.9957
Val Loss: 0.2847, Val Acc: 0.9569
Val F1: 0.9569, Val ROC-AUC: 0.9721
Learning Rate: 0.000227

Epoch 71/100
--------------------------------------------------


                                                             

Train Loss: 0.2098, Train Acc: 0.9959
Val Loss: 0.2814, Val Acc: 0.9597
Val F1: 0.9597, Val ROC-AUC: 0.9777
Learning Rate: 0.000214

Epoch 72/100
--------------------------------------------------


                                                             

Train Loss: 0.2095, Train Acc: 0.9965
Val Loss: 0.2856, Val Acc: 0.9591
Val F1: 0.9591, Val ROC-AUC: 0.9670
Learning Rate: 0.000200

Early stopping at epoch 72
Best F1: 0.9621 at epoch 42


                                                             


Final Test Results:
Accuracy: 0.9600
F1-score: 0.9600
ROC-AUC: 0.9915
Precision: 0.9600
Recall: 0.9600
Best epoch: 42




## Save Predictions with Probabilities for Each Phoneme


In [8]:
# Get test dataset to extract phoneme metadata
test_df = df[df['split'] == 'test'].reset_index(drop=True)

# Create predictions dataframe with probabilities
predictions_data = []
for idx, row in test_df.iterrows():
    predictions_data.append({
        'phoneme_id': row['phoneme_id'],
        'utterance_id': row.get('utterance_id', None),  # May not be in features.parquet
        'phoneme': row.get('phoneme', row.get('class', None)),  # Use class if phoneme not available
        'true_class': row['class'],
        'true_class_encoded': int(test_labels[idx]),
        'predicted_class_encoded': int(test_preds[idx]),
        'predicted_class': 'd' if test_preds[idx] == 0 else 't',
        'prob_class_0': float(test_probs[idx][0]),  # Probability of class 'd'
        'prob_class_1': float(test_probs[idx][1]),  # Probability of class 't'
        'max_prob': float(np.max(test_probs[idx])),
        'is_correct': int(test_labels[idx] == test_preds[idx]),
        'confidence': float(np.max(test_probs[idx])) if test_labels[idx] == test_preds[idx] else float(test_probs[idx][test_preds[idx]]),
        'duration_ms': row.get('duration_ms', None)
    })

predictions_df = pd.DataFrame(predictions_data)

# Save to CSV
predictions_df.to_csv(save_dir / 'test_predictions_with_probs.csv', index=False)
print(f"Saved predictions with probabilities to: {save_dir / 'test_predictions_with_probs.csv'}")
print(f"Total predictions: {len(predictions_df)}")
print(f"Correct predictions: {predictions_df['is_correct'].sum()}")
print(f"Incorrect predictions: {(~predictions_df['is_correct'].astype(bool)).sum()}")

# Save summary statistics
summary_stats = {
    'total_samples': len(predictions_df),
    'correct_predictions': int(predictions_df['is_correct'].sum()),
    'incorrect_predictions': int((~predictions_df['is_correct'].astype(bool)).sum()),
    'accuracy': float(predictions_df['is_correct'].mean()),
    'avg_confidence_correct': float(predictions_df[predictions_df['is_correct'] == 1]['confidence'].mean()),
    'avg_confidence_incorrect': float(predictions_df[predictions_df['is_correct'] == 0]['confidence'].mean()),
    'min_confidence_incorrect': float(predictions_df[predictions_df['is_correct'] == 0]['confidence'].min()),
    'max_confidence_incorrect': float(predictions_df[predictions_df['is_correct'] == 0]['confidence'].max()),
    'high_confidence_errors': int(((predictions_df['is_correct'] == 0) & (predictions_df['confidence'] > 0.8)).sum()),
    'low_confidence_errors': int(((predictions_df['is_correct'] == 0) & (predictions_df['confidence'] < 0.6)).sum()),
}

with open(save_dir / 'predictions_summary.json', 'w') as f:
    json.dump(summary_stats, f, indent=2)

print(f"\nSummary Statistics:")
print(f"- Average confidence (correct): {summary_stats['avg_confidence_correct']:.4f}")
print(f"- Average confidence (incorrect): {summary_stats['avg_confidence_incorrect']:.4f}")
print(f"- High confidence errors (>0.8): {summary_stats['high_confidence_errors']}")
print(f"- Low confidence errors (<0.6): {summary_stats['low_confidence_errors']}")


Saved predictions with probabilities to: /Volumes/SSanDisk/SpeechRec-German/artifacts/d-t_dl_models_with_context/improved_models/hybrid_cnn_mlp_v3_with_context/test_predictions_with_probs.csv
Total predictions: 19949
Correct predictions: 19151
Incorrect predictions: 798

Summary Statistics:
- Average confidence (correct): 0.9349
- Average confidence (incorrect): 0.8121
- High confidence errors (>0.8): 500
- Low confidence errors (<0.6): 84


## Save Predictions for Validation Set (for analysis)


In [9]:
# Get validation predictions
val_metrics, val_preds, val_labels, val_probs = evaluate_model(model, val_hybrid_loader, criterion, device)
val_df = df[df['split'] == 'val'].reset_index(drop=True)

val_predictions_data = []
for idx, row in val_df.iterrows():
    val_predictions_data.append({
        'phoneme_id': row['phoneme_id'],
        'utterance_id': row.get('utterance_id', None),  # May not be in features.parquet
        'phoneme': row.get('phoneme', row.get('class', None)),  # Use class if phoneme not available
        'true_class': row['class'],
        'true_class_encoded': int(val_labels[idx]),
        'predicted_class_encoded': int(val_preds[idx]),
        'predicted_class': 'd' if val_preds[idx] == 0 else 't',
        'prob_class_0': float(val_probs[idx][0]),
        'prob_class_1': float(val_probs[idx][1]),
        'max_prob': float(np.max(val_probs[idx])),
        'is_correct': int(val_labels[idx] == val_preds[idx]),
        'confidence': float(np.max(val_probs[idx])) if val_labels[idx] == val_preds[idx] else float(val_probs[idx][val_preds[idx]]),
        'duration_ms': row.get('duration_ms', None)
    })

val_predictions_df = pd.DataFrame(val_predictions_data)
val_predictions_df.to_csv(save_dir / 'val_predictions_with_probs.csv', index=False)
print(f"Saved validation predictions to: {save_dir / 'val_predictions_with_probs.csv'}")


                                                             

Saved validation predictions to: /Volumes/SSanDisk/SpeechRec-German/artifacts/d-t_dl_models_with_context/improved_models/hybrid_cnn_mlp_v3_with_context/val_predictions_with_probs.csv
