# Improved Hybrid CNN+MLP Training (V4.2 Optimized) with Context Windows

**Optimized version** based on analysis of V4 results:

**Analysis of V4 results:**
- Accuracy: 0.9622 (down from V3: 0.9653)
- Best epoch: 27, but training continued to 127 (overfitting)
- Model too complex: 7.2M parameters with 129 features
- Issue: Increased features (130 vs ~109) but accuracy decreased

**Key optimizations in V4.2:**
1. **Simplified architecture**: Reduced MLP from 512→768→512→256→128 to 256→512→256→128
2. **Increased regularization**: Dropout 0.4-0.5 (vs 0.3), weight decay 1e-4 (vs 1e-5)
3. **Better feature selection**: Feature attention with reduction=8 (vs 4) for more selective attention
4. **Conservative learning rate**: 5e-4 (vs 1e-3) for more stable training
5. **Simplified fusion**: Reduced fusion layers complexity
6. **Early stopping patience**: 20 (vs 30) to prevent overfitting

**Expected improvements:**
- Better generalization (less overfitting)
- More stable training
- Better handling of increased feature count
- Target: Accuracy > 0.965 (back to V3 level or better)


In [1]:
import sys
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
import pandas as pd
import numpy as np
import h5py
import joblib
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
from tqdm import tqdm
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
import math

# Project root
# Determine project root (parent of notebooks directory)
PROJECT_ROOT = Path.cwd().parent if Path.cwd().name == 'notebooks' else Path.cwd()

# Data directory (with context v2 - includes VOT, burst features)
DATA_DIR = PROJECT_ROOT / 'artifacts' / 'b-p_dl_models_with_context_v2'
FEATURES_DIR = DATA_DIR / 'features'

# Device setup
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print(f"Using MPS device")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using CUDA device")
else:
    device = torch.device("cpu")
    print(f"Using CPU device")

print(f"Data directory: {DATA_DIR}")
print(f"Features directory: {FEATURES_DIR}")


Using MPS device
Data directory: /Volumes/SSanDisk/SpeechRec-German/artifacts/b-p_dl_models_with_context_v2
Features directory: /Volumes/SSanDisk/SpeechRec-German/artifacts/b-p_dl_models_with_context_v2/features


## Load Data with Context Windows (V2 - with VOT and Burst Features)


In [2]:
# Load feature columns
with open(DATA_DIR / 'feature_cols.json', 'r') as f:
    feature_cols = json.load(f)



# Load feature scaler
feature_scaler = joblib.load(DATA_DIR / 'feature_scaler.joblib')

# Load class weights
with open(DATA_DIR / 'class_weights.json', 'r') as f:
    class_weights_dict = json.load(f)

# Load features DataFrame (from 02.2 - includes VOT, burst features)
df = pd.read_parquet(FEATURES_DIR / 'features.parquet')
print(f"Dataset shape: {df.shape}")
print(f"Feature columns (loaded): {len(feature_cols)}")

# Filter feature_cols to only include columns that exist in DataFrame
original_feature_count = len(feature_cols)
feature_cols = [col for col in feature_cols if col in df.columns and pd.api.types.is_numeric_dtype(df[col])]

if len(feature_cols) != original_feature_count:
    missing_cols = set([col for col in json.load(open(DATA_DIR / 'feature_cols.json', 'r')) if col not in df.columns])
    print(f"Warning: {original_feature_count - len(feature_cols)} feature columns are missing from DataFrame")
    if missing_cols:
        print(f"Missing columns: {list(missing_cols)[:10]}...")
        
    # Special handling for duration_ms_features: if it's missing but duration_ms exists, 
    # it means the merge didn't create duration_ms_features (likely because duration_ms wasn't in df_features)
    # In this case, we just remove duration_ms_features from the list as it's not a real feature
    if 'duration_ms_features' in missing_cols:
        print("Note: 'duration_ms_features' is missing - this is expected if duration_ms wasn't duplicated during merge.")
        print("      This column is not a real feature and can be safely ignored.")

print(f"Feature columns (filtered): {len(feature_cols)}")

# Verify feature count matches scaler
if hasattr(feature_scaler, 'n_features_in_'):
    if len(feature_cols) != feature_scaler.n_features_in_:
        print(f"Warning: Feature count mismatch. Scaler expects {feature_scaler.n_features_in_} features, but we have {len(feature_cols)}")
        print("This is OK if some features were removed from the dataset. The scaler will be applied to available features.")

# Check what metadata columns we have
metadata_cols = ['phoneme_id', 'class', 'duration_ms', 'phoneme', 'utterance_id']
present_metadata = [col for col in metadata_cols if col in df.columns]
print(f"\nMetadata columns present: {present_metadata}")

# Handle class column
if 'class' not in df.columns:
    if 'phoneme' in df.columns:
        df['class'] = df['phoneme']
        print("Created 'class' column from 'phoneme'")
    else:
        raise ValueError("Neither 'class' nor 'phoneme' column found in features.parquet.")

# Filter to only b and p classes
if 'pf' in df['class'].values:
    df = df[df['class'].isin(['b', 'p'])].copy()
    print(f"Dataset after filtering to b/p: {len(df)} samples")

# Encode target
le = LabelEncoder()
df['class_encoded'] = le.fit_transform(df['class'])  # b=0, p=1
print(f"\nClass encoding: {dict(zip(le.classes_, le.transform(le.classes_)))}")
print(f"Class distribution:\n{df['class'].value_counts()}")

# Load split indices
with open(DATA_DIR / 'split_indices.json', 'r') as f:
    split_indices = json.load(f)

# Reset index
df = df.reset_index(drop=True)

# Create split column based on indices
df['split'] = 'train'
if len(df) > max(split_indices['val'] + split_indices['test']):
    df.loc[split_indices['val'], 'split'] = 'val'
    df.loc[split_indices['test'], 'split'] = 'test'
else:
    print("Warning: Split indices may not match DataFrame indices. Using phoneme_id matching...")
    val_ids = set(df.loc[split_indices['val'], 'phoneme_id'].values) if len(df) > max(split_indices['val']) else set()
    test_ids = set(df.loc[split_indices['test'], 'phoneme_id'].values) if len(df) > max(split_indices['test']) else set()
    df.loc[df['phoneme_id'].isin(val_ids), 'split'] = 'val'
    df.loc[df['phoneme_id'].isin(test_ids), 'split'] = 'test'

print(f"\nSplit distribution:")
print(df['split'].value_counts())

# Load spectrograms
spectrograms_dict = {}
with h5py.File(FEATURES_DIR / 'spectrograms.h5', 'r') as f:
    phoneme_ids = list(f.keys())
    for phoneme_id in tqdm(phoneme_ids, desc="Loading spectrograms"):
        spectrograms_dict[phoneme_id] = f[phoneme_id][:]

print(f"\nLoaded {len(spectrograms_dict):,} spectrograms")
if spectrograms_dict:
    print(f"Spectrogram shape: {list(spectrograms_dict.values())[0].shape}")

# Filter to only phonemes with spectrograms
df['phoneme_id_str'] = df['phoneme_id'].astype(str)
df['has_spectrogram'] = df['phoneme_id_str'].isin(spectrograms_dict.keys())
df = df[df['has_spectrogram']].copy()
print(f"\nDataset after filtering for spectrograms: {len(df)} samples")


Dataset shape: (36903, 134)
Feature columns (loaded): 130
Missing columns: ['duration_ms_features']...
Note: 'duration_ms_features' is missing - this is expected if duration_ms wasn't duplicated during merge.
      This column is not a real feature and can be safely ignored.
Feature columns (filtered): 129
This is OK if some features were removed from the dataset. The scaler will be applied to available features.

Metadata columns present: ['phoneme_id', 'class', 'duration_ms']

Class encoding: {'b': np.int64(0), 'p': np.int64(1)}
Class distribution:
class
b    25874
p    11029
Name: count, dtype: int64

Split distribution:
split
train    25846
test      5536
val       5521
Name: count, dtype: int64


Loading spectrograms: 100%|██████████| 36903/36903 [00:02<00:00, 16190.63it/s]



Loaded 36,903 spectrograms
Spectrogram shape: (128, 7)

Dataset after filtering for spectrograms: 36903 samples


## Create Dataset Classes and DataLoaders


In [3]:
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler

class HybridDataset(Dataset):
    """Dataset for hybrid models using both spectrograms and features"""
    def __init__(self, df, spectrograms_dict, feature_cols, scaler=None, split='train', fit_scaler=False, transform=None):
        self.df = df[df['split'] == split].reset_index(drop=True)
        self.spectrograms_dict = spectrograms_dict
        self.transform = transform
        
        self.feature_cols = [col for col in feature_cols if col in self.df.columns and pd.api.types.is_numeric_dtype(self.df[col])]
        if len(self.feature_cols) != len(feature_cols):
            missing = set(feature_cols) - set(self.feature_cols)
            print(f"Warning: {len(missing)} feature columns missing from DataFrame: {list(missing)[:5]}...")
        
        X_features = self.df[self.feature_cols].values.astype(np.float32)
        X_features = np.nan_to_num(X_features, nan=0.0, posinf=0.0, neginf=0.0)
        
        if fit_scaler:
            self.scaler = StandardScaler()
            X_features = self.scaler.fit_transform(X_features)
        elif scaler is not None:
            if hasattr(scaler, 'n_features_in_') and X_features.shape[1] != scaler.n_features_in_:
                print(f"Warning: Feature count mismatch ({X_features.shape[1]} vs {scaler.n_features_in_}). Retraining scaler on current features.")
                self.scaler = StandardScaler()
                X_features = self.scaler.fit_transform(X_features)
            else:
                self.scaler = scaler
                X_features = self.scaler.transform(X_features)
        else:
            self.scaler = None
        
        self.X_features = torch.from_numpy(X_features)
        self.y = torch.from_numpy(self.df['class_encoded'].values).long()
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        phoneme_id = str(row['phoneme_id'])
        
        spectrogram = self.spectrograms_dict[phoneme_id].astype(np.float32)
        if len(spectrogram.shape) == 2:
            spectrogram = np.expand_dims(spectrogram, axis=0)
        spectrogram = (spectrogram - spectrogram.min()) / (spectrogram.max() - spectrogram.min() + 1e-8)
        
        if self.transform:
            spectrogram = self.transform(spectrogram)
        
        features = self.X_features[idx]
        label = self.y[idx]
        
        return (torch.from_numpy(spectrogram), features), label

# Check and retrain scaler if needed
train_df = df[df['split'] == 'train'].reset_index(drop=True)
train_feature_cols = [col for col in feature_cols if col in train_df.columns and pd.api.types.is_numeric_dtype(train_df[col])]
feature_cols = train_feature_cols

if hasattr(feature_scaler, 'n_features_in_') and len(feature_cols) != feature_scaler.n_features_in_:
    print(f"Feature count mismatch detected: {len(feature_cols)} features in DataFrame vs {feature_scaler.n_features_in_} in scaler")
    print("Retraining scaler on train split with current features...")
    X_train_features = train_df[feature_cols].values.astype(np.float32)
    X_train_features = np.nan_to_num(X_train_features, nan=0.0, posinf=0.0, neginf=0.0)
    feature_scaler = StandardScaler()
    feature_scaler.fit(X_train_features)
    print(f"Scaler retrained on {len(feature_cols)} features")
else:
    print(f"Using existing scaler with {feature_scaler.n_features_in_} features")

# Create datasets
train_hybrid_ds = HybridDataset(df, spectrograms_dict, feature_cols, scaler=feature_scaler, split='train')
val_hybrid_ds = HybridDataset(df, spectrograms_dict, feature_cols, scaler=feature_scaler, split='val')
test_hybrid_ds = HybridDataset(df, spectrograms_dict, feature_cols, scaler=feature_scaler, split='test')

print(f"Train dataset: {len(train_hybrid_ds)} samples")
print(f"Val dataset: {len(val_hybrid_ds)} samples")
print(f"Test dataset: {len(test_hybrid_ds)} samples")

# Create weighted sampler
train_labels = df[df['split'] == 'train']['class_encoded'].values
class_weights_array = np.array([class_weights_dict.get(str(i), class_weights_dict.get(i, 1.0)) for i in range(2)])
sample_weights = np.array([class_weights_array[label] for label in train_labels])
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

# Create DataLoaders
BATCH_SIZE = 64
train_hybrid_loader = DataLoader(train_hybrid_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=0)
val_hybrid_loader = DataLoader(val_hybrid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_hybrid_loader = DataLoader(test_hybrid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f"\nTrain batches: {len(train_hybrid_loader)}")
print(f"Val batches: {len(val_hybrid_loader)}")
print(f"Test batches: {len(test_hybrid_loader)}")

# Test a batch
sample_batch = next(iter(train_hybrid_loader))
print(f"\nSample batch - Spectrogram shape: {sample_batch[0][0].shape}, Features shape: {sample_batch[0][1].shape}, Labels shape: {sample_batch[1].shape}")


Feature count mismatch detected: 129 features in DataFrame vs 130 in scaler
Retraining scaler on train split with current features...
Scaler retrained on 129 features
Train dataset: 25846 samples
Val dataset: 5521 samples
Test dataset: 5536 samples

Train batches: 404
Val batches: 87
Test batches: 87

Sample batch - Spectrogram shape: torch.Size([64, 1, 128, 7]), Features shape: torch.Size([64, 129]), Labels shape: torch.Size([64])


## Define Enhanced Model Architecture V4


In [4]:
# Define Residual Block for CNN

class ResidualBlock2D(nn.Module):

    """Residual block for CNN branch"""

    def __init__(self, in_channels, out_channels, stride=1):

        super(ResidualBlock2D, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)

        self.bn1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)

        self.bn2 = nn.BatchNorm2d(out_channels)

        

        self.shortcut = nn.Sequential()

        if stride != 1 or in_channels != out_channels:

            self.shortcut = nn.Sequential(

                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),

                nn.BatchNorm2d(out_channels)

            )

    

    def forward(self, x):

        out = F.relu(self.bn1(self.conv1(x)))

        out = self.bn2(self.conv2(out))

        out += self.shortcut(x)

        out = F.relu(out)

        return out





# Define Channel Attention Module

class ChannelAttention(nn.Module):

    """Channel attention module"""

    def __init__(self, channels, reduction=16):

        super(ChannelAttention, self).__init__()

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        self.max_pool = nn.AdaptiveMaxPool2d(1)

        

        self.fc = nn.Sequential(

            nn.Linear(channels, channels // reduction, bias=False),

            nn.ReLU(),

            nn.Linear(channels // reduction, channels, bias=False),

            nn.Sigmoid()

        )

    

    def forward(self, x):

        b, c, _, _ = x.size()

        avg_out = self.fc(self.avg_pool(x).view(b, c))

        max_out = self.fc(self.max_pool(x).view(b, c))

        out = avg_out + max_out

        return x * out.view(b, c, 1, 1)





# Define Feature Attention Module (SE-like for MLP features)

class FeatureAttention(nn.Module):

    """Squeeze-and-Excitation attention for feature vectors"""

    def __init__(self, n_features, reduction=4):

        super(FeatureAttention, self).__init__()

        self.reduction = reduction

        reduced_dim = max(1, n_features // reduction)

        

        self.fc = nn.Sequential(

            nn.Linear(n_features, reduced_dim, bias=False),

            nn.ReLU(),

            nn.Linear(reduced_dim, n_features, bias=False),

            nn.Sigmoid()

        )

    

    def forward(self, x):

        # x shape: (batch, n_features)

        attention_weights = self.fc(x)

        return x * attention_weights





# Define Multi-Scale Convolution Block

class MultiScaleConvBlock(nn.Module):

    """Multi-scale convolution with parallel 3x3 and 5x5 kernels"""

    def __init__(self, in_channels, out_channels):

        super(MultiScaleConvBlock, self).__init__()

        self.conv3x3 = nn.Sequential(

            nn.Conv2d(in_channels, out_channels // 2, kernel_size=3, padding=1),

            nn.BatchNorm2d(out_channels // 2),

            nn.ReLU()

        )

        self.conv5x5 = nn.Sequential(

            nn.Conv2d(in_channels, out_channels // 2, kernel_size=5, padding=2),

            nn.BatchNorm2d(out_channels // 2),

            nn.ReLU()

        )

    

    def forward(self, x):

        out3x3 = self.conv3x3(x)

        out5x5 = self.conv5x5(x)

        return torch.cat([out3x3, out5x5], dim=1)





# Define Cross-Attention Fusion Module

class CrossAttentionFusion(nn.Module):

    """Cross-attention between CNN and MLP outputs"""

    def __init__(self, cnn_dim, mlp_dim, hidden_dim=256):

        super(CrossAttentionFusion, self).__init__()

        self.cnn_dim = cnn_dim

        self.mlp_dim = mlp_dim

        self.hidden_dim = hidden_dim

        

        # Projections for cross-attention

        self.cnn_to_query = nn.Linear(cnn_dim, hidden_dim)

        self.mlp_to_key = nn.Linear(mlp_dim, hidden_dim)

        self.mlp_to_value = nn.Linear(mlp_dim, hidden_dim)

        

        self.cnn_to_key = nn.Linear(cnn_dim, hidden_dim)

        self.mlp_to_query = nn.Linear(mlp_dim, hidden_dim)

        self.cnn_to_value = nn.Linear(cnn_dim, hidden_dim)

        

        self.norm1 = nn.LayerNorm(hidden_dim)

        self.norm2 = nn.LayerNorm(hidden_dim)

        

        # Projections back to original dimensions

        self.cnn_proj = nn.Linear(hidden_dim, cnn_dim)

        self.mlp_proj = nn.Linear(hidden_dim, mlp_dim)

        

    def forward(self, cnn_out, mlp_out):

        # cnn_out: (batch, cnn_dim)

        # mlp_out: (batch, mlp_dim)

        

        # Simplified cross-attention: element-wise interaction

        # CNN enhanced by MLP

        q1 = self.cnn_to_query(cnn_out)  # (batch, hidden_dim)

        k1 = self.mlp_to_key(mlp_out)     # (batch, hidden_dim)

        v1 = self.mlp_to_value(mlp_out)  # (batch, hidden_dim)

        

        # Compute attention score and apply to value

        attention_score = torch.sigmoid(torch.sum(q1 * k1, dim=1, keepdim=True))  # (batch, 1)

        cnn_mlp_interaction = attention_score * v1  # (batch, hidden_dim)

        

        # Project back to cnn_dim and add residual

        cnn_enhanced = cnn_out + self.cnn_proj(self.norm1(cnn_mlp_interaction))

        

        # MLP enhanced by CNN

        q2 = self.mlp_to_query(mlp_out)  # (batch, hidden_dim)

        k2 = self.cnn_to_key(cnn_out)    # (batch, hidden_dim)

        v2 = self.cnn_to_value(cnn_out)  # (batch, hidden_dim)

        

        attention_score2 = torch.sigmoid(torch.sum(q2 * k2, dim=1, keepdim=True))  # (batch, 1)

        mlp_cnn_interaction = attention_score2 * v2  # (batch, hidden_dim)

        

        # Project back to mlp_dim and add residual

        mlp_enhanced = mlp_out + self.mlp_proj(self.norm2(mlp_cnn_interaction))

        

        return cnn_enhanced, mlp_enhanced





# Define Hybrid CNN+MLP Model V4

class HybridCNNMLP_V4_2(nn.Module):

    """

    Enhanced Hybrid model: CNN for spectrograms + MLP for features

    Version 4 Improvements:

    - Multi-Scale CNN: parallel 3x3 and 5x5 convolutions

    - Feature Attention: SE-like attention for important features (VOT, burst, delta MFCC)

    - Expanded MLP: 512→768→512→256→128 neurons

    - Cross-Attention Fusion: improved integration of CNN and MLP outputs

    - Channel attention in CNN branch

    Input: (spectrogram: batch, 1, 128, 7), (features: batch, n_features)

    Output: (batch, 2) - binary classification logits

    """

    

    def __init__(self, n_features=129, num_classes=2, dropout=0.4):

        super(HybridCNNMLP_V4_2, self).__init__()

        

        # Multi-Scale CNN branch with attention

        self.cnn_initial = nn.Sequential(

            nn.Conv2d(1, 64, kernel_size=3, padding=1),

            nn.BatchNorm2d(64),

            nn.ReLU(),

            nn.MaxPool2d(2, 2)  # (64, 64, 3)

        )

        

        # Multi-scale block

        self.multiscale = MultiScaleConvBlock(64, 128)

        

        self.cnn_branch = nn.Sequential(

            ResidualBlock2D(128, 128),

            ChannelAttention(128),

            nn.MaxPool2d(2, 2),  # (128, 32, 1)

            

            ResidualBlock2D(128, 256),

            ChannelAttention(256),

            ResidualBlock2D(256, 512),

            ChannelAttention(512),

            nn.AdaptiveAvgPool2d((1, 1)),

            nn.Flatten()

        )

        

        # Expanded MLP branch with feature attention

        # Expanded MLP branch with feature attention
        self.feature_attention = FeatureAttention(n_features, reduction=8)
        
        self.mlp_branch = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout),
            
            nn.Linear(256, 512),  # Reduced from 768
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout * 0.75),
            
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5),
            
            nn.Linear(256, 128)
        )

        

        # Cross-attention fusion

        self.cross_attention = CrossAttentionFusion(cnn_dim=512, mlp_dim=128, hidden_dim=128)

        

        # Enhanced Fusion layer

        # Enhanced Fusion layer
        self.fusion = nn.Sequential(
            nn.Linear(512 + 128, 256),  # CNN output (512) + MLP output (128)
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout),
            
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout * 0.75),
            
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5),
            
            nn.Linear(64, num_classes)
        )

        

    def forward(self, x):

        spectrogram, features = x

        

        # CNN branch with multi-scale

        cnn_init = self.cnn_initial(spectrogram)

        cnn_multiscale = self.multiscale(cnn_init)

        cnn_out = self.cnn_branch(cnn_multiscale)  # (batch, 512)

        

        # MLP branch with feature attention

        features_attended = self.feature_attention(features)

        mlp_out = self.mlp_branch(features_attended)  # (batch, 128)

        

        # Cross-attention fusion

        cnn_enhanced, mlp_enhanced = self.cross_attention(cnn_out, mlp_out)

        

        # Concatenate enhanced outputs

        fused = torch.cat([cnn_enhanced, mlp_enhanced], dim=1)  # (batch, 640)

        

        # Final classification

        out = self.fusion(fused)  # (batch, 2)

        

        return out

    

    def get_config(self):

        """Return model configuration"""

        return {

            'model_type': 'HybridCNNMLP_V4_2',

            'num_classes': 2,

            'n_features': 129,

            'input_shapes': {

                'spectrogram': (1, 128, 7),

                'features': (129,)

            },

            'version': '4.2'

        }



print("Model architecture V4.2 (Optimized) defined successfully!")


Model architecture V4.2 (Optimized) defined successfully!


## Define Training Utilities


In [5]:
# Training utilities
def train_epoch(model, dataloader, criterion, optimizer, device, max_grad_norm=None):
    """Train for one epoch with optional gradient clipping"""
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    for batch in tqdm(dataloader, desc="Training", leave=False):
        if isinstance(batch[0], (tuple, list)) and len(batch[0]) == 2:
            inputs = tuple(x.to(device) for x in batch[0])
        else:
            inputs = batch[0].to(device)
        
        labels = batch[1].to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        if max_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        optimizer.step()
        
        running_loss += loss.item()
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())
    
    avg_loss = running_loss / len(dataloader)
    accuracy = accuracy_score(all_labels, all_preds)
    
    return avg_loss, accuracy


def validate(model, dataloader, criterion, device):
    """Validate model"""
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Validating", leave=False):
            if isinstance(batch[0], (tuple, list)) and len(batch[0]) == 2:
                inputs = tuple(x.to(device) for x in batch[0])
            else:
                inputs = batch[0].to(device)
            
            labels = batch[1].to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            probs = torch.softmax(outputs, dim=1).cpu().numpy()
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs)
    
    avg_loss = running_loss / len(dataloader)
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    
    try:
        roc_auc = roc_auc_score(all_labels, np.array(all_probs)[:, 1])
    except:
        roc_auc = 0.0
    
    metrics = {
        'loss': avg_loss,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': roc_auc
    }
    
    return metrics, all_preds, all_labels, all_probs


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                device, num_epochs, save_dir, model_name, early_stopping_patience=20, max_grad_norm=None):
    """Train model with early stopping and optional gradient clipping"""
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)
    
    best_val_f1 = 0.0
    best_epoch = 0
    patience_counter = 0
    training_history = []
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)
        
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, max_grad_norm)
        val_metrics, _, _, _ = validate(model, val_loader, criterion, device)
        
        if scheduler is not None:
            scheduler.step()
        
        current_lr = optimizer.param_groups[0]['lr']
        epoch_metrics = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_acc,
            'val_loss': val_metrics['loss'],
            'val_accuracy': val_metrics['accuracy'],
            'val_precision': val_metrics['precision'],
            'val_recall': val_metrics['recall'],
            'val_f1': val_metrics['f1'],
            'val_roc_auc': val_metrics['roc_auc'],
            'learning_rate': current_lr
        }
        training_history.append(epoch_metrics)
        
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_metrics['loss']:.4f}, Val Acc: {val_metrics['accuracy']:.4f}")
        print(f"Val F1: {val_metrics['f1']:.4f}, Val ROC-AUC: {val_metrics['roc_auc']:.4f}")
        print(f"Learning Rate: {current_lr:.6f}")
        
        if val_metrics['f1'] > best_val_f1:
            best_val_f1 = val_metrics['f1']
            best_epoch = epoch + 1
            patience_counter = 0
            
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_f1': best_val_f1,
                'val_metrics': val_metrics
            }, save_dir / 'best_model.pt')
            
            print(f"✓ New best model saved! (F1: {best_val_f1:.4f})")
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print(f"\nEarly stopping at epoch {epoch+1}")
                print(f"Best F1: {best_val_f1:.4f} at epoch {best_epoch}")
                break
    
    with open(save_dir / 'training_history.json', 'w') as f:
        json.dump(training_history, f, indent=2)
    
    config = model.get_config() if hasattr(model, 'get_config') else {}
    config.update({
        'best_epoch': best_epoch,
        'best_val_f1': best_val_f1,
        'num_epochs': num_epochs
    })
    with open(save_dir / 'config.json', 'w') as f:
        json.dump(config, f, indent=2)
    
    return training_history, best_epoch


def evaluate_model(model, test_loader, criterion, device):
    """Evaluate model on test set"""
    metrics, preds, labels, probs = validate(model, test_loader, criterion, device)
    
    precision_per_class = precision_score(labels, preds, average=None, zero_division=0)
    recall_per_class = recall_score(labels, preds, average=None, zero_division=0)
    f1_per_class = f1_score(labels, preds, average=None, zero_division=0)
    
    metrics['precision_b'] = float(precision_per_class[0])
    metrics['precision_p'] = float(precision_per_class[1])
    metrics['recall_b'] = float(recall_per_class[0])
    metrics['recall_p'] = float(recall_per_class[1])
    metrics['f1_b'] = float(f1_per_class[0])
    metrics['f1_p'] = float(f1_per_class[1])
    metrics['confusion_matrix'] = confusion_matrix(labels, preds).tolist()
    
    return metrics, preds, labels, probs


class LabelSmoothingCrossEntropy(nn.Module):
    """Label smoothing cross entropy loss"""
    def __init__(self, smoothing=0.1, weight=None):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smoothing
        self.weight = weight
    
    def forward(self, pred, target):
        log_prob = F.log_softmax(pred, dim=1)
        nll_loss = -log_prob.gather(dim=1, index=target.unsqueeze(1)).squeeze(1)
        
        if self.weight is not None:
            nll_loss = nll_loss * self.weight[target]
        
        smooth_loss = -log_prob.mean(dim=1)
        if self.weight is not None:
            smooth_loss = smooth_loss * self.weight.mean()
        
        loss = (1.0 - self.smoothing) * nll_loss + self.smoothing * smooth_loss
        return loss.mean()


class WarmupCosineScheduler:
    """Learning rate scheduler with warmup and cosine annealing"""
    def __init__(self, optimizer, warmup_epochs, total_epochs, min_lr=1e-6):
        self.optimizer = optimizer
        self.warmup_epochs = warmup_epochs
        self.total_epochs = total_epochs
        self.min_lr = min_lr
        self.base_lr = optimizer.param_groups[0]['lr']
        self.current_epoch = 0
    
    def step(self):
        self.current_epoch += 1
        
        if self.current_epoch <= self.warmup_epochs:
            lr = self.base_lr * (self.current_epoch / self.warmup_epochs)
        else:
            progress = (self.current_epoch - self.warmup_epochs) / (self.total_epochs - self.warmup_epochs)
            lr = self.min_lr + (self.base_lr - self.min_lr) * 0.5 * (1 + math.cos(math.pi * progress))
        
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
    
    def get_last_lr(self):
        return [self.optimizer.param_groups[0]['lr']]

print("Training utilities defined successfully!")


Training utilities defined successfully!


In [6]:
# Create model V4 with automatic feature count detection
model = HybridCNNMLP_V4_2(n_features=len(feature_cols), num_classes=2, dropout=0.4).to(device)

# Print model info
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Model: {model.get_config()['model_type']}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Number of features: {len(feature_cols)}")

# Prepare class weights for loss function
class_weights = torch.tensor([
    class_weights_dict.get('0', class_weights_dict.get(0, 1.0)), 
    class_weights_dict.get('1', class_weights_dict.get(1, 1.0))
], dtype=torch.float32).to(device)

# Loss function with label smoothing
criterion = LabelSmoothingCrossEntropy(smoothing=0.1, weight=class_weights)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-4)

# Learning rate scheduler with warmup and cosine annealing
num_epochs = 200
warmup_epochs = 5
scheduler = WarmupCosineScheduler(optimizer, warmup_epochs=warmup_epochs, total_epochs=num_epochs, min_lr=1e-6)

# Output directory (v2)
OUTPUT_DIR = DATA_DIR / 'improved_models'
save_dir = OUTPUT_DIR / 'hybrid_cnn_mlp_v4_2_optimized'
save_dir.mkdir(parents=True, exist_ok=True)

print(f"\nTraining configuration:")
print(f"- Epochs: {num_epochs}")
print(f"- Warmup epochs: {warmup_epochs}")
print(f"- Initial LR: {optimizer.param_groups[0]['lr']}")
print(f"- Label smoothing: 0.1")
print(f"- Gradient clipping: 1.0")
print(f"- Early stopping patience: 30")
print(f"- Dropout: 0.3")
print(f"- Context windows: ±100ms (V2 with VOT and burst features)")
print(f"- Save directory: {save_dir}")


Model: HybridCNNMLP_V4_2
Total parameters: 5,944,674
Trainable parameters: 5,944,674
Number of features: 129

Training configuration:
- Epochs: 200
- Warmup epochs: 5
- Initial LR: 0.0005
- Label smoothing: 0.1
- Gradient clipping: 1.0
- Early stopping patience: 30
- Dropout: 0.3
- Context windows: ±100ms (V2 with VOT and burst features)
- Save directory: /Volumes/SSanDisk/SpeechRec-German/artifacts/b-p_dl_models_with_context_v2/improved_models/hybrid_cnn_mlp_v4_2_optimized


## Train Model


In [7]:
# Train model
history, best_epoch = train_model(
    model, train_hybrid_loader, val_hybrid_loader, criterion, optimizer, scheduler,
    device, num_epochs=num_epochs, save_dir=save_dir, model_name='hybrid_cnn_mlp_v4_2_optimized', 
    early_stopping_patience=20, max_grad_norm=1.0
)

# Load best model and evaluate on test set
checkpoint = torch.load(save_dir / 'best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
test_metrics, test_preds, test_labels, test_probs = evaluate_model(model, test_hybrid_loader, criterion, device)

# Save test metrics
with open(save_dir / 'test_metrics.json', 'w') as f:
    json.dump(test_metrics, f, indent=2)

print(f"\n{'='*60}")
print(f"Final Test Results:")
print(f"{'='*60}")
print(f"Accuracy: {test_metrics['accuracy']:.4f}")
print(f"F1-score: {test_metrics['f1']:.4f}")
print(f"ROC-AUC: {test_metrics['roc_auc']:.4f}")
print(f"Precision: {test_metrics['precision']:.4f}")
print(f"Recall: {test_metrics['recall']:.4f}")
print(f"Best epoch: {best_epoch}")



Epoch 1/200
--------------------------------------------------


                                                           

Train Loss: 0.4141, Train Acc: 0.8906
Val Loss: 0.3939, Val Acc: 0.8830
Val F1: 0.8871, Val ROC-AUC: 0.9747
Learning Rate: 0.000100
✓ New best model saved! (F1: 0.8871)

Epoch 2/200
--------------------------------------------------


                                                           

Train Loss: 0.3552, Train Acc: 0.9286
Val Loss: 0.3279, Val Acc: 0.9303
Val F1: 0.9316, Val ROC-AUC: 0.9842
Learning Rate: 0.000200
✓ New best model saved! (F1: 0.9316)

Epoch 3/200
--------------------------------------------------


                                                           

Train Loss: 0.3460, Train Acc: 0.9339
Val Loss: 0.3333, Val Acc: 0.9219
Val F1: 0.9238, Val ROC-AUC: 0.9859
Learning Rate: 0.000300

Epoch 4/200
--------------------------------------------------


                                                           

Train Loss: 0.3390, Train Acc: 0.9385
Val Loss: 0.3429, Val Acc: 0.9178
Val F1: 0.9199, Val ROC-AUC: 0.9851
Learning Rate: 0.000400

Epoch 5/200
--------------------------------------------------


                                                           

Train Loss: 0.3350, Train Acc: 0.9397
Val Loss: 0.3339, Val Acc: 0.9156
Val F1: 0.9179, Val ROC-AUC: 0.9869
Learning Rate: 0.000500

Epoch 6/200
--------------------------------------------------


                                                           

Train Loss: 0.3275, Train Acc: 0.9429
Val Loss: 0.3078, Val Acc: 0.9467
Val F1: 0.9474, Val ROC-AUC: 0.9884
Learning Rate: 0.000500
✓ New best model saved! (F1: 0.9474)

Epoch 7/200
--------------------------------------------------


                                                           

Train Loss: 0.3208, Train Acc: 0.9466
Val Loss: 0.3519, Val Acc: 0.8915
Val F1: 0.8953, Val ROC-AUC: 0.9873
Learning Rate: 0.000500

Epoch 8/200
--------------------------------------------------


                                                           

Train Loss: 0.3158, Train Acc: 0.9508
Val Loss: 0.3221, Val Acc: 0.9333
Val F1: 0.9348, Val ROC-AUC: 0.9882
Learning Rate: 0.000500

Epoch 9/200
--------------------------------------------------


                                                           

Train Loss: 0.3132, Train Acc: 0.9535
Val Loss: 0.3111, Val Acc: 0.9373
Val F1: 0.9386, Val ROC-AUC: 0.9882
Learning Rate: 0.000499

Epoch 10/200
--------------------------------------------------


                                                           

Train Loss: 0.3092, Train Acc: 0.9565
Val Loss: 0.3107, Val Acc: 0.9395
Val F1: 0.9406, Val ROC-AUC: 0.9886
Learning Rate: 0.000499

Epoch 11/200
--------------------------------------------------


                                                           

Train Loss: 0.3004, Train Acc: 0.9607
Val Loss: 0.3021, Val Acc: 0.9525
Val F1: 0.9529, Val ROC-AUC: 0.9892
Learning Rate: 0.000499
✓ New best model saved! (F1: 0.9529)

Epoch 12/200
--------------------------------------------------


                                                           

Train Loss: 0.3018, Train Acc: 0.9614
Val Loss: 0.3033, Val Acc: 0.9493
Val F1: 0.9500, Val ROC-AUC: 0.9894
Learning Rate: 0.000498

Epoch 13/200
--------------------------------------------------


                                                           

Train Loss: 0.2966, Train Acc: 0.9638
Val Loss: 0.3165, Val Acc: 0.9353
Val F1: 0.9367, Val ROC-AUC: 0.9902
Learning Rate: 0.000498

Epoch 14/200
--------------------------------------------------


                                                           

Train Loss: 0.2937, Train Acc: 0.9660
Val Loss: 0.3016, Val Acc: 0.9576
Val F1: 0.9579, Val ROC-AUC: 0.9899
Learning Rate: 0.000497
✓ New best model saved! (F1: 0.9579)

Epoch 15/200
--------------------------------------------------


                                                           

Train Loss: 0.2913, Train Acc: 0.9658
Val Loss: 0.3029, Val Acc: 0.9545
Val F1: 0.9549, Val ROC-AUC: 0.9899
Learning Rate: 0.000497

Epoch 16/200
--------------------------------------------------


                                                           

Train Loss: 0.2894, Train Acc: 0.9689
Val Loss: 0.3051, Val Acc: 0.9556
Val F1: 0.9558, Val ROC-AUC: 0.9903
Learning Rate: 0.000496

Epoch 17/200
--------------------------------------------------


                                                           

Train Loss: 0.2876, Train Acc: 0.9679
Val Loss: 0.3022, Val Acc: 0.9571
Val F1: 0.9573, Val ROC-AUC: 0.9904
Learning Rate: 0.000495

Epoch 18/200
--------------------------------------------------


                                                           

Train Loss: 0.2857, Train Acc: 0.9700
Val Loss: 0.3103, Val Acc: 0.9362
Val F1: 0.9376, Val ROC-AUC: 0.9907
Learning Rate: 0.000495

Epoch 19/200
--------------------------------------------------


                                                           

Train Loss: 0.2845, Train Acc: 0.9707
Val Loss: 0.3140, Val Acc: 0.9399
Val F1: 0.9409, Val ROC-AUC: 0.9883
Learning Rate: 0.000494

Epoch 20/200
--------------------------------------------------


                                                           

Train Loss: 0.2839, Train Acc: 0.9720
Val Loss: 0.3071, Val Acc: 0.9477
Val F1: 0.9484, Val ROC-AUC: 0.9901
Learning Rate: 0.000493

Epoch 21/200
--------------------------------------------------


                                                           

Train Loss: 0.2819, Train Acc: 0.9729
Val Loss: 0.3083, Val Acc: 0.9419
Val F1: 0.9429, Val ROC-AUC: 0.9895
Learning Rate: 0.000492

Epoch 22/200
--------------------------------------------------


                                                           

Train Loss: 0.2808, Train Acc: 0.9748
Val Loss: 0.3045, Val Acc: 0.9509
Val F1: 0.9516, Val ROC-AUC: 0.9897
Learning Rate: 0.000491

Epoch 23/200
--------------------------------------------------


                                                           

Train Loss: 0.2782, Train Acc: 0.9762
Val Loss: 0.3048, Val Acc: 0.9580
Val F1: 0.9581, Val ROC-AUC: 0.9902
Learning Rate: 0.000490
✓ New best model saved! (F1: 0.9581)

Epoch 24/200
--------------------------------------------------


                                                           

Train Loss: 0.2797, Train Acc: 0.9749
Val Loss: 0.2969, Val Acc: 0.9607
Val F1: 0.9608, Val ROC-AUC: 0.9910
Learning Rate: 0.000488
✓ New best model saved! (F1: 0.9608)

Epoch 25/200
--------------------------------------------------


                                                           

Train Loss: 0.2771, Train Acc: 0.9769
Val Loss: 0.3009, Val Acc: 0.9620
Val F1: 0.9621, Val ROC-AUC: 0.9918
Learning Rate: 0.000487
✓ New best model saved! (F1: 0.9621)

Epoch 26/200
--------------------------------------------------


                                                           

Train Loss: 0.2732, Train Acc: 0.9791
Val Loss: 0.2995, Val Acc: 0.9569
Val F1: 0.9572, Val ROC-AUC: 0.9900
Learning Rate: 0.000486

Epoch 27/200
--------------------------------------------------


                                                           

Train Loss: 0.2750, Train Acc: 0.9783
Val Loss: 0.2972, Val Acc: 0.9553
Val F1: 0.9557, Val ROC-AUC: 0.9909
Learning Rate: 0.000484

Epoch 28/200
--------------------------------------------------


                                                           

Train Loss: 0.2702, Train Acc: 0.9813
Val Loss: 0.3114, Val Acc: 0.9429
Val F1: 0.9440, Val ROC-AUC: 0.9892
Learning Rate: 0.000483

Epoch 29/200
--------------------------------------------------


                                                           

Train Loss: 0.2735, Train Acc: 0.9788
Val Loss: 0.2984, Val Acc: 0.9592
Val F1: 0.9595, Val ROC-AUC: 0.9909
Learning Rate: 0.000482

Epoch 30/200
--------------------------------------------------


                                                           

Train Loss: 0.2701, Train Acc: 0.9799
Val Loss: 0.3007, Val Acc: 0.9563
Val F1: 0.9568, Val ROC-AUC: 0.9907
Learning Rate: 0.000480

Epoch 31/200
--------------------------------------------------


                                                           

Train Loss: 0.2703, Train Acc: 0.9812
Val Loss: 0.3024, Val Acc: 0.9556
Val F1: 0.9561, Val ROC-AUC: 0.9894
Learning Rate: 0.000478

Epoch 32/200
--------------------------------------------------


                                                           

Train Loss: 0.2687, Train Acc: 0.9818
Val Loss: 0.3002, Val Acc: 0.9582
Val F1: 0.9585, Val ROC-AUC: 0.9900
Learning Rate: 0.000477

Epoch 33/200
--------------------------------------------------


                                                           

Train Loss: 0.2696, Train Acc: 0.9810
Val Loss: 0.3009, Val Acc: 0.9591
Val F1: 0.9592, Val ROC-AUC: 0.9901
Learning Rate: 0.000475

Epoch 34/200
--------------------------------------------------


                                                           

Train Loss: 0.2678, Train Acc: 0.9813
Val Loss: 0.3043, Val Acc: 0.9522
Val F1: 0.9527, Val ROC-AUC: 0.9886
Learning Rate: 0.000473

Epoch 35/200
--------------------------------------------------


                                                           

Train Loss: 0.2662, Train Acc: 0.9831
Val Loss: 0.2990, Val Acc: 0.9594
Val F1: 0.9597, Val ROC-AUC: 0.9912
Learning Rate: 0.000471

Epoch 36/200
--------------------------------------------------


                                                           

Train Loss: 0.2656, Train Acc: 0.9834
Val Loss: 0.3015, Val Acc: 0.9583
Val F1: 0.9586, Val ROC-AUC: 0.9901
Learning Rate: 0.000470

Epoch 37/200
--------------------------------------------------


                                                           

Train Loss: 0.2630, Train Acc: 0.9849
Val Loss: 0.2982, Val Acc: 0.9631
Val F1: 0.9632, Val ROC-AUC: 0.9910
Learning Rate: 0.000468
✓ New best model saved! (F1: 0.9632)

Epoch 38/200
--------------------------------------------------


                                                           

Train Loss: 0.2635, Train Acc: 0.9851
Val Loss: 0.3016, Val Acc: 0.9623
Val F1: 0.9625, Val ROC-AUC: 0.9861
Learning Rate: 0.000466

Epoch 39/200
--------------------------------------------------


                                                           

Train Loss: 0.2628, Train Acc: 0.9848
Val Loss: 0.3061, Val Acc: 0.9583
Val F1: 0.9586, Val ROC-AUC: 0.9878
Learning Rate: 0.000463

Epoch 40/200
--------------------------------------------------


                                                           

Train Loss: 0.2617, Train Acc: 0.9862
Val Loss: 0.3050, Val Acc: 0.9634
Val F1: 0.9634, Val ROC-AUC: 0.9898
Learning Rate: 0.000461
✓ New best model saved! (F1: 0.9634)

Epoch 41/200
--------------------------------------------------


                                                           

Train Loss: 0.2624, Train Acc: 0.9855
Val Loss: 0.3063, Val Acc: 0.9520
Val F1: 0.9526, Val ROC-AUC: 0.9896
Learning Rate: 0.000459

Epoch 42/200
--------------------------------------------------


                                                           

Train Loss: 0.2605, Train Acc: 0.9870
Val Loss: 0.3038, Val Acc: 0.9616
Val F1: 0.9617, Val ROC-AUC: 0.9910
Learning Rate: 0.000457

Epoch 43/200
--------------------------------------------------


                                                           

Train Loss: 0.2614, Train Acc: 0.9861
Val Loss: 0.3042, Val Acc: 0.9573
Val F1: 0.9576, Val ROC-AUC: 0.9906
Learning Rate: 0.000455

Epoch 44/200
--------------------------------------------------


                                                           

Train Loss: 0.2595, Train Acc: 0.9872
Val Loss: 0.3013, Val Acc: 0.9592
Val F1: 0.9595, Val ROC-AUC: 0.9845
Learning Rate: 0.000452

Epoch 45/200
--------------------------------------------------


                                                           

Train Loss: 0.2588, Train Acc: 0.9874
Val Loss: 0.3025, Val Acc: 0.9524
Val F1: 0.9530, Val ROC-AUC: 0.9911
Learning Rate: 0.000450

Epoch 46/200
--------------------------------------------------


                                                           

Train Loss: 0.2596, Train Acc: 0.9872
Val Loss: 0.3080, Val Acc: 0.9515
Val F1: 0.9521, Val ROC-AUC: 0.9904
Learning Rate: 0.000448

Epoch 47/200
--------------------------------------------------


                                                           

Train Loss: 0.2578, Train Acc: 0.9883
Val Loss: 0.2981, Val Acc: 0.9612
Val F1: 0.9615, Val ROC-AUC: 0.9910
Learning Rate: 0.000445

Epoch 48/200
--------------------------------------------------


                                                           

Train Loss: 0.2600, Train Acc: 0.9869
Val Loss: 0.3014, Val Acc: 0.9634
Val F1: 0.9635, Val ROC-AUC: 0.9909
Learning Rate: 0.000442
✓ New best model saved! (F1: 0.9635)

Epoch 49/200
--------------------------------------------------


                                                           

Train Loss: 0.2582, Train Acc: 0.9878
Val Loss: 0.3034, Val Acc: 0.9621
Val F1: 0.9621, Val ROC-AUC: 0.9904
Learning Rate: 0.000440

Epoch 50/200
--------------------------------------------------


                                                           

Train Loss: 0.2584, Train Acc: 0.9883
Val Loss: 0.2984, Val Acc: 0.9609
Val F1: 0.9611, Val ROC-AUC: 0.9907
Learning Rate: 0.000437

Epoch 51/200
--------------------------------------------------


                                                           

Train Loss: 0.2568, Train Acc: 0.9893
Val Loss: 0.2989, Val Acc: 0.9609
Val F1: 0.9612, Val ROC-AUC: 0.9912
Learning Rate: 0.000435

Epoch 52/200
--------------------------------------------------


                                                           

Train Loss: 0.2548, Train Acc: 0.9898
Val Loss: 0.3021, Val Acc: 0.9641
Val F1: 0.9642, Val ROC-AUC: 0.9841
Learning Rate: 0.000432
✓ New best model saved! (F1: 0.9642)

Epoch 53/200
--------------------------------------------------


                                                           

Train Loss: 0.2571, Train Acc: 0.9882
Val Loss: 0.3014, Val Acc: 0.9627
Val F1: 0.9629, Val ROC-AUC: 0.9905
Learning Rate: 0.000429

Epoch 54/200
--------------------------------------------------


                                                           

Train Loss: 0.2564, Train Acc: 0.9895
Val Loss: 0.3013, Val Acc: 0.9578
Val F1: 0.9582, Val ROC-AUC: 0.9906
Learning Rate: 0.000426

Epoch 55/200
--------------------------------------------------


                                                           

Train Loss: 0.2558, Train Acc: 0.9894
Val Loss: 0.3060, Val Acc: 0.9544
Val F1: 0.9549, Val ROC-AUC: 0.9859
Learning Rate: 0.000423

Epoch 56/200
--------------------------------------------------


                                                           

Train Loss: 0.2552, Train Acc: 0.9899
Val Loss: 0.3012, Val Acc: 0.9641
Val F1: 0.9642, Val ROC-AUC: 0.9907
Learning Rate: 0.000420
✓ New best model saved! (F1: 0.9642)

Epoch 57/200
--------------------------------------------------


                                                           

Train Loss: 0.2544, Train Acc: 0.9902
Val Loss: 0.3073, Val Acc: 0.9562
Val F1: 0.9565, Val ROC-AUC: 0.9886
Learning Rate: 0.000417

Epoch 58/200
--------------------------------------------------


                                                           

Train Loss: 0.2558, Train Acc: 0.9897
Val Loss: 0.2996, Val Acc: 0.9582
Val F1: 0.9585, Val ROC-AUC: 0.9889
Learning Rate: 0.000414

Epoch 59/200
--------------------------------------------------


                                                           

Train Loss: 0.2535, Train Acc: 0.9901
Val Loss: 0.3039, Val Acc: 0.9629
Val F1: 0.9630, Val ROC-AUC: 0.9901
Learning Rate: 0.000411

Epoch 60/200
--------------------------------------------------


                                                           

Train Loss: 0.2540, Train Acc: 0.9907
Val Loss: 0.3050, Val Acc: 0.9620
Val F1: 0.9620, Val ROC-AUC: 0.9906
Learning Rate: 0.000408

Epoch 61/200
--------------------------------------------------


                                                           

Train Loss: 0.2523, Train Acc: 0.9911
Val Loss: 0.3061, Val Acc: 0.9600
Val F1: 0.9601, Val ROC-AUC: 0.9891
Learning Rate: 0.000405

Epoch 62/200
--------------------------------------------------


                                                           

Train Loss: 0.2508, Train Acc: 0.9925
Val Loss: 0.2989, Val Acc: 0.9638
Val F1: 0.9639, Val ROC-AUC: 0.9917
Learning Rate: 0.000402

Epoch 63/200
--------------------------------------------------


                                                           

Train Loss: 0.2514, Train Acc: 0.9918
Val Loss: 0.3027, Val Acc: 0.9616
Val F1: 0.9618, Val ROC-AUC: 0.9885
Learning Rate: 0.000399

Epoch 64/200
--------------------------------------------------


                                                           

Train Loss: 0.2530, Train Acc: 0.9918
Val Loss: 0.3108, Val Acc: 0.9602
Val F1: 0.9603, Val ROC-AUC: 0.9788
Learning Rate: 0.000396

Epoch 65/200
--------------------------------------------------


                                                           

Train Loss: 0.2537, Train Acc: 0.9905
Val Loss: 0.2990, Val Acc: 0.9614
Val F1: 0.9617, Val ROC-AUC: 0.9901
Learning Rate: 0.000392

Epoch 66/200
--------------------------------------------------


                                                           

Train Loss: 0.2541, Train Acc: 0.9904
Val Loss: 0.3068, Val Acc: 0.9647
Val F1: 0.9647, Val ROC-AUC: 0.9822
Learning Rate: 0.000389
✓ New best model saved! (F1: 0.9647)

Epoch 67/200
--------------------------------------------------


                                                           

Train Loss: 0.2514, Train Acc: 0.9923
Val Loss: 0.3022, Val Acc: 0.9623
Val F1: 0.9625, Val ROC-AUC: 0.9823
Learning Rate: 0.000386

Epoch 68/200
--------------------------------------------------


                                                           

Train Loss: 0.2500, Train Acc: 0.9931
Val Loss: 0.3042, Val Acc: 0.9631
Val F1: 0.9631, Val ROC-AUC: 0.9775
Learning Rate: 0.000382

Epoch 69/200
--------------------------------------------------


                                                           

Train Loss: 0.2505, Train Acc: 0.9926
Val Loss: 0.3066, Val Acc: 0.9596
Val F1: 0.9598, Val ROC-AUC: 0.9892
Learning Rate: 0.000379

Epoch 70/200
--------------------------------------------------


                                                           

Train Loss: 0.2517, Train Acc: 0.9920
Val Loss: 0.3086, Val Acc: 0.9603
Val F1: 0.9605, Val ROC-AUC: 0.9904
Learning Rate: 0.000375

Epoch 71/200
--------------------------------------------------


                                                           

Train Loss: 0.2520, Train Acc: 0.9921
Val Loss: 0.3077, Val Acc: 0.9629
Val F1: 0.9628, Val ROC-AUC: 0.9913
Learning Rate: 0.000372

Epoch 72/200
--------------------------------------------------


                                                           

Train Loss: 0.2506, Train Acc: 0.9921
Val Loss: 0.3050, Val Acc: 0.9632
Val F1: 0.9633, Val ROC-AUC: 0.9895
Learning Rate: 0.000368

Epoch 73/200
--------------------------------------------------


                                                           

Train Loss: 0.2504, Train Acc: 0.9928
Val Loss: 0.3031, Val Acc: 0.9632
Val F1: 0.9634, Val ROC-AUC: 0.9880
Learning Rate: 0.000365

Epoch 74/200
--------------------------------------------------


                                                           

Train Loss: 0.2502, Train Acc: 0.9928
Val Loss: 0.3280, Val Acc: 0.9592
Val F1: 0.9589, Val ROC-AUC: 0.9610
Learning Rate: 0.000361

Epoch 75/200
--------------------------------------------------


                                                           

Train Loss: 0.2487, Train Acc: 0.9931
Val Loss: 0.3113, Val Acc: 0.9621
Val F1: 0.9622, Val ROC-AUC: 0.9899
Learning Rate: 0.000357

Epoch 76/200
--------------------------------------------------


                                                           

Train Loss: 0.2487, Train Acc: 0.9937
Val Loss: 0.3102, Val Acc: 0.9609
Val F1: 0.9609, Val ROC-AUC: 0.9881
Learning Rate: 0.000354

Epoch 77/200
--------------------------------------------------


                                                           

Train Loss: 0.2496, Train Acc: 0.9931
Val Loss: 0.3022, Val Acc: 0.9623
Val F1: 0.9625, Val ROC-AUC: 0.9852
Learning Rate: 0.000350

Epoch 78/200
--------------------------------------------------


                                                           

Train Loss: 0.2479, Train Acc: 0.9938
Val Loss: 0.3024, Val Acc: 0.9612
Val F1: 0.9615, Val ROC-AUC: 0.9891
Learning Rate: 0.000346

Epoch 79/200
--------------------------------------------------


                                                           

Train Loss: 0.2494, Train Acc: 0.9938
Val Loss: 0.3099, Val Acc: 0.9618
Val F1: 0.9619, Val ROC-AUC: 0.9892
Learning Rate: 0.000343

Epoch 80/200
--------------------------------------------------


                                                           

Train Loss: 0.2475, Train Acc: 0.9944
Val Loss: 0.3148, Val Acc: 0.9616
Val F1: 0.9615, Val ROC-AUC: 0.9911
Learning Rate: 0.000339

Epoch 81/200
--------------------------------------------------


                                                           

Train Loss: 0.2498, Train Acc: 0.9929
Val Loss: 0.3124, Val Acc: 0.9605
Val F1: 0.9606, Val ROC-AUC: 0.9898
Learning Rate: 0.000335

Epoch 82/200
--------------------------------------------------


                                                           

Train Loss: 0.2485, Train Acc: 0.9935
Val Loss: 0.3035, Val Acc: 0.9629
Val F1: 0.9629, Val ROC-AUC: 0.9909
Learning Rate: 0.000331

Epoch 83/200
--------------------------------------------------


                                                           

Train Loss: 0.2467, Train Acc: 0.9947
Val Loss: 0.3092, Val Acc: 0.9574
Val F1: 0.9578, Val ROC-AUC: 0.9826
Learning Rate: 0.000328

Epoch 84/200
--------------------------------------------------


                                                           

Train Loss: 0.2481, Train Acc: 0.9938
Val Loss: 0.3076, Val Acc: 0.9631
Val F1: 0.9631, Val ROC-AUC: 0.9854
Learning Rate: 0.000324

Epoch 85/200
--------------------------------------------------


                                                           

Train Loss: 0.2464, Train Acc: 0.9947
Val Loss: 0.3112, Val Acc: 0.9631
Val F1: 0.9631, Val ROC-AUC: 0.9901
Learning Rate: 0.000320

Epoch 86/200
--------------------------------------------------


                                                           

Train Loss: 0.2487, Train Acc: 0.9933
Val Loss: 0.3112, Val Acc: 0.9620
Val F1: 0.9621, Val ROC-AUC: 0.9714
Learning Rate: 0.000316

Early stopping at epoch 86
Best F1: 0.9647 at epoch 66


                                                           


Final Test Results:
Accuracy: 0.9660
F1-score: 0.9659
ROC-AUC: 0.9834
Precision: 0.9659
Recall: 0.9660
Best epoch: 66




## Save Predictions with Probabilities for Each Phoneme


In [8]:
# Get test dataset to extract phoneme metadata
test_df = df[df['split'] == 'test'].reset_index(drop=True)

# Create predictions dataframe with probabilities
predictions_data = []
for idx, row in test_df.iterrows():
    predictions_data.append({
        'phoneme_id': row['phoneme_id'],
        'utterance_id': row.get('utterance_id', None),
        'phoneme': row.get('phoneme', row.get('class', None)),
        'true_class': row['class'],
        'true_class_encoded': int(test_labels[idx]),
        'predicted_class_encoded': int(test_preds[idx]),
        'predicted_class': 'b' if test_preds[idx] == 0 else 'p',
        'prob_class_0': float(test_probs[idx][0]),
        'prob_class_1': float(test_probs[idx][1]),
        'max_prob': float(np.max(test_probs[idx])),
        'is_correct': int(test_labels[idx] == test_preds[idx]),
        'confidence': float(np.max(test_probs[idx])) if test_labels[idx] == test_preds[idx] else float(test_probs[idx][test_preds[idx]]),
        'duration_ms': row.get('duration_ms', None)
    })

predictions_df = pd.DataFrame(predictions_data)

# Save to CSV
predictions_df.to_csv(save_dir / 'test_predictions_with_probs.csv', index=False)
print(f"Saved predictions with probabilities to: {save_dir / 'test_predictions_with_probs.csv'}")
print(f"Total predictions: {len(predictions_df)}")
print(f"Correct predictions: {predictions_df['is_correct'].sum()}")
print(f"Incorrect predictions: {(~predictions_df['is_correct'].astype(bool)).sum()}")

# Save summary statistics
summary_stats = {
    'total_samples': len(predictions_df),
    'correct_predictions': int(predictions_df['is_correct'].sum()),
    'incorrect_predictions': int((~predictions_df['is_correct'].astype(bool)).sum()),
    'accuracy': float(predictions_df['is_correct'].mean()),
    'avg_confidence_correct': float(predictions_df[predictions_df['is_correct'] == 1]['confidence'].mean()),
    'avg_confidence_incorrect': float(predictions_df[predictions_df['is_correct'] == 0]['confidence'].mean()),
    'min_confidence_incorrect': float(predictions_df[predictions_df['is_correct'] == 0]['confidence'].min()),
    'max_confidence_incorrect': float(predictions_df[predictions_df['is_correct'] == 0]['confidence'].max()),
    'high_confidence_errors': int(((predictions_df['is_correct'] == 0) & (predictions_df['confidence'] > 0.8)).sum()),
    'low_confidence_errors': int(((predictions_df['is_correct'] == 0) & (predictions_df['confidence'] < 0.6)).sum()),
}

with open(save_dir / 'predictions_summary.json', 'w') as f:
    json.dump(summary_stats, f, indent=2)

print(f"\nSummary Statistics:")
print(f"- Average confidence (correct): {summary_stats['avg_confidence_correct']:.4f}")
print(f"- Average confidence (incorrect): {summary_stats['avg_confidence_incorrect']:.4f}")
print(f"- High confidence errors (>0.8): {summary_stats['high_confidence_errors']}")
print(f"- Low confidence errors (<0.6): {summary_stats['low_confidence_errors']}")


Saved predictions with probabilities to: /Volumes/SSanDisk/SpeechRec-German/artifacts/b-p_dl_models_with_context_v2/improved_models/hybrid_cnn_mlp_v4_2_optimized/test_predictions_with_probs.csv
Total predictions: 5536
Correct predictions: 5348
Incorrect predictions: 188

Summary Statistics:
- Average confidence (correct): 0.9278
- Average confidence (incorrect): 0.8400
- High confidence errors (>0.8): 132
- Low confidence errors (<0.6): 21


## Save Predictions for Validation Set (for analysis)


In [9]:
# Get validation predictions
val_metrics, val_preds, val_labels, val_probs = evaluate_model(model, val_hybrid_loader, criterion, device)
val_df = df[df['split'] == 'val'].reset_index(drop=True)

val_predictions_data = []
for idx, row in val_df.iterrows():
    val_predictions_data.append({
        'phoneme_id': row['phoneme_id'],
        'utterance_id': row.get('utterance_id', None),
        'phoneme': row.get('phoneme', row.get('class', None)),
        'true_class': row['class'],
        'true_class_encoded': int(val_labels[idx]),
        'predicted_class_encoded': int(val_preds[idx]),
        'predicted_class': 'b' if val_preds[idx] == 0 else 'p',
        'prob_class_0': float(val_probs[idx][0]),
        'prob_class_1': float(val_probs[idx][1]),
        'max_prob': float(np.max(val_probs[idx])),
        'is_correct': int(val_labels[idx] == val_preds[idx]),
        'confidence': float(np.max(val_probs[idx])) if val_labels[idx] == val_preds[idx] else float(val_probs[idx][val_preds[idx]]),
        'duration_ms': row.get('duration_ms', None)
    })

val_predictions_df = pd.DataFrame(val_predictions_data)
val_predictions_df.to_csv(save_dir / 'val_predictions_with_probs.csv', index=False)
print(f"Saved validation predictions to: {save_dir / 'val_predictions_with_probs.csv'}")


                                                           

Saved validation predictions to: /Volumes/SSanDisk/SpeechRec-German/artifacts/b-p_dl_models_with_context_v2/improved_models/hybrid_cnn_mlp_v4_2_optimized/val_predictions_with_probs.csv
