# Ensemble Models Training

Creating Stacking Ensemble from best individual models:
- Model 10: Stacking Ensemble

In [1]:
import sys
from pathlib import Path
import torch
import torch.nn as nn
import json
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Determine project root (parent of notebooks directory)
PROJECT_ROOT = Path.cwd().parent if Path.cwd().name in ['notebooks', 'b-p_first_experiments'] else Path.cwd()
sys.path.insert(0, str(PROJECT_ROOT))

from models.ensemble.stacking_ensemble import StackingEnsemble, EnsemblePredictor
from models.spectrogram.resnet_spectrogram import resnet18_spectrogram
from models.spectrogram.vit_spectrogram import VisionTransformerSpectrogram
from models.hybrid.hybrid_cnn_mlp import HybridCNNMLP
from models.hybrid.multimodal_fusion import MultiModalFusion
from models.sequence.bilstm_attention import BiLSTMAttention
from utils.training_utils import evaluate_model
from utils.data_loader import load_data, create_dataloaders

if torch.backends.mps.is_available():
    device = torch.device("mps")
    print(f"Using MPS device")
else:
    device = torch.device("cpu")

OUTPUT_DIR = PROJECT_ROOT / 'artifacts' / 'b-p_dl_models'

# Load data
df, spectrograms_dict, feature_cols, feature_scaler, class_weights_dict = load_data(PROJECT_ROOT)
dataloaders = create_dataloaders(df, spectrograms_dict, feature_cols, feature_scaler, class_weights_dict, batch_size=64)

class_weights = torch.tensor([class_weights_dict.get('0', class_weights_dict.get(0, 1.0)), 
                              class_weights_dict.get('1', class_weights_dict.get(1, 1.0))], dtype=torch.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

Using MPS device
Columns in df_phonemes: ['phoneme_id', 'utterance_id', 'phoneme', 'class', 'start_ms', 'end_ms', 'duration_ms', 'audio_path']
Columns in df_features: ['energy_rms', 'energy_rms_std', 'energy_zcr', 'energy_zcr_std', 'spectral_centroid', 'spectral_centroid_std', 'spectral_rolloff', 'spectral_rolloff_std', 'spectral_bandwidth', 'spectral_bandwidth_std', 'formant_f1', 'formant_f2', 'formant_f3', 'formant_f4', 'formant_f1_std', 'formant_f2_std', 'formant_f3_std', 'formant_f4_std', 'spectral_flatness', 'harmonic_noise_ratio', 'zcr_mean', 'energy_cv', 'phoneme_id', 'class', 'duration_ms', 'mfcc_mean_0', 'mfcc_mean_1', 'mfcc_mean_2', 'mfcc_mean_3', 'mfcc_mean_4', 'mfcc_mean_5', 'mfcc_mean_6', 'mfcc_mean_7', 'mfcc_mean_8', 'mfcc_mean_9', 'mfcc_mean_10', 'mfcc_mean_11', 'mfcc_mean_12', 'mfcc_std_0', 'mfcc_std_1', 'mfcc_std_2', 'mfcc_std_3', 'mfcc_std_4', 'mfcc_std_5', 'mfcc_std_6', 'mfcc_std_7', 'mfcc_std_8', 'mfcc_std_9', 'mfcc_std_10', 'mfcc_std_11', 'mfcc_std_12', 'delta_mfcc

## Load Best Models and Get Predictions

In [2]:
# Load all trained models
base_models = []
model_configs = []

# Model 1: ResNet
model1 = resnet18_spectrogram(num_classes=2).to(device)
checkpoint1 = torch.load(OUTPUT_DIR / 'spectrogram_models' / 'resnet_spectrogram' / 'best_model.pt')
model1.load_state_dict(checkpoint1['model_state_dict'])
base_models.append(model1)
model_configs.append({'name': 'ResNet', 'loader': dataloaders['spectrogram']})

# Model 2: ViT
model2 = VisionTransformerSpectrogram(img_size=(128, 7), patch_size=(16, 1), embed_dim=128, depth=6, num_heads=8, num_classes=2).to(device)
checkpoint2 = torch.load(OUTPUT_DIR / 'spectrogram_models' / 'vit_spectrogram' / 'best_model.pt')
model2.load_state_dict(checkpoint2['model_state_dict'])
base_models.append(model2)
model_configs.append({'name': 'ViT', 'loader': dataloaders['spectrogram']})

# Model 3: Hybrid CNN+MLP
model3 = HybridCNNMLP(n_features=len(feature_cols), num_classes=2).to(device)
checkpoint3 = torch.load(OUTPUT_DIR / 'hybrid_models' / 'hybrid_cnn_mlp' / 'best_model.pt')
model3.load_state_dict(checkpoint3['model_state_dict'])
base_models.append(model3)
model_configs.append({'name': 'Hybrid', 'loader': dataloaders['hybrid']})

# Model 5: BiLSTM
model5 = BiLSTMAttention(input_dim=128, hidden_dim=64, num_layers=2, num_classes=2).to(device)
checkpoint5 = torch.load(OUTPUT_DIR / 'sequence_models' / 'bilstm_attention' / 'best_model.pt')
model5.load_state_dict(checkpoint5['model_state_dict'])
base_models.append(model5)
model_configs.append({'name': 'BiLSTM', 'loader': dataloaders['sequence']})

print(f"Loaded {len(base_models)} base models")

Loaded 4 base models


## Create Stacking Ensemble

In [None]:
# Get predictions from base models on validation set
all_val_predictions = []
val_labels = None

for i, (model, config) in enumerate(zip(base_models, model_configs)):
    model.eval()
    val_loader = config['loader']['val']
    
    predictions = []
    labels = []
    
    with torch.no_grad():
        for batch in val_loader:
            if isinstance(batch[0], tuple):
                inputs = tuple(x.to(device) for x in batch[0])
            else:
                inputs = batch[0].to(device)
            batch_labels = batch[1].to(device)
            
            outputs = model(inputs)
            probs = torch.softmax(outputs, dim=1)
            
            predictions.append(probs.cpu())
            labels.append(batch_labels.cpu())
    
    predictions = torch.cat(predictions, dim=0)
    all_val_predictions.append(predictions)
    if val_labels is None:
        val_labels = torch.cat(labels, dim=0)

# Stack predictions: (n_samples, n_models * n_classes)
stacked_predictions = torch.cat(all_val_predictions, dim=1)

# Create dataset for meta-learner
meta_dataset = TensorDataset(stacked_predictions, val_labels)
meta_loader = DataLoader(meta_dataset, batch_size=64, shuffle=True)

# Train meta-learner
n_base_models = len(base_models)
meta_model = StackingEnsemble(n_base_models=n_base_models, n_classes=2).to(device)

optimizer = torch.optim.Adam(meta_model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

save_dir = OUTPUT_DIR / 'ensemble_models' / 'stacking_ensemble'
save_dir.mkdir(parents=True, exist_ok=True)

print("Training meta-learner...")
from utils.training_utils import train_model
history10, best_epoch10 = train_model(meta_model, meta_loader, meta_loader, criterion, optimizer, scheduler,
                                      device, num_epochs=30, save_dir=save_dir, model_name='stacking_ensemble', early_stopping_patience=5)

# Evaluate on test set
all_test_predictions = []
test_labels = None

for i, (model, config) in enumerate(zip(base_models, model_configs)):
    model.eval()
    test_loader = config['loader']['test']
    
    predictions = []
    labels = []
    
    with torch.no_grad():
        for batch in test_loader:
            if isinstance(batch[0], tuple):
                inputs = tuple(x.to(device) for x in batch[0])
            else:
                inputs = batch[0].to(device)
            batch_labels = batch[1].to(device)
            
            outputs = model(inputs)
            probs = torch.softmax(outputs, dim=1)
            
            predictions.append(probs.cpu())
            labels.append(batch_labels.cpu())
    
    predictions = torch.cat(predictions, dim=0)
    all_test_predictions.append(predictions)
    if test_labels is None:
        test_labels = torch.cat(labels, dim=0)

stacked_test_predictions = torch.cat(all_test_predictions, dim=1)
test_dataset = TensorDataset(stacked_test_predictions, test_labels)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

checkpoint = torch.load(save_dir / 'best_model.pt')
meta_model.load_state_dict(checkpoint['model_state_dict'])
test_metrics10, _, _, _ = evaluate_model(meta_model, test_loader, criterion, device)

with open(save_dir / 'test_metrics.json', 'w') as f:
    json.dump(test_metrics10, f, indent=2)

print(f"\nModel 10 (Stacking Ensemble) Test Results:")
print(f"Accuracy: {test_metrics10['accuracy']:.4f}")
print(f"F1: {test_metrics10['f1']:.4f}")
print(f"ROC-AUC: {test_metrics10['roc_auc']:.4f}")

Training meta-learner...

Epoch 1/30
--------------------------------------------------


                                                            

Train Loss: 0.1950, Train Acc: 0.9426
Val Loss: 0.1406, Val Acc: 0.9552
Val F1: 0.9555, Val ROC-AUC: 0.9876
✓ New best model saved! (F1: 0.9555)

Epoch 2/30
--------------------------------------------------


                                                            

Train Loss: 0.1477, Train Acc: 0.9541
Val Loss: 0.1370, Val Acc: 0.9531
Val F1: 0.9535, Val ROC-AUC: 0.9875

Epoch 3/30
--------------------------------------------------


                                                            

Train Loss: 0.1459, Train Acc: 0.9520
Val Loss: 0.1377, Val Acc: 0.9543
Val F1: 0.9547, Val ROC-AUC: 0.9879

Epoch 4/30
--------------------------------------------------


                                                            

Train Loss: 0.1460, Train Acc: 0.9548
Val Loss: 0.1349, Val Acc: 0.9560
Val F1: 0.9562, Val ROC-AUC: 0.9878
✓ New best model saved! (F1: 0.9562)

Epoch 5/30
--------------------------------------------------


                                                            

Train Loss: 0.1491, Train Acc: 0.9537
Val Loss: 0.1344, Val Acc: 0.9552
Val F1: 0.9555, Val ROC-AUC: 0.9878

Epoch 6/30
--------------------------------------------------


                                                            

Train Loss: 0.1461, Train Acc: 0.9543
Val Loss: 0.1342, Val Acc: 0.9543
Val F1: 0.9546, Val ROC-AUC: 0.9879

Epoch 7/30
--------------------------------------------------


                                                            

Train Loss: 0.1455, Train Acc: 0.9537
Val Loss: 0.1345, Val Acc: 0.9565
Val F1: 0.9568, Val ROC-AUC: 0.9879
✓ New best model saved! (F1: 0.9568)

Epoch 8/30
--------------------------------------------------


                                                            

Train Loss: 0.1430, Train Acc: 0.9545
Val Loss: 0.1341, Val Acc: 0.9565
Val F1: 0.9568, Val ROC-AUC: 0.9879
✓ New best model saved! (F1: 0.9568)

Epoch 9/30
--------------------------------------------------


                                                            

Train Loss: 0.1419, Train Acc: 0.9539
Val Loss: 0.1329, Val Acc: 0.9569
Val F1: 0.9572, Val ROC-AUC: 0.9880
✓ New best model saved! (F1: 0.9572)

Epoch 10/30
--------------------------------------------------


                                                            

Train Loss: 0.1444, Train Acc: 0.9545
Val Loss: 0.1375, Val Acc: 0.9548
Val F1: 0.9552, Val ROC-AUC: 0.9880

Epoch 11/30
--------------------------------------------------


                                                            

Train Loss: 0.1414, Train Acc: 0.9546
Val Loss: 0.1320, Val Acc: 0.9567
Val F1: 0.9570, Val ROC-AUC: 0.9879

Epoch 12/30
--------------------------------------------------


                                                            

Train Loss: 0.1451, Train Acc: 0.9518
Val Loss: 0.1350, Val Acc: 0.9558
Val F1: 0.9561, Val ROC-AUC: 0.9879

Epoch 13/30
--------------------------------------------------


                                                            

Train Loss: 0.1426, Train Acc: 0.9556
Val Loss: 0.1344, Val Acc: 0.9569
Val F1: 0.9572, Val ROC-AUC: 0.9879
✓ New best model saved! (F1: 0.9572)

Epoch 14/30
--------------------------------------------------


                                                            

Train Loss: 0.1412, Train Acc: 0.9543
Val Loss: 0.1346, Val Acc: 0.9575
Val F1: 0.9577, Val ROC-AUC: 0.9879
✓ New best model saved! (F1: 0.9577)

Epoch 15/30
--------------------------------------------------


                                                            

Train Loss: 0.1389, Train Acc: 0.9556
Val Loss: 0.1347, Val Acc: 0.9554
Val F1: 0.9557, Val ROC-AUC: 0.9879

Epoch 16/30
--------------------------------------------------


                                                            

Train Loss: 0.1423, Train Acc: 0.9539
Val Loss: 0.1320, Val Acc: 0.9567
Val F1: 0.9570, Val ROC-AUC: 0.9879

Epoch 17/30
--------------------------------------------------


                                                            

Train Loss: 0.1470, Train Acc: 0.9569
Val Loss: 0.1321, Val Acc: 0.9554
Val F1: 0.9557, Val ROC-AUC: 0.9880

Epoch 18/30
--------------------------------------------------


                                                            

Train Loss: 0.1440, Train Acc: 0.9558
Val Loss: 0.1320, Val Acc: 0.9569
Val F1: 0.9572, Val ROC-AUC: 0.9881

Epoch 19/30
--------------------------------------------------


                                                            

Train Loss: 0.1413, Train Acc: 0.9567
Val Loss: 0.1330, Val Acc: 0.9550
Val F1: 0.9554, Val ROC-AUC: 0.9881

Early stopping at epoch 19
Best F1: 0.9577 at epoch 14


                                                            


Model 10 (Stacking Ensemble) Test Results:
Accuracy: 0.9467
F1: 0.9470
ROC-AUC: 0.9879
