# Ensemble Model Training - Simplified Single-Branch Models

This notebook trains a separate, simplified single-branch CNN for each feature type using K-fold cross-validation, and then evaluates an ensemble that averages the predictions of the individual models.

In [None]:
from datetime import datetime
import os
import json
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.utils import to_categorical, Sequence
from keras.models import Model
from keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
)
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

from modules.PostgresDBHandler import PostgresDBHandler

In [None]:
# Configuration
# Keyword arguments forwarded to PostgresDBHandler(**dbParams).
dbParams = dict(
    dbname="mydatabase",
    user="myuser",
    password="mypassword",
    host="postgres_server",
    port="5432",
)

# Training hyper-parameters
EPOCHS = 100
BATCH_SIZE = 32
KFOLD_SPLITS = 5
FIXED_LENGTH = 128

# Feature types to train models for
FEATURE_TYPES = [
    'mel_spectrogram',
    'mfcc',
    'chromagram',
    'spectral_contrast',
    'tonnetz',
    'constant_q',
    'cqt',
    'stft',
    'harmonic_percussive',
    'onset_strength',
]

# Feature shapes for each type; a trailing channel axis is added by the model input
FEATURE_SHAPES = {
    'mel_spectrogram': (64, 128),
    'mfcc': (8, 128),
    'chromagram': (8, 128),
    'spectral_contrast': (3, 128),
    'tonnetz': (6, 128),
    'constant_q': (42, 128),
    'cqt': (42, 128),
    'stft': (512, 128),
    'harmonic_percussive': (1025, 128),
    'onset_strength': (1, 128),
}

# GPU configuration: enable memory growth on every visible GPU
gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Report the failure and carry on with TensorFlow's default behaviour
        print(err)
    else:
        print(f"Number of available GPUs: {len(gpus)}")

In [None]:
# Initialize database connection
db = PostgresDBHandler(**dbParams)
db.connect()

try:
    # Get instrument mappings
    instruments_mappings = db.get_mappings_instruments()
finally:
    # Always release the connection, even if the query above raises
    db.close()

# One output unit per known instrument
num_classes = len(instruments_mappings)
print(f"Number of instrument classes: {num_classes}")
print("Instruments:", instruments_mappings['name'].tolist())

In [None]:
class SingleFeatureDataGenerator(Sequence):
    """Keras ``Sequence`` that yields ``(X, y)`` batches for one feature type.

    Every row of ``df`` must provide a ``featurePath`` column (a path that
    ``np.load`` can read) and an ``instrumentID`` column (the class label).

    Args:
        df: DataFrame describing the samples. A copy is stored, so the
            caller's frame is never modified.
        feature_type: Name of the feature type; stored for reference only.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the sample order at construction time
            and after every epoch.
        classes: Optional iterable holding the complete set of
            ``instrumentID`` values. When given, the label encoding and the
            one-hot width are derived from it, which keeps them identical
            across train/validation/test generators. When omitted, they are
            derived from ``df`` alone (the original behaviour).

    Raises:
        ValueError: If ``classes`` is given and ``df`` contains an
            ``instrumentID`` that is not part of it (raised by
            ``LabelEncoder.transform``).
    """

    def __init__(self, df, feature_type, batch_size=32, shuffle=True, classes=None):
        super().__init__()
        # Copy so that adding the encoded-label column does not leak into the caller's frame
        self.df = df.copy()
        self.feature_type = feature_type
        self.batch_size = batch_size
        self.shuffle = shuffle

        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(self.df['instrumentID'] if classes is None else list(classes))
        self.df['instrumentID_encoded'] = self.label_encoder.transform(self.df['instrumentID'])
        self.num_classes = len(self.label_encoder.classes_)

        self.indices = np.arange(len(self.df))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch, including a final partial batch."""
        # Rounding up keeps the trailing samples, so predict() yields exactly
        # one row per sample and stays aligned with get_labels().
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index`` and return ``(X, y)``.

        ``X`` stacks the loaded arrays and appends a trailing channel axis;
        ``y`` is the one-hot encoding of the encoded labels.
        """
        start = index * self.batch_size
        # Slicing past the end simply yields a shorter (final) batch
        batch_df = self.df.iloc[self.indices[start:start + self.batch_size]]

        # assumes every file of a batch holds an array of the same shape - TODO confirm
        features = [np.load(path) for path in batch_df['featurePath']]

        X = np.expand_dims(np.array(features), -1)
        y = to_categorical(
            batch_df['instrumentID_encoded'].to_numpy(), num_classes=self.num_classes
        )

        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def get_labels(self):
        """Return the encoded labels in DataFrame order.

        This matches the batch order only when ``shuffle`` is False.
        """
        return self.df['instrumentID_encoded'].values

In [None]:
def create_simple_model(input_shape, num_classes, model_name="simple_cnn"):
    """Create a simplified single-branch CNN model with reduced capacity.

    Args:
        input_shape: ``(height, width)`` of one feature array; a single
            channel axis is appended for the model input.
        num_classes: Number of units in the softmax output layer.
        model_name: Name of the model, also used as prefix for the input and
            output layer names.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_layer = Input(shape=(*input_shape, 1), name=f"{model_name}_input")

    # Three conv stages as (filters, dropout rate).
    # Pooling uses 'same' padding: with the default 'valid' padding a 2x2
    # pool over an axis of size 1 produces an empty output, so inputs with a
    # height below 8 (e.g. (1, 128), (3, 128), (6, 128)) could not pass
    # three pooling stages.
    x = input_layer
    for filters, drop_rate in ((32, 0.2), (64, 0.2), (128, 0.3)):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Dropout(drop_rate)(x)

    x = Flatten()(x)

    # Reduced dense layers
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.4)(x)
    x = BatchNormalization()(x)

    # Smaller final dense layer
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.3)(x)

    output = Dense(num_classes, activation='softmax', name=f"{model_name}_output")(x)

    return Model(inputs=input_layer, outputs=output, name=model_name)

In [None]:
# Load data for each feature type
db = PostgresDBHandler(**dbParams)
db.connect()

# One DataFrame per feature type that actually has data
df_dict = {}
try:
    # Get all processed IDs
    processed_ids = db.get_all_processed_ids()
    print(f"Total processed samples: {len(processed_ids)}")

    # Create DataFrames for each feature type
    for feature_type in FEATURE_TYPES:
        processed_data = db.get_processed_fit_data(processed_ids, feature_type)

        if processed_data:
            df = pd.DataFrame(processed_data)
            df_dict[feature_type] = df
            print(f"{feature_type}: {len(df)} samples")
        else:
            print(f"Warning: No data found for {feature_type}")
finally:
    # Always release the connection, even if one of the queries raises
    db.close()

# Filter to only include feature types with data
available_feature_types = list(df_dict.keys())
print(f"\nAvailable feature types: {available_feature_types}")

In [None]:
# Training results storage
all_results = {}   # feature type -> per-fold metrics, histories and models
all_models = {}    # feature type -> model kept for saving

# Train individual models for each feature type
# (tqdm's keyword is lowercase `leave`; it rejects unknown keywords such as `Leave`)
for feature_type in tqdm(available_feature_types, desc="Feature Types", leave=True):
    print(f"\n{'='*50}")
    print(f"Training model for {feature_type}")
    print(f"{'='*50}")

    df = df_dict[feature_type]
    input_shape = FEATURE_SHAPES[feature_type]

    # Initialize results storage for this feature type
    feature_results = {
        'accuracy_list': [],
        'loss_list': [],
        'classification_reports': [],
        'confusion_matrices': [],
        'histories': [],
        'models': []
    }

    # Cross-validation (fixed seed so the folds can be recreated later)
    kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=42)

    for fold, (train_idx, test_idx) in enumerate(
        tqdm(list(kf.split(df)), desc=f"{feature_type} Folds", leave=False)
    ):
        print(f"\n--- Fold {fold + 1}/{KFOLD_SPLITS} ---")

        # Split data
        train_df = df.iloc[train_idx].reset_index(drop=True)
        test_df = df.iloc[test_idx].reset_index(drop=True)

        # Further split training data into train (80%) and validation (20%)
        train_indices, val_indices = train_test_split(
            np.arange(len(train_df)), test_size=0.2, random_state=42
        )

        val_df = train_df.iloc[val_indices].reset_index(drop=True)
        train_df = train_df.iloc[train_indices].reset_index(drop=True)

        # Create data generators
        # NOTE(review): each generator fits its own label encoder on its split,
        # so this assumes every split contains every instrument class and that
        # the resulting class count equals `num_classes` - confirm for small datasets.
        train_generator = SingleFeatureDataGenerator(train_df, feature_type, batch_size=BATCH_SIZE)
        val_generator = SingleFeatureDataGenerator(val_df, feature_type, batch_size=BATCH_SIZE, shuffle=False)
        test_generator = SingleFeatureDataGenerator(test_df, feature_type, batch_size=BATCH_SIZE, shuffle=False)

        # Create and compile model
        model = create_simple_model(input_shape, num_classes, f"{feature_type}_model")

        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        model.compile(
            optimizer=optimizer,
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )

        # Early stopping
        early_stopping = EarlyStopping(
            monitor="val_loss", patience=15, restore_best_weights=True
        )

        # Train the model
        history = model.fit(
            train_generator,
            validation_data=val_generator,
            epochs=EPOCHS,
            callbacks=[early_stopping],
            verbose=1
        )

        feature_results['histories'].append(history.history)

        # Evaluate the model
        loss, accuracy = model.evaluate(test_generator, verbose=0)
        feature_results['accuracy_list'].append(accuracy)
        feature_results['loss_list'].append(loss)

        print(f"Accuracy: {accuracy:.4f}, Loss: {loss:.4f}")

        # Predict and generate reports
        y_pred = model.predict(test_generator, verbose=0)
        y_pred_classes = np.argmax(y_pred, axis=1)
        # The generator may yield fewer rows than the test set has (a dropped
        # partial batch). It is unshuffled, so the predictions correspond to
        # the leading labels; truncate to keep both arrays the same length.
        y_true = test_generator.get_labels()[:len(y_pred_classes)]

        # Classification report
        report = classification_report(y_true, y_pred_classes, output_dict=True)
        feature_results['classification_reports'].append(report)

        # Confusion matrix
        conf_matrix = confusion_matrix(y_true, y_pred_classes).tolist()
        feature_results['confusion_matrices'].append(conf_matrix)

        # Keep this fold's model; the ensemble step looks models up by fold index
        feature_results['models'].append(model)

    # Store results for this feature type
    all_results[feature_type] = feature_results
    # The model of the last fold is the one that gets saved (not necessarily the best one)
    all_models[feature_type] = feature_results['models'][-1]

    # Print summary for this feature type
    mean_acc = np.mean(feature_results['accuracy_list'])
    std_acc = np.std(feature_results['accuracy_list'])
    print(f"\n{feature_type} - Mean Accuracy: {mean_acc:.4f} ± {std_acc:.4f}")

In [None]:
# Create ensemble predictions
print("\n" + "="*50)
print("Creating Ensemble Predictions")
print("="*50)

# Per-fold ensemble metrics ('loss_list' is part of the result layout but is
# not populated by this cell)
ensemble_results = {
    'accuracy_list': [],
    'loss_list': [],
    'classification_reports': [],
    'confusion_matrices': []
}

# Same seeded splitter as used for training, so the test indices of every
# fold can be recreated
kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=42)

# Evaluate the ensemble on every fold, using the models trained on that fold
# (tqdm's keyword is lowercase `leave`; it rejects unknown keywords such as `Leave`)
for fold in tqdm(range(KFOLD_SPLITS), desc="Ensemble Folds", leave=True):
    print(f"\n--- Ensemble Fold {fold + 1}/{KFOLD_SPLITS} ---")

    # Get predictions from all models for this fold
    all_predictions = {}
    # Reset per fold: a `y_true` left over from an earlier cell or fold must
    # never be reused as ground truth for this fold.
    y_true = None

    for feature_type in available_feature_types:
        if feature_type not in all_results:
            continue

        # Get the model from this fold
        model = all_results[feature_type]['models'][fold]

        # Recreate the test data for this fold
        df = df_dict[feature_type]
        train_idx, test_idx = list(kf.split(df))[fold]
        test_df = df.iloc[test_idx].reset_index(drop=True)

        test_generator = SingleFeatureDataGenerator(test_df, feature_type, batch_size=BATCH_SIZE, shuffle=False)

        # Get predictions
        pred = model.predict(test_generator, verbose=0)
        all_predictions[feature_type] = pred

        # Store true labels from the first feature type.
        # assumes every feature type's DataFrame lists the same samples in the
        # same order, so labels and predictions line up - TODO confirm.
        # Truncated to the number of predictions in case the generator
        # dropped a trailing partial batch.
        if y_true is None:
            y_true = test_generator.get_labels()[:len(pred)]

    # Simple averaging ensemble
    if all_predictions:
        ensemble_pred = np.mean(list(all_predictions.values()), axis=0)
        ensemble_pred_classes = np.argmax(ensemble_pred, axis=1)

        # Calculate ensemble accuracy
        ensemble_accuracy = accuracy_score(y_true, ensemble_pred_classes)
        ensemble_results['accuracy_list'].append(ensemble_accuracy)

        print(f"Ensemble Accuracy: {ensemble_accuracy:.4f}")

        # Classification report
        report = classification_report(y_true, ensemble_pred_classes, output_dict=True)
        ensemble_results['classification_reports'].append(report)

        # Confusion matrix
        conf_matrix = confusion_matrix(y_true, ensemble_pred_classes).tolist()
        ensemble_results['confusion_matrices'].append(conf_matrix)

# Store ensemble results
all_results['ensemble'] = ensemble_results

In [None]:
# Save results and models
def next_version_number(existing_names, date_part):
    """Return the next free version number for ``<date_part>_v<N>`` folders.

    Args:
        existing_names: Names of the entries already present in the output
            directory.
        date_part: Date prefix (e.g. ``"2024_01_31"``) a name must contain to
            be taken into account.

    Returns:
        The highest existing version number for ``date_part`` plus one, or 1
        when there is none.
    """
    versions = []
    for name in existing_names:
        _, separator, suffix = name.rpartition("_v")
        # Ignore unrelated entries and suffixes that are not plain numbers
        if separator and date_part in name and suffix.isdigit():
            versions.append(int(suffix))
    # Compare as integers: as strings, "9" would sort after "10"
    return max(versions, default=0) + 1


try:
    os.mkdir("ensemble_models")
except FileExistsError:
    print("Folder already exists")

# Create version folder
date_part = datetime.now().strftime("%Y_%m_%d")
last_version = next_version_number(os.listdir("ensemble_models"), date_part) - 1
folder_name = f"{date_part}_v{last_version+1}"

os.makedirs(os.path.join("ensemble_models", folder_name), exist_ok=True)

# Save individual models: one .h5 file per feature type inside the version folder
for feature_type, model in all_models.items():
    model_path = os.path.join("ensemble_models", folder_name, f"{feature_type}_model.h5")
    model.save(model_path)
    print(f"Saved {feature_type} model to {model_path}")

# Per-feature results without the model objects (everything except 'ensemble')
individual_results = {}
for ft, res in all_results.items():
    if ft == 'ensemble':
        continue
    individual_results[ft] = {key: value for key, value in res.items() if key != 'models'}

# Settings the models were trained with
training_config = {
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'kfold_splits': KFOLD_SPLITS,
    'fixed_length': FIXED_LENGTH
}

# Save results
results_data = {
    'individual_results': individual_results,
    'ensemble_results': all_results['ensemble'],
    'feature_types': available_feature_types,
    'num_classes': num_classes,
    'feature_shapes': FEATURE_SHAPES,
    'training_config': training_config,
    'instrument_mappings': instruments_mappings.to_dict()
}

results_path = os.path.join("ensemble_models", folder_name, "results.json")
with open(results_path, 'w') as f:
    # default=str stringifies any value json cannot encode natively
    json.dump(results_data, f, indent=2, default=str)

print(f"\nResults saved to: {results_path}")
print(f"Models saved to: ensemble_models/{folder_name}/")

In [None]:
# Print summary of results
print("\n" + "="*60)
print("TRAINING SUMMARY")
print("="*60)

print("\nIndividual Model Performance:")
for feature_type in available_feature_types:
    if feature_type in all_results:
        accuracies = all_results[feature_type]['accuracy_list']
        mean_acc = np.mean(accuracies)
        std_acc = np.std(accuracies)
        print(f"  {feature_type}: {mean_acc:.4f} ± {std_acc:.4f}")

print("\nEnsemble Performance:")
# Treat a missing 'ensemble' entry and an empty accuracy list the same way,
# so no NaN statistics are printed and `ensemble_mean` is always defined.
ensemble_accuracies = all_results.get('ensemble', {}).get('accuracy_list', [])
if ensemble_accuracies:
    ensemble_mean = np.mean(ensemble_accuracies)
    ensemble_std = np.std(ensemble_accuracies)
    print(f"  Ensemble: {ensemble_mean:.4f} ± {ensemble_std:.4f}")
else:
    ensemble_mean = None
    print("  No ensemble results available")

# Find best individual model (by mean accuracy over the folds)
individual_means = [
    (ft, np.mean(all_results[ft]['accuracy_list']))
    for ft in available_feature_types if ft in all_results
]

if individual_means:
    best_individual = max(individual_means, key=lambda x: x[1])
    print(f"\nBest Individual Model: {best_individual[0]} ({best_individual[1]:.4f})")

    if ensemble_mean is not None:
        # Absolute accuracy difference between the ensemble and the best single model
        improvement = ensemble_mean - best_individual[1]
        print(f"Ensemble Improvement: {improvement:.4f} ({improvement*100:.2f}%)")
else:
    # max() would raise on an empty list when no model was trained
    print("\nNo individual models were trained")