# Ensemble Model Training - Simplified Single-Branch Models

This notebook trains separate simplified CNN models for each feature type and then creates an ensemble.

In [1]:
from datetime import datetime
import os
import json
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.utils import to_categorical, Sequence
from keras.models import Model
from keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Average
)
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from modules.PostgresDBHandler import PostgresDBHandler
from tqdm import tqdm
from tensorflow.keras.optimizers import Adam
from tensorflow import keras

2025-07-08 13:43:21.971171: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-08 13:43:22.123181: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-08 13:43:22.123229: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-08 13:43:22.146634: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-08 13:43:22.196483: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Configuration
dbParams = {
    "dbname": "mydatabase",
    "user": "myuser",
    "password": "mypassword",
    "host": "postgres_server",
    "port": "5432",
}

EPOCHS = 200
BATCH_SIZE = 32
KFOLD_SPLITS = 5
FIXED_LENGTH = 128

# Feature types to train models for
FEATURE_TYPES = [
    'mel_spectrogram', 'mfcc', 'chromagram', 'spectral_contrast',
    'tonnetz', 'constant_q', 'cqt', 'stft', 'harmonic_percussive', 'onset_strength'
]

# GPU configuration
gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Number of available GPUs: {len(gpus)}")
    except RuntimeError as e:
        print(e)

2025-07-08 13:43:23.823363: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-07-08 13:43:23.902288: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [3]:
# Initialize database connection
db = PostgresDBHandler(**dbParams)
db.connect()

# Get instrument mappings
instruments_mappings = db.get_mappings_instruments()
num_classes = len(instruments_mappings)
print(f"Number of instrument classes: {num_classes}")
print("Instruments:", instruments_mappings['name'].tolist())

db.close()

Number of instrument classes: 9
Instruments: ['sax', 'trumpet', 'violin', 'clarinet', 'cello', 'piccolo', 'flute', 'bass', 'oboe']


In [4]:
dbConnect = PostgresDBHandler(**dbParams)
dbConnect.connect()
audioIDs = dbConnect.get_all_unique_audio_ids_in_processed()
processed_data = dbConnect.get_processed_fit_data(audioIDs)

all_processed_data = []
for audio_id in audioIDs:
    features = dbConnect.get_all_feature_types_for_audio(audio_id)
    feature_dict = {f['featureTypeName']: f['featurePath'] for f in features}
    instrumentID = dbConnect.get_audio_file(audio_id)['instrumentID']
    feature_dict['instrumentID'] = instrumentID
    all_processed_data.append(feature_dict)

dbConnect.close()

In [5]:
processed_df = pd.DataFrame(all_processed_data)
processed_df

Unnamed: 0,mel_spectrogram,mfcc,chromagram,spectral_contrast,tonnetz,constant_q,cqt,stft,harmonic_percussive,onset_strength,instrumentID
0,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/bc6b235b-3d...,ensemble_intermediate_results/chromagram/168c1...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/b4bba5ee...,ensemble_intermediate_results/constant_q/b52bf...,ensemble_intermediate_results/cqt/47deeb04-a97...,ensemble_intermediate_results/stft/fc1697e3-d8...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/b...,8
1,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/71cd1255-b0...,ensemble_intermediate_results/chromagram/bfe53...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/ed0b9cbf...,ensemble_intermediate_results/constant_q/778c5...,ensemble_intermediate_results/cqt/77727c20-051...,ensemble_intermediate_results/stft/d8c26933-42...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/5...,7
2,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/fbf95cf5-10...,ensemble_intermediate_results/chromagram/5eb60...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/123f80b0...,ensemble_intermediate_results/constant_q/d7cb4...,ensemble_intermediate_results/cqt/47862e2d-1a7...,ensemble_intermediate_results/stft/e40d089f-df...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/e...,5
3,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/76ddf671-d4...,ensemble_intermediate_results/chromagram/b41e3...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/5e9fc60b...,ensemble_intermediate_results/constant_q/b9ce4...,ensemble_intermediate_results/cqt/cd6fec8a-ee9...,ensemble_intermediate_results/stft/15397da5-c3...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/6...,2
4,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/fc0b5adb-8e...,ensemble_intermediate_results/chromagram/b6ce7...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/ef5c49af...,ensemble_intermediate_results/constant_q/a5dc4...,ensemble_intermediate_results/cqt/f0f466a8-097...,ensemble_intermediate_results/stft/3d385e3c-23...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/5...,5
...,...,...,...,...,...,...,...,...,...,...,...
265,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/55a17df4-ff...,ensemble_intermediate_results/chromagram/b07a7...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/baf3cb8e...,ensemble_intermediate_results/constant_q/d1ccd...,ensemble_intermediate_results/cqt/c7237c32-8b5...,ensemble_intermediate_results/stft/6bff11d1-35...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/4...,2
266,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/c7a97f3f-95...,ensemble_intermediate_results/chromagram/022e7...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/868cd229...,ensemble_intermediate_results/constant_q/389a1...,ensemble_intermediate_results/cqt/9514edb3-c48...,ensemble_intermediate_results/stft/49875e80-db...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/6...,1
267,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/0aefbc31-93...,ensemble_intermediate_results/chromagram/d7287...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/77e1663f...,ensemble_intermediate_results/constant_q/b42f7...,ensemble_intermediate_results/cqt/3de14df6-119...,ensemble_intermediate_results/stft/63875202-d4...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/a...,7
268,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/0aaaf7af-78...,ensemble_intermediate_results/chromagram/a31fd...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/2051833c...,ensemble_intermediate_results/constant_q/24d50...,ensemble_intermediate_results/cqt/948005b4-320...,ensemble_intermediate_results/stft/8ea0f217-31...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/f...,6


In [6]:
def get_input_shape(feature_type, df):
    feature_path_col = feature_type 
    for path in df[feature_path_col]:
        if isinstance(path, str) and os.path.exists(path):
            arr = np.load(path)
            return arr.shape
    raise ValueError(f"No valid file found for {feature_type}")

In [7]:
class SingleFeatureDataGenerator(Sequence):
    def __init__(self, df, feature_col, batch_size=32, shuffle=True, num_classes=None):
        self.df = df.reset_index(drop=True)
        self.feature_col = feature_col
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_classes = num_classes
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        self.indices = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_df = self.df.iloc[batch_indices]

        X = []
        y = []

        for _, row in batch_df.iterrows():
            try:
                arr = np.load(row[self.feature_col])
            except Exception as e:
                print(f"Error loading {row[self.feature_col]}: {e}")
                continue
        
            if np.isnan(arr).any() or np.isinf(arr).any():
                raise ValueError(f"Feature file {row[self.feature_col]} contains NaNs or Infs.")
        
            arr = (arr - np.mean(arr)) / (np.std(arr) + 1e-8)
            if arr.ndim == 2:
                arr = np.expand_dims(arr, -1)  # shape: (H, W, 1)
        
            X.append(arr)
            y.append(row['instrumentID'])  # already label-encoded
        
        X = np.array(X)
        y = to_categorical(np.array(y), num_classes=self.num_classes)
        
        return X, y

In [8]:
def create_simple_model(input_shape, num_classes, model_name="simple_cnn"):
    input_layer = Input(shape=(*input_shape, 1), name=f"{model_name}_input")

    x = Conv2D(4, (3, 3), activation='relu', padding='same')(input_layer)
    x = BatchNormalization()(x)

    x = Flatten()(x)

    output = Dense(num_classes, activation='softmax', name=f"{model_name}_output")(x)

    model = Model(inputs=input_layer, outputs=output, name=model_name)
    return model

In [9]:
results = {}

for feature_type in tqdm(FEATURE_TYPES, desc="Training features"):
    print(f"\n{'='*40}\nTraining model for {feature_type}\n{'='*40}")

    feature_col = feature_type
    feature_df = processed_df.dropna(subset=[feature_col])
    
    # Global label encoder
    label_encoder = LabelEncoder()
    label_encoder.fit(feature_df['instrumentID'])

    feature_df = feature_df.copy()
    feature_df['instrumentID'] = label_encoder.transform(feature_df['instrumentID'])
    num_classes = len(label_encoder.classes_)
    input_shape = get_input_shape(feature_type, feature_df)


    kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=42)
    accuracy_list, loss_list, history_list = [], [], []
    classification_reports, confusion_matrices = [], []

    for fold, (train_idx, test_idx) in enumerate(kf.split(feature_df)):
        print(f"\n--- Fold {fold+1}/{KFOLD_SPLITS} ---")
        train_df = feature_df.iloc[train_idx].reset_index(drop=True)
        test_df = feature_df.iloc[test_idx].reset_index(drop=True)

        train_df, val_df = train_test_split(
            train_df, test_size=0.2, random_state=42, stratify=train_df['instrumentID'])

        # Generators (labels are already encoded)
        train_gen = SingleFeatureDataGenerator(train_df, feature_col, BATCH_SIZE, shuffle=True, num_classes=num_classes)
        val_gen   = SingleFeatureDataGenerator(val_df,   feature_col, BATCH_SIZE, shuffle=False, num_classes=num_classes)
        test_gen  = SingleFeatureDataGenerator(test_df,  feature_col, BATCH_SIZE, shuffle=False, num_classes=num_classes)

        # Model
        model = create_simple_model(input_shape, num_classes, model_name=feature_type)
        model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        history = model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS, callbacks=[early_stopping])
        history_list.append(history.history)

        # Evaluation
        loss, acc = model.evaluate(test_gen)
        loss_list.append(loss)
        accuracy_list.append(acc)
        print(f"{feature_type} - Fold {fold+1} Test accuracy: {acc:.4f}")

        # Predictions & Reports
        y_pred = model.predict(test_gen)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true = []
        for _, labels in test_gen:
            y_true.extend(np.argmax(labels, axis=1))
        y_true = np.array(y_true)

        report = classification_report(y_true, y_pred_classes, output_dict=True)
        classification_reports.append(report)
        conf_matrix = confusion_matrix(y_true, y_pred_classes).tolist()
        confusion_matrices.append(conf_matrix)

        # Save model
        os.makedirs(f"models/{feature_type}", exist_ok=True)
        model.save(f"models/{feature_type}/model_fold{fold+1}.keras")

    # Save results
    results[feature_type] = {
        "accuracy_list": accuracy_list,
        "loss_list": loss_list,
        "histories": history_list,
        "classification_reports": classification_reports,
        "confusion_matrices": confusion_matrices,
    }

    with open(f"models/{feature_type}/results.json", "w") as f:
        json.dump(results[feature_type], f, indent=2)

print("\nAll training complete. Models and results saved in 'models/'")

Training features:   0%|          | 0/10 [00:00<?, ?it/s]


Training model for mel_spectrogram

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
mel_spectrogram - Fold 1 Test accuracy: 0.4815

--- Fold 2/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
mel_spectrogram - Fold 2 Test accuracy: 0.4259

--- Fold 3/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
mel_spectrogram - Fold 3 Test accuracy: 0.8333

--- Fold 4/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
mel_spectrogram - Fold 4 Test accuracy: 0.3148

--- Fold 5/5 ---
Epoch 1/200
Epoch 2/200
Epoch

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  10%|█         | 1/10 [00:09<01:21,  9.07s/it]


Training model for mfcc

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
mfcc - Fold 1 Test accuracy: 0.2407

--- Fold 2/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
mfcc - Fold 2 Test accuracy: 0.7407

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
mfcc - Fold 3 Test accuracy: 0.5370

--- Fold 4/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
mfcc - Fold 4 Test accuracy: 0.7593

--- Fold 5/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
mfcc - Fold 5 Test accuracy: 0.1852


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  20%|██        | 2/10 [00:18<01:15,  9.42s/it]


Training model for chromagram

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
chromagram - Fold 1 Test accuracy: 0.2963

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
chromagram - Fold 2 Test accuracy: 0.2037

--- Fold 3/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
chromagram - Fold 3 Test accuracy: 0.2963

--- Fold 4/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
chromagram - Fold 4 Test accuracy: 0.2963

--- Fold 5/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
chromagram - Fold 5 Test accuracy: 0.2407


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  30%|███       | 3/10 [00:25<00:58,  8.29s/it]


Training model for spectral_contrast

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
spectral_contrast - Fold 1 Test accuracy: 0.4074

--- Fold 2/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
spectral_contrast - Fold 2 Test accuracy: 0.3519

--- Fold 3/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
spectral_contrast - Fold 3 Test accuracy: 0.5556

--- Fold 4/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
spectral_contrast - Fold 4 Test accuracy: 0.4444

--- Fold 5/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
spectral_contrast - Fold 5 Test accuracy: 0.4259


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  40%|████      | 4/10 [00:35<00:53,  8.95s/it]


Training model for tonnetz

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
tonnetz - Fold 1 Test accuracy: 0.0926

--- Fold 2/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
tonnetz - Fold 2 Test accuracy: 0.0741

--- Fold 3/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
tonnetz - Fold 3 Test accuracy: 0.1111

--- Fold 4/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
tonnetz - Fold 4 Test accuracy: 0.1481

--- Fold 5/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
tonnetz - Fold 5 Test accuracy: 0.2778


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  50%|█████     | 5/10 [00:41<00:40,  8.00s/it]


Training model for constant_q

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
constant_q - Fold 1 Test accuracy: 0.5741

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
constant_q - Fold 2 Test accuracy: 0.6852

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
constant_q - Fold 3 Test accuracy: 0.7778

--- Fold 4/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  60%|██████    | 6/10 [00:49<00:32,  8.01s/it]


Training model for cqt

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
cqt - Fold 1 Test accuracy: 0.6852

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
cqt - Fold 2 Test accuracy: 0.6667

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
cqt - Fold 3 Test accuracy: 0.5556

--- Fold 4/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
E

Training features:  70%|███████   | 7/10 [00:58<00:24,  8.27s/it]


Training model for stft

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
stft - Fold 1 Test accuracy: 0.1111

--- Fold 2/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
stft - Fold 2 Test accuracy: 0.5000

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
stft - Fold 4 Test accuracy: 0.1481

--- Fold 5/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
stft - Fold 5 Test accuracy: 0.3148


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  80%|████████  | 8/10 [01:11<00:19,  9.54s/it]


Training model for harmonic_percussive

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
harmonic_percussive - Fold 1 Test accuracy: 0.6481

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
harmonic_percussive - Fold 2 Test accuracy: 0.7963

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



--- Fold 5/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
harmonic_percussive - Fold 5 Test accuracy: 0.3519


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  90%|█████████ | 9/10 [02:56<00:39, 39.49s/it]


Training model for onset_strength

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
onset_strength - Fold 1 Test accuracy: 0.1111

--- Fold 2/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
onset_strength - Fold 2 Test accuracy: 0.1111

--- Fold 3/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
onset_strength - Fold 3 Test accuracy: 0.1852

--- Fold 4/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
onset_strength - Fold 4 Test accuracy: 0.0370

--- Fold 5/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
onset_strength - Fold 5 Test accuracy: 0.0741


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features: 100%|██████████| 10/10 [03:01<00:00, 18.16s/it]


All training complete. Models and results saved in 'models/'





In [10]:
print("\n" + "="*50)
print("Creating Ensemble Predictions")
print("="*50)

processed_df['instrumentID'] = processed_df['instrumentID'] - 1

ensemble_accuracies = []
ensemble_reports = []
ensemble_conf_matrices = []

for fold in range(KFOLD_SPLITS):
    print(f"\n--- Ensemble Fold {fold + 1}/{KFOLD_SPLITS} ---")
    fold_preds = []
    y_true = None

    for feature_type in FEATURE_TYPES:
        # Load model for this fold
        model_path = f"models/{feature_type}/model_fold{fold+1}.keras"
        if not os.path.exists(model_path):
            print(f"Model not found: {model_path}")
            continue
        model = keras.models.load_model(model_path)

        # Get test data for this fold
        feature_df = processed_df[[feature_type, 'instrumentID']].dropna().reset_index(drop=True)
        kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=42)
        train_idx, test_idx = list(kf.split(feature_df))[fold]
        test_df = feature_df.iloc[test_idx].reset_index(drop=True)
        test_gen = SingleFeatureDataGenerator(test_df, feature_type, batch_size=BATCH_SIZE, shuffle=False, num_classes=num_classes)
        preds = model.predict(test_gen, verbose=0)
        fold_preds.append(preds)
        if y_true is None:
            # Get true labels from generator
            y_true = []
            for _, labels in test_gen:
                y_true.extend(np.argmax(labels, axis=1))
            y_true = np.array(y_true)

    if fold_preds:
        ensemble_pred = np.mean(fold_preds, axis=0)
        ensemble_pred_classes = np.argmax(ensemble_pred, axis=1)
        acc = accuracy_score(y_true, ensemble_pred_classes)
        ensemble_accuracies.append(acc)
        print(f"Ensemble Accuracy: {acc:.4f}")
        report = classification_report(y_true, ensemble_pred_classes, output_dict=True)
        ensemble_reports.append(report)
        conf_matrix = confusion_matrix(y_true, ensemble_pred_classes).tolist()
        ensemble_conf_matrices.append(conf_matrix)


Creating Ensemble Predictions

--- Ensemble Fold 1/5 ---
Ensemble Accuracy: 0.7407

--- Ensemble Fold 2/5 ---
Ensemble Accuracy: 0.7963

--- Ensemble Fold 3/5 ---
Ensemble Accuracy: 0.8148

--- Ensemble Fold 4/5 ---
Ensemble Accuracy: 0.8704

--- Ensemble Fold 5/5 ---
Ensemble Accuracy: 0.7778


In [11]:
os.makedirs("ensemble_results", exist_ok=True)
date_part = datetime.now().date().__str__().replace('-', '_')
results_path = os.path.join("ensemble_results", f"ensemble_results_{date_part}.json")
ensemble_results = {
    "accuracy_list": ensemble_accuracies,
    "classification_reports": ensemble_reports,
    "confusion_matrices": ensemble_conf_matrices,
}
with open(results_path, "w") as f:
    json.dump(ensemble_results, f, indent=2)
print(f"\nEnsemble results saved to: {results_path}")




Ensemble results saved to: ensemble_results/ensemble_results_2025_07_08.json


In [12]:
for fold in range(KFOLD_SPLITS):
    models_fold = []
    inputs = []
    input_names = []
    for feature_type in FEATURE_TYPES:
        model_path = f"models/{feature_type}/model_fold{fold+1}.keras"
        if not os.path.exists(model_path):
            print(f"Model not found for ensemble: {model_path}")
            continue
        model = keras.models.load_model(model_path)
        models_fold.append(model)
        inp = model.input
        inputs.append(inp)
        input_names.append(feature_type)
    if not models_fold:
        print(f"No models found for fold {fold+1}, skipping ensemble model save.")
        continue
    # If all input shapes are the same, use a single input
    input_shapes = [tuple(inp.shape) for inp in inputs]
    if all(s == input_shapes[0] for s in input_shapes):
        ensemble_input = keras.Input(shape=input_shapes[0][1:], name="ensemble_input")
        model_outputs = [m(ensemble_input) for m in models_fold]
        avg = Average()(model_outputs)
        ensemble_model = Model(inputs=ensemble_input, outputs=avg, name=f"ensemble_model_fold{fold+1}")
    else:
        # Multi-input ensemble
        ensemble_inputs = [keras.Input(shape=inp.shape[1:], name=f"{name}_input") for inp, name in zip(inputs, input_names)]
        model_outputs = [m(inp) for m, inp in zip(models_fold, ensemble_inputs)]
        avg = Average()(model_outputs)
        ensemble_model = Model(inputs=ensemble_inputs, outputs=avg, name=f"ensemble_model_fold{fold+1}")
    # Save the ensemble model
    ensemble_model_path = os.path.join("ensemble_results", f"ensemble_model_fold{fold+1}_{date_part}.keras")
    ensemble_model.save(ensemble_model_path)
    print(f"Saved ensemble Keras model for fold {fold+1} to {ensemble_model_path}")


Saved ensemble Keras model for fold 1 to ensemble_results/ensemble_model_fold1_2025_07_08.keras
Saved ensemble Keras model for fold 2 to ensemble_results/ensemble_model_fold2_2025_07_08.keras
Saved ensemble Keras model for fold 3 to ensemble_results/ensemble_model_fold3_2025_07_08.keras
Saved ensemble Keras model for fold 4 to ensemble_results/ensemble_model_fold4_2025_07_08.keras
Saved ensemble Keras model for fold 5 to ensemble_results/ensemble_model_fold5_2025_07_08.keras


In [13]:
print("\n" + "="*60)
print("TRAINING SUMMARY")
print("="*60)

print("\nIndividual Model Performance:")
for feature_type in FEATURE_TYPES:
    if feature_type in results:
        accuracies = results[feature_type]['accuracy_list']
        mean_acc = np.mean(accuracies)
        std_acc = np.std(accuracies)
        print(f"  {feature_type}: {mean_acc:.4f} ± {std_acc:.4f}")

print("\nEnsemble Performance:")
ensemble_mean = np.mean(ensemble_accuracies)
ensemble_std = np.std(ensemble_accuracies)
print(f"  Ensemble: {ensemble_mean:.4f} ± {ensemble_std:.4f}")

# Find best individual model
best_individual = max(
    [(ft, np.mean(results[ft]['accuracy_list'])) for ft in FEATURE_TYPES if ft in results],
    key=lambda x: x[1]
)
improvement = ensemble_mean - best_individual[1]
print(f"\nBest Individual Model: {best_individual[0]} ({best_individual[1]:.4f})")
print(f"Ensemble Improvement: {improvement:.4f} ({improvement*100:.2f}%)")


TRAINING SUMMARY

Individual Model Performance:
  mel_spectrogram: 0.5222 ± 0.1743
  mfcc: 0.4926 ± 0.2419
  chromagram: 0.2667 ± 0.0381
  spectral_contrast: 0.4370 ± 0.0669
  tonnetz: 0.1407 ± 0.0728
  constant_q: 0.6667 ± 0.1054
  cqt: 0.6704 ± 0.1024
  stft: 0.3000 ± 0.1515
  harmonic_percussive: 0.6815 ± 0.1801
  onset_strength: 0.1037 ± 0.0491

Ensemble Performance:
  Ensemble: 0.8000 ± 0.0429

Best Individual Model: harmonic_percussive (0.6815)
Ensemble Improvement: 0.1185 (11.85%)
