# Ensemble Model Training - Simplified Single-Branch Models

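This notebook trains a separate simplified CNN model for each feature type (5-fold cross-validation) and then builds an ensemble by averaging the models' predicted class probabilities on each fold's test split.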

In [18]:
from datetime import datetime
import os
import json
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.utils import to_categorical, Sequence
from keras.models import Model
from keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
)
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from modules.PostgresDBHandler import PostgresDBHandler
from tqdm import tqdm
from tensorflow.keras.optimizers import Adam
from tensorflow import keras

In [2]:
# Configuration
# Database connection settings. Every value can be overridden through an
# environment variable so real credentials do not have to be written into the
# notebook; the fallbacks are the original development defaults.
dbParams = {
    "dbname": os.environ.get("POSTGRES_DB", "mydatabase"),
    "user": os.environ.get("POSTGRES_USER", "myuser"),
    "password": os.environ.get("POSTGRES_PASSWORD", "mypassword"),
    "host": os.environ.get("POSTGRES_HOST", "postgres_server"),
    "port": os.environ.get("POSTGRES_PORT", "5432"),
}

EPOCHS = 200        # upper bound on epochs per fold; early stopping may end training sooner
BATCH_SIZE = 32     # batch size handed to every data generator
KFOLD_SPLITS = 5    # number of cross-validation folds
FIXED_LENGTH = 128  # NOTE(review): not referenced by the cells visible here - confirm it is still needed

# Feature types to train models for
FEATURE_TYPES = [
    'mel_spectrogram', 'mfcc', 'chromagram', 'spectral_contrast',
    'tonnetz', 'constant_q', 'cqt', 'stft', 'harmonic_percussive', 'onset_strength'
]

# GPU configuration: enable memory growth on every GPU TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices("GPU")
try:
    # With no GPUs the loop body never runs and nothing is printed.
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    if len(gpus) > 0:
        print(f"Number of available GPUs: {len(gpus)}")
except RuntimeError as err:
    # Report the problem but keep the notebook running.
    print(err)

2025-07-07 19:25:17.370446: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-07-07 19:25:17.450665: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [3]:
# Initialize database connection
db = PostgresDBHandler(**dbParams)
db.connect()

try:
    # Get instrument mappings
    instruments_mappings = db.get_mappings_instruments()
    num_classes = len(instruments_mappings)
    print(f"Number of instrument classes: {num_classes}")
    print("Instruments:", instruments_mappings['name'].tolist())
finally:
    # Release the connection even if the query or the printing above fails.
    db.close()

Number of instrument classes: 9
Instruments: ['violin', 'bass', 'flute', 'piccolo', 'trumpet', 'oboe', 'sax', 'clarinet', 'cello']


In [4]:
# Collect, for every processed audio file, the path of each feature type plus its instrument label.
dbConnect = PostgresDBHandler(**dbParams)
dbConnect.connect()

try:
    audioIDs = dbConnect.get_all_unique_audio_ids_in_processed()
    # NOTE(review): processed_data is not read by any cell visible here - confirm it is still needed.
    processed_data = dbConnect.get_processed_fit_data(audioIDs)

    all_processed_data = []
    for audio_id in audioIDs:
        features = dbConnect.get_all_feature_types_for_audio(audio_id)
        # One dict per audio file: {feature type name -> feature file path, 'instrumentID' -> label}
        feature_dict = {f['featureTypeName']: f['featurePath'] for f in features}
        instrumentID = dbConnect.get_audio_file(audio_id)['instrumentID']
        feature_dict['instrumentID'] = instrumentID
        all_processed_data.append(feature_dict)
finally:
    # Always close the connection, even when one of the queries above raises.
    dbConnect.close()

In [5]:
# One row per audio file: a column of file paths for each feature type plus 'instrumentID'.
processed_df = pd.DataFrame(all_processed_data)
# Bare expression so the notebook renders the frame as this cell's output.
processed_df

Unnamed: 0,mel_spectrogram,mfcc,chromagram,spectral_contrast,tonnetz,constant_q,cqt,stft,harmonic_percussive,onset_strength,instrumentID
0,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/b4eff491-ba...,ensemble_intermediate_results/chromagram/97320...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/edf38d8e...,ensemble_intermediate_results/constant_q/c57fd...,ensemble_intermediate_results/cqt/cdee10e8-02c...,ensemble_intermediate_results/stft/fd3e6642-db...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/7...,1
1,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/cb7069c3-eb...,ensemble_intermediate_results/chromagram/56820...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/e8615bcb...,ensemble_intermediate_results/constant_q/2800b...,ensemble_intermediate_results/cqt/f9a1e89d-2c5...,ensemble_intermediate_results/stft/b12a54e3-cb...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/3...,5
2,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/e320d53d-b3...,ensemble_intermediate_results/chromagram/74786...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/0566b804...,ensemble_intermediate_results/constant_q/c415e...,ensemble_intermediate_results/cqt/7fbfb24e-12b...,ensemble_intermediate_results/stft/9cdd13f6-47...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/b...,4
3,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/30b73223-05...,ensemble_intermediate_results/chromagram/05881...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/3d60483b...,ensemble_intermediate_results/constant_q/0bc74...,ensemble_intermediate_results/cqt/b7062127-882...,ensemble_intermediate_results/stft/bb17e38a-b3...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/2...,6
4,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/950e2465-b2...,ensemble_intermediate_results/chromagram/a733d...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/8bd82a0f...,ensemble_intermediate_results/constant_q/13fab...,ensemble_intermediate_results/cqt/7e056e87-607...,ensemble_intermediate_results/stft/c3e18b59-fb...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/d...,4
...,...,...,...,...,...,...,...,...,...,...,...
895,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/5c041df7-89...,ensemble_intermediate_results/chromagram/6df3f...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/a175304c...,ensemble_intermediate_results/constant_q/5d998...,ensemble_intermediate_results/cqt/d0aa2576-88d...,ensemble_intermediate_results/stft/5cefb2c7-fb...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/e...,3
896,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/b8ae91b6-dd...,ensemble_intermediate_results/chromagram/5054d...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/5c61d6ce...,ensemble_intermediate_results/constant_q/4aa61...,ensemble_intermediate_results/cqt/e0780fab-3a3...,ensemble_intermediate_results/stft/b519d38c-d6...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/f...,7
897,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/a3366792-9c...,ensemble_intermediate_results/chromagram/af45a...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/0cfb6ceb...,ensemble_intermediate_results/constant_q/4853e...,ensemble_intermediate_results/cqt/6fa6c254-335...,ensemble_intermediate_results/stft/b1e2b538-4b...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/5...,7
898,ensemble_intermediate_results/mel_spectrogram/...,ensemble_intermediate_results/mfcc/b57042e1-2f...,ensemble_intermediate_results/chromagram/dfe20...,ensemble_intermediate_results/spectral_contras...,ensemble_intermediate_results/tonnetz/cb7dba36...,ensemble_intermediate_results/constant_q/c35e5...,ensemble_intermediate_results/cqt/9142568a-8a5...,ensemble_intermediate_results/stft/5b6bc032-6a...,ensemble_intermediate_results/harmonic_percuss...,ensemble_intermediate_results/onset_strength/2...,1


In [6]:
def get_input_shape(feature_type, df):
    """Return the array shape of the first usable feature file for ``feature_type``.

    Args:
        feature_type: Name of the column in ``df`` that holds the feature file paths.
        df: DataFrame containing that column.

    Returns:
        tuple: ``shape`` of the first array whose path is a string pointing at an
        existing file.

    Raises:
        ValueError: If no entry of the column is a path to an existing file.
    """
    for path in df[feature_type]:
        if isinstance(path, str) and os.path.exists(path):
            # Only the shape is needed, so memory-map the file instead of
            # reading the whole array into memory.
            return np.load(path, mmap_mode="r").shape
    raise ValueError(f"No valid file found for {feature_type}")

In [7]:
class SingleFeatureDataGenerator(Sequence):
    """Keras ``Sequence`` yielding ``(X, y)`` batches for a single feature type.

    Each sample is loaded with ``np.load`` from the path stored in ``feature_col``,
    standardized per sample (zero mean, unit standard deviation) and, when the
    array is 2-D, given a trailing channel axis. Labels are read from the
    ``'instrumentID'`` column and one-hot encoded.

    Args:
        df: DataFrame with a ``feature_col`` column of file paths and an
            ``'instrumentID'`` column of integer-encoded labels.
        feature_col: Name of the column holding the feature file paths.
        batch_size: Number of rows per batch (the last batch may be smaller).
        shuffle: If true, the row order is reshuffled at the end of every epoch.
        num_classes: Width of the one-hot label vectors, forwarded to ``to_categorical``.
    """

    def __init__(self, df, feature_col, batch_size=32, shuffle=True, num_classes=None):
        # Keras 3 emits a warning when a Sequence (PyDataset) subclass does not
        # call the base initializer.
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.feature_col = feature_col
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_classes = num_classes
        # Build the initial (possibly shuffled) index order.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        """Reset the row order and reshuffle it when ``shuffle`` is enabled."""
        self.indices = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __getitem__(self, index):
        """Load and return batch ``index`` as ``(X, y)``.

        Files that fail to load are reported and skipped, so a batch can hold
        fewer samples than ``batch_size``.

        Raises:
            ValueError: If a loaded array contains NaN or infinite values.
        """
        start = index * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]
        batch_df = self.df.iloc[batch_indices]

        X = []
        y = []

        for path, label in zip(batch_df[self.feature_col], batch_df['instrumentID']):
            try:
                arr = np.load(path)
            except Exception as e:
                # Best effort: report the unreadable file and drop the sample.
                print(f"Error loading {path}: {e}")
                continue

            # A single pass that rejects both NaN and +/-Inf.
            if not np.isfinite(arr).all():
                raise ValueError(f"Feature file {path} contains NaNs or Infs.")

            # Per-sample standardization; the epsilon guards against constant arrays.
            arr = (arr - np.mean(arr)) / (np.std(arr) + 1e-8)
            if arr.ndim == 2:
                arr = np.expand_dims(arr, -1)  # shape: (H, W, 1)

            X.append(arr)
            y.append(label)  # already label-encoded

        X = np.array(X)
        y = to_categorical(np.array(y), num_classes=self.num_classes)

        return X, y

In [14]:
def create_simple_model(input_shape, num_classes, model_name="simple_cnn"):
    """Build a minimal single-branch CNN classifier.

    Architecture: one 3x3 ``Conv2D`` with 4 filters (ReLU, same padding), batch
    normalization, flatten, and a softmax ``Dense`` output layer.

    Args:
        input_shape: Shape of one sample. A 2-D shape ``(H, W)`` gets a trailing
            channel axis appended; any other shape is used unchanged.
        num_classes: Number of output classes.
        model_name: Name of the model, also used as prefix for the input and
            output layer names.

    Returns:
        An uncompiled Keras ``Model``.
    """
    # Stay consistent with SingleFeatureDataGenerator, which adds a channel axis
    # only to 2-D arrays: a shape that already carries one must not get a second.
    input_shape = tuple(input_shape)
    if len(input_shape) == 2:
        input_shape = (*input_shape, 1)

    input_layer = Input(shape=input_shape, name=f"{model_name}_input")

    x = Conv2D(4, (3, 3), activation='relu', padding='same')(input_layer)
    x = BatchNormalization()(x)

    x = Flatten()(x)

    output = Dense(num_classes, activation='softmax', name=f"{model_name}_output")(x)

    model = Model(inputs=input_layer, outputs=output, name=model_name)
    return model

In [16]:
# Train one model per feature type with K-fold cross-validation and store the
# per-fold models and metrics under models/<feature_type>/.
results = {}

for feature_type in tqdm(FEATURE_TYPES, desc="Training features"):
    print(f"\n{'='*40}\nTraining model for {feature_type}\n{'='*40}")

    # Keep only the rows that have a file path for this feature type.
    feature_col = feature_type
    feature_df = processed_df.dropna(subset=[feature_col])

    # Global label encoder
    label_encoder = LabelEncoder()
    label_encoder.fit(feature_df['instrumentID'])

    feature_df = feature_df.copy()
    feature_df['instrumentID'] = label_encoder.transform(feature_df['instrumentID'])
    num_classes = len(label_encoder.classes_)
    input_shape = get_input_shape(feature_type, feature_df)

    # Create the output directory once per feature type, so it exists for both
    # the per-fold models and the results file written after the fold loop.
    model_dir = f"models/{feature_type}"
    os.makedirs(model_dir, exist_ok=True)

    kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=42)
    accuracy_list, loss_list, history_list = [], [], []
    classification_reports, confusion_matrices = [], []

    for fold, (train_idx, test_idx) in enumerate(kf.split(feature_df)):
        print(f"\n--- Fold {fold+1}/{KFOLD_SPLITS} ---")
        train_df = feature_df.iloc[train_idx].reset_index(drop=True)
        test_df = feature_df.iloc[test_idx].reset_index(drop=True)

        # Carve a stratified validation split out of the training fold for early stopping.
        train_df, val_df = train_test_split(
            train_df, test_size=0.2, random_state=42, stratify=train_df['instrumentID'])

        # Generators (labels are already encoded)
        train_gen = SingleFeatureDataGenerator(train_df, feature_col, BATCH_SIZE, shuffle=True, num_classes=num_classes)
        val_gen   = SingleFeatureDataGenerator(val_df,   feature_col, BATCH_SIZE, shuffle=False, num_classes=num_classes)
        test_gen  = SingleFeatureDataGenerator(test_df,  feature_col, BATCH_SIZE, shuffle=False, num_classes=num_classes)

        # Model
        model = create_simple_model(input_shape, num_classes, model_name=feature_type)
        model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        history = model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS, callbacks=[early_stopping])
        history_list.append(history.history)

        # Evaluation
        loss, acc = model.evaluate(test_gen)
        loss_list.append(loss)
        accuracy_list.append(acc)
        print(f"{feature_type} - Fold {fold+1} Test accuracy: {acc:.4f}")

        # Predictions & Reports
        y_pred = model.predict(test_gen)
        y_pred_classes = np.argmax(y_pred, axis=1)

        # Index the batches explicitly: the generator defines only __len__ and
        # __getitem__, so a bare for-loop over it would depend on the base class
        # (or Python's legacy index protocol) to know where to stop.
        y_true = []
        for batch_idx in range(len(test_gen)):
            _, labels = test_gen[batch_idx]
            y_true.extend(np.argmax(labels, axis=1))
        y_true = np.array(y_true)

        report = classification_report(y_true, y_pred_classes, output_dict=True)
        classification_reports.append(report)
        conf_matrix = confusion_matrix(y_true, y_pred_classes).tolist()
        confusion_matrices.append(conf_matrix)

        # Save model
        model.save(f"{model_dir}/model_fold{fold+1}.keras")

    # Save results
    results[feature_type] = {
        "accuracy_list": accuracy_list,
        "loss_list": loss_list,
        "histories": history_list,
        "classification_reports": classification_reports,
        "confusion_matrices": confusion_matrices,
    }

    with open(f"{model_dir}/results.json", "w") as f:
        json.dump(results[feature_type], f, indent=2)

print("\nAll training complete. Models and results saved in 'models/'")

Training features:   0%|          | 0/10 [00:00<?, ?it/s]


Training model for mel_spectrogram

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
mel_spectrogram - Fold 1 Test accuracy: 0.9056

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
mel_spectrogram - Fold 2 Tes

Training features:  10%|█         | 1/10 [00:42<06:23, 42.57s/it]


Training model for mfcc

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
mfcc - Fold 1 Test accuracy: 0.8889

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
mfcc - Fold 2 Test accuracy: 0.8667

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch

Training features:  20%|██        | 2/10 [01:07<04:16, 32.06s/it]


Training model for chromagram

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
chromagram - Fold 1 Test accuracy: 0.3278

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
chromagram - Fold 2 Test accuracy: 0.4222

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
chromagram - Fold 3 Test accuracy: 0.4278

--- Fold 4/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200

Training features:  30%|███       | 3/10 [01:21<02:48, 24.12s/it]


Training model for spectral_contrast

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
spectral_contrast - Fold 1 Test accuracy: 0.6722

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
spectral_contrast - Fold 2 Test accuracy: 0.5778

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch

Training features:  40%|████      | 4/10 [01:40<02:10, 21.80s/it]


Training model for tonnetz

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
tonnetz - Fold 1 Test accuracy: 0.3667

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
tonnetz - Fold 2 Test accuracy: 0.2833

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/

Training features:  50%|█████     | 5/10 [01:57<01:41, 20.33s/it]


Training model for constant_q

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
constant_q - Fold 1 Test accuracy: 0.9111

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
constant_q - Fold 2 Test accuracy: 0.8389

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
constant_q - Fold 3 Test accuracy: 0.8667

--- Fold 4/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/20

Training features:  60%|██████    | 6/10 [02:15<01:17, 19.44s/it]


Training model for cqt

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
cqt - Fold 1 Test accuracy: 0.7944

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
cqt - Fold 2 Test accuracy: 0.9167

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
cqt - Fold 3 Test accuracy: 0.9333

--- Fold 4/5 ---
Epoch 1

Training features:  70%|███████   | 7/10 [02:32<00:55, 18.59s/it]


Training model for stft

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
stft - Fold 1 Test accuracy: 0.8000

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
stft - Fold 2 Test accuracy: 0.7000

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
stft - Fold 3 Test accuracy: 0.6056

--- Fol

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
stft - Fold 4 Test accuracy: 0.6500

--- Fold 5/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
stft - Fold 5 Test accuracy: 0.5000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Training features:  80%|████████  | 8/10 [02:57<00:41, 20.64s/it]


Training model for harmonic_percussive

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
harmonic_percussive - Fold 1 Test accuracy: 0.9111

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
harmonic_percussive - Fold 2 Test accuracy: 0.9333

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200

Training features:  90%|█████████ | 9/10 [08:24<01:56, 116.37s/it]


Training model for onset_strength

--- Fold 1/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
onset_strength - Fold 1 Test accuracy: 0.2500

--- Fold 2/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
onset_strength - Fold 2 Test accuracy: 0.2444

--- Fold 3/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
onset_strength - Fold 3 Test accuracy: 0.1722

--- Fold 4/5 ---
Epoch 1/200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
onset_strength - Fold 4 Test accuracy: 0.2167

--- Fold 5/5 ---
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
onset_strength - Fold 5 Test accuracy: 0.1889


Training features: 100%|██████████| 10/10 [08:38<00:00, 51.84s/it]


All training complete. Models and results saved in 'models/'





In [24]:
print("\n" + "="*50)
print("Creating Ensemble Predictions")
print("="*50)

# Encode the labels on a copy rather than shifting processed_df['instrumentID']
# in place: the in-place "- 1" made this cell non-idempotent, because every
# re-run shifted the shared frame's labels again.
# NOTE(review): assumes every instrument class is present for every feature type,
# so this encoding equals the per-feature LabelEncoder used during training.
ensemble_df = processed_df.copy()
ensemble_df['instrumentID'] = LabelEncoder().fit_transform(ensemble_df['instrumentID'])

ensemble_accuracies = []
ensemble_reports = []
ensemble_conf_matrices = []

# Same splitter settings as in training, so each fold's test rows are reproduced.
kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=42)

for fold in range(KFOLD_SPLITS):
    print(f"\n--- Ensemble Fold {fold + 1}/{KFOLD_SPLITS} ---")
    fold_preds = []
    y_true = None

    for feature_type in FEATURE_TYPES:
        # Load model for this fold
        model_path = f"models/{feature_type}/model_fold{fold+1}.keras"
        if not os.path.exists(model_path):
            print(f"Model not found: {model_path}")
            continue
        model = keras.models.load_model(model_path)

        # Get test data for this fold
        feature_df = ensemble_df[[feature_type, 'instrumentID']].dropna().reset_index(drop=True)
        train_idx, test_idx = list(kf.split(feature_df))[fold]
        test_df = feature_df.iloc[test_idx].reset_index(drop=True)
        test_gen = SingleFeatureDataGenerator(test_df, feature_type, batch_size=BATCH_SIZE, shuffle=False, num_classes=num_classes)
        preds = model.predict(test_gen, verbose=0)
        fold_preds.append(preds)
        if y_true is None:
            # Get true labels from generator. Batches are indexed explicitly
            # because the generator defines only __len__ and __getitem__.
            y_true = []
            for batch_idx in range(len(test_gen)):
                _, labels = test_gen[batch_idx]
                y_true.extend(np.argmax(labels, axis=1))
            y_true = np.array(y_true)

    if fold_preds:
        # Soft voting: average the class probabilities of all available models.
        ensemble_pred = np.mean(fold_preds, axis=0)
        ensemble_pred_classes = np.argmax(ensemble_pred, axis=1)
        acc = accuracy_score(y_true, ensemble_pred_classes)
        ensemble_accuracies.append(acc)
        print(f"Ensemble Accuracy: {acc:.4f}")
        report = classification_report(y_true, ensemble_pred_classes, output_dict=True)
        ensemble_reports.append(report)
        conf_matrix = confusion_matrix(y_true, ensemble_pred_classes).tolist()
        ensemble_conf_matrices.append(conf_matrix)


Creating Ensemble Predictions

--- Ensemble Fold 1/5 ---
Ensemble Accuracy: 0.9611

--- Ensemble Fold 2/5 ---
Ensemble Accuracy: 0.9778

--- Ensemble Fold 3/5 ---
Ensemble Accuracy: 0.9611

--- Ensemble Fold 4/5 ---
Ensemble Accuracy: 0.9889

--- Ensemble Fold 5/5 ---
Ensemble Accuracy: 0.9222


In [25]:
# Persist the per-fold ensemble metrics to a date-stamped JSON file.
today = datetime.now().date()
date_part = str(today).replace('-', '_')
results_path = os.path.join("ensemble_results", f"ensemble_results_{date_part}.json")

ensemble_results = dict(
    accuracy_list=ensemble_accuracies,
    classification_reports=ensemble_reports,
    confusion_matrices=ensemble_conf_matrices,
)

# Make sure the target directory exists before writing.
os.makedirs("ensemble_results", exist_ok=True)
with open(results_path, "w") as f:
    json.dump(ensemble_results, f, indent=2)

print(f"\nEnsemble results saved to: {results_path}")


Ensemble results saved to: ensemble_results/ensemble_results_2025_07_07.json


In [26]:
banner = "=" * 60
print("\n" + banner)
print("TRAINING SUMMARY")
print(banner)

# Mean per-fold test accuracy for every feature type that has stored results,
# kept in FEATURE_TYPES order.
mean_by_feature = {
    ft: np.mean(results[ft]['accuracy_list'])
    for ft in FEATURE_TYPES
    if ft in results
}

print("\nIndividual Model Performance:")
for feature_type, mean_acc in mean_by_feature.items():
    std_acc = np.std(results[feature_type]['accuracy_list'])
    print(f"  {feature_type}: {mean_acc:.4f} ± {std_acc:.4f}")

print("\nEnsemble Performance:")
ensemble_mean, ensemble_std = np.mean(ensemble_accuracies), np.std(ensemble_accuracies)
print(f"  Ensemble: {ensemble_mean:.4f} ± {ensemble_std:.4f}")

# Find best individual model as a (feature type, mean accuracy) pair; on ties
# the first feature type in FEATURE_TYPES order is kept.
best_individual = max(mean_by_feature.items(), key=lambda item: item[1])
improvement = ensemble_mean - best_individual[1]
print(f"\nBest Individual Model: {best_individual[0]} ({best_individual[1]:.4f})")
print(f"Ensemble Improvement: {improvement:.4f} ({improvement*100:.2f}%)")


TRAINING SUMMARY

Individual Model Performance:
  mel_spectrogram: 0.8878 ± 0.0432
  mfcc: 0.8767 ± 0.0206
  chromagram: 0.3956 ± 0.0373
  spectral_contrast: 0.5733 ± 0.0642
  tonnetz: 0.3300 ± 0.0428
  constant_q: 0.8733 ± 0.0231
  cqt: 0.8789 ± 0.0506
  stft: 0.6511 ± 0.0995
  harmonic_percussive: 0.9167 ± 0.0165
  onset_strength: 0.2144 ± 0.0303

Ensemble Performance:
  Ensemble: 0.9622 ± 0.0226

Best Individual Model: harmonic_percussive (0.9167)
Ensemble Improvement: 0.0456 (4.56%)
