# Instrument classification

Before starting model training, confirm whether you have GPUs available.

In [1]:
import tensorflow as tf

# Query the GPUs visible to TensorFlow through the stable `tf.config` API
# (the `tf.config.experimental` alias used before is a legacy spelling).
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


The following libraries can be loaded if installation instructions from the README file were completed.

In [2]:
import json
import librosa
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import classification_report

### Data loading and pre-processing

To start the analysis, we will load the OpenMIC-2018 dataset, which should be present under `../data/raw/openmic-2018/`. The authors have provided a pre-processed dataset in the `openmic-2018.npz` file. This object contains VGGish features (`X`), true label probabilities (`Y_true`), binary masks (`Y_mask`) and sample keys (`sample_key`).

In [3]:
# Set the following parameter before running your analysis
DATA_ROOT = Path('../data/raw/openmic-2018/')

# Load openmic from the npz archive. Reading every array inside the context
# manager lets the underlying file handle be closed as soon as we are done,
# while OPENMIC remains a plain name -> array mapping for later use.
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code; only load archives obtained from a trusted source.
with np.load(DATA_ROOT.joinpath('openmic-2018.npz'), allow_pickle=True) as npz_archive:
    OPENMIC = {name: npz_archive[name] for name in npz_archive.files}

# openmic contains VGGish features (X), true label probabilities (Y_true), binary masks (Y_mask) and sample keys (sample_key)
X, Y_true, Y_mask, sample_key = OPENMIC['X'], OPENMIC['Y_true'], OPENMIC['Y_mask'], OPENMIC['sample_key']

The OpenMIC authors used VGGish features as inputs for their model. Here we will generate MFCCs following the method described by Blaszke & Kostek in their publication (https://www.researchgate.net/publication/360046712_Musical_Instrument_Identification_Using_Deep_Learning_Approach). We overwrite the `X` object containing the VGGish features with our new MFCCs.

In [4]:
# Parameters for MFCC extraction, following Blaszke & Kostek
n_fft = 1024  # FFT window length (number of samples)
hop_length = 512  # Step size (number of samples)
n_mfcc = 40  # Number of MFCC coefficients per frame
# NOTE(review): this cell previously described n_fft as a Hamming window, but no
# `window=` argument is passed to librosa below, so librosa's default window is
# used - confirm this matches the reference method.

# Replace X by loading the .ogg files as MFCCs
audio_root = DATA_ROOT.joinpath('audio')
mfcc_features = []

for key in tqdm(sample_key):
    # Audio files live in a sub-folder named after the first three characters of the key.
    # (A dedicated path variable avoids rebinding the builtin `dir` in the kernel namespace.)
    audio_path = audio_root.joinpath(key[0:3], f"{key}.ogg")

    # Load audio file as a mono waveform resampled to 22050 Hz
    audio, sr = librosa.load(audio_path, sr=22050, mono=True)

    # Compute MFCCs
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)

    # Transpose to match time steps on the first axis
    mfcc_features.append(mfcc.T)

# Stack the per-clip matrices into one array. np.stack raises a ValueError if
# any clip produced a different number of frames, instead of silently building
# a ragged object array that would only fail later during training.
X = np.stack(mfcc_features)

print(f"Each input audio file now has the following shape: {X[0].shape}")

100%|██████████| 20000/20000 [09:47<00:00, 34.06it/s]


Each input audio file now has the following shape: (431, 40)


The OpenMic authors have a pre-defined balanced test-train split. We will use their method of loading the partition files.

In [5]:
# Load the class map and the train / test split
with open(DATA_ROOT.joinpath('class-map.json'), 'r') as f:
    class_map = json.load(f)

# Let's split the data into the training and test set.
# `read_csv(..., squeeze=True)` is deprecated (and removed in pandas 2.0);
# squeezing the single-column frame afterwards is the supported equivalent.
split_train = pd.read_csv(DATA_ROOT.joinpath('partitions/split01_train.csv'),
                          header=None).squeeze("columns")
split_test = pd.read_csv(DATA_ROOT.joinpath('partitions/split01_test.csv'),
                         header=None).squeeze("columns")

# Sets give constant-time membership checks in the loop below
train_set = set(split_train)
test_set = set(split_test)


# We now identify the IDs that are part of the train and test set
idx_train, idx_test = [], []

for idx, key in enumerate(sample_key):
    if key in train_set:
        idx_train.append(idx)
    elif key in test_set:
        idx_test.append(idx)
    else:
        # This should never happen, but better safe than sorry.
        # Report the key itself: indexing sample_key with the key string would
        # raise an IndexError and hide the intended message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(key))

# Finally, cast the idx_* arrays to numpy structures
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

# Finally, we use the split indices to partition the features, labels, and masks
X_train = X[idx_train]
X_test = X[idx_test]

Y_true_train = Y_true[idx_train]
Y_true_test = Y_true[idx_test]

Y_mask_train = Y_mask[idx_train]
Y_mask_test = Y_mask[idx_test]

# Prepare binary labels for classification
Y_train_binary = (Y_true_train > 0.5).astype(int)
Y_test_binary = (Y_true_test > 0.5).astype(int)



  split_train = pd.read_csv(DATA_ROOT.joinpath('partitions/split01_train.csv'),


  split_test = pd.read_csv(DATA_ROOT.joinpath('partitions/split01_test.csv'),


### Model training

We will train one model, and generate an evaluation report, for each of the 20 instruments present in the OpenMIC-2018 dataset. The model architecture is based on the one used by Blaszke & Kostek, with some modifications.

In [7]:
def build_instrument_model(input_shape):
    """Build and compile the binary CNN used for a single instrument.

    Three Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout stages
    (128, 64 and 32 filters) are followed by two dense layers (64 and 32
    units, each with dropout) and a single sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of one input sample, including the trailing channel axis.

    Returns
    -------
    Sequential
        Model compiled with Adam (learning rate 0.001), binary cross-entropy
        loss and the accuracy metric.
    """
    model = Sequential()
    model.add(Input(shape=input_shape))

    # Convolutional stages with a decreasing number of filters
    for n_filters in (128, 64, 32):
        model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.3))

    # Flatten the output from convolutional layers
    model.add(Flatten())

    # Dense layers
    for n_units in (64, 32):
        model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model


# Derive the input shape from the MFCC data (time steps, coefficients) and add
# a trailing channel axis, rather than hard-coding the dimensions.
input_shape = X_train.shape[1:] + (1,)

models = {}
reports = {}

for instrument, inst_num in class_map.items():

    print(instrument)

    # Select only the clips that carry a label for this instrument
    train_inst = Y_mask_train[:, inst_num]
    test_inst = Y_mask_test[:, inst_num]

    # Add the channel axis explicitly so the data rank matches `input_shape`
    X_train_inst = X_train[train_inst][..., np.newaxis]
    Y_true_train_inst = Y_true_train[train_inst, inst_num] >= 0.5

    X_test_inst = X_test[test_inst][..., np.newaxis]
    Y_true_test_inst = Y_true_test[test_inst, inst_num] >= 0.5

    # A fresh model is trained for every instrument
    model = build_instrument_model(input_shape)

    # Reduce learning rate on plateau
    # NOTE(review): the captured logs show the learning rate reaching min_lr
    # within ~15 epochs and several instruments collapsing to the majority
    # class; feature scaling / class weighting may be worth investigating.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6, verbose=1)

    # Fit the model
    model.fit(X_train_inst, Y_true_train_inst, epochs=25, batch_size=32, validation_split=0.2, callbacks=[reduce_lr])

    # Generate and print a report per instrument on the held-out test clips
    print(f'Instrument: {instrument}')
    # Threshold the sigmoid outputs and flatten (N, 1) -> (N,) to match the 1-D labels
    Y_pred_test = (model.predict(X_test_inst) > 0.5).ravel()
    report = classification_report(Y_true_test_inst, Y_pred_test)
    print(report)

    # Store the classifier and report in their dictionaries
    models[instrument] = model
    reports[instrument] = report



accordion
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 8: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: accordion
              precision    recall  f1-score   support

       False       0.87      0.64      0.74       423
        True       0.33      0.64      0.43       115

    accuracy                           0.64       538
   macro avg       0.60      0.64      0.58       538
weighted avg       0.75      0.64      0.67       538

banjo




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 7: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: banjo
              precision    recall  f1-score   support

       False       0.76      0.77      0.76       338
        True       0.43      0.42      0.42       140

    accuracy                           0.67       478
   macro avg       0.59      0.59      0.59       478
weighted avg       0.66      0.67      0.66       478

bass




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 6: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 19: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 22: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: bass
              precision    recall  f1-score   support

       False       0.79      0.84      0.81       329
        True       0.54      0.46      0.49       134

    accuracy                           0.73       463
   macro avg       0.66      0.65      0.65       463
weighted avg       0.72      0.73      0.72       463

cello




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 7: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: cello
              precision    recall  f1-score   support

       False       0.53      0.99      0.69       259
        True       0.25      0.00      0.01       226

    accuracy                           0.53       485
   macro avg       0.39      0.50      0.35       485
weighted avg       0.40      0.53      0.37       485

clarinet




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 17: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 20: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: clarinet
              precision    recall  f1-score   support

       False       0.79      0.99      0.88       503
        True       0.40      0.01      0.03       137

    accuracy                           0.78       640
   macro avg       0.59      0.50      0.45       640
weighted avg       0.70      0.78      0.70       640

cymbals




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 12: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 15: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 18: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: cymbals
              precision    recall  f1-score   support

       False       0.92      0.73      0.82       139
        True       0.89      0.97      0.93       297

    accuracy                           0.89       436
   macro avg       0.90      0.85      0.87       436
weighted avg       0.90      0.89      0.89       436

drums




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 8: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: drums
              precision    recall  f1-score   support

       False       0.96      0.67      0.79       146
        True       0.85      0.99      0.91       278

    accuracy                           0.88       424
   macro avg       0.91      0.83      0.85       424
weighted avg       0.89      0.88      0.87       424

flute




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: flute
              precision    recall  f1-score   support

       False       0.69      1.00      0.82       387
        True       1.00      0.01      0.01       175

    accuracy                           0.69       562
   macro avg       0.84      0.50      0.41       562
weighted avg       0.79      0.69      0.57       562

guitar




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 19: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 22: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: guitar
              precision    recall  f1-score   support

       False       0.63      0.88      0.74       150
        True       0.92      0.73      0.81       286

    accuracy                           0.78       436
   macro avg       0.78      0.81      0.78       436
weighted avg       0.82      0.78      0.79       436

mallet_percussion




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: mallet_percussion
              precision    recall  f1-score   support

       False       0.60      0.84      0.70       267
        True       0.60      0.31      0.41       211

    accuracy                           0.60       478
   macro avg       0.60      0.57      0.55       478
weighted avg       0.60      0.60      0.57       478

mandolin




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: mandolin
              precision    recall  f1-score   support

       False       0.82      0.57      0.67       434
        True       0.43      0.71      0.53       193

    accuracy                           0.62       627
   macro avg       0.62      0.64      0.60       627
weighted avg       0.70      0.62      0.63       627

organ




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 8: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: organ
              precision    recall  f1-score   support

       False       0.77      0.92      0.84       310
        True       0.58      0.28      0.38       121

    accuracy                           0.74       431
   macro avg       0.67      0.60      0.61       431
weighted avg       0.71      0.74      0.71       431

piano




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 19: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: piano
              precision    recall  f1-score   support

       False       0.60      0.91      0.72       130
        True       0.94      0.72      0.82       285

    accuracy                           0.78       415
   macro avg       0.77      0.82      0.77       415
weighted avg       0.84      0.78      0.79       415

saxophone




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 12: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 15: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 18: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: saxophone
              precision    recall  f1-score   support

       False       0.82      0.26      0.39       324
        True       0.54      0.94      0.69       305

    accuracy                           0.59       629
   macro avg       0.68      0.60      0.54       629
weighted avg       0.68      0.59      0.54       629

synthesizer




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 12: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 15: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 18: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: synthesizer
              precision    recall  f1-score   support

       False       0.66      0.83      0.74       112
        True       0.92      0.82      0.87       268

    accuracy                           0.83       380
   macro avg       0.79      0.83      0.80       380
weighted avg       0.85      0.83      0.83       380

trombone




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 17: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: trombone
              precision    recall  f1-score   support

       False       0.68      0.96      0.80       492
        True       0.25      0.03      0.05       228

    accuracy                           0.66       720
   macro avg       0.47      0.49      0.43       720
weighted avg       0.54      0.66      0.56       720

trumpet




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 7: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: trumpet
              precision    recall  f1-score   support

       False       0.59      0.99      0.74       467
        True       0.25      0.00      0.01       318

    accuracy                           0.59       785
   macro avg       0.42      0.50      0.37       785
weighted avg       0.45      0.59      0.44       785

ukulele




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 15: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 23: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 24/25
Epoch 25/25
Instrument: ukulele
              precision    recall  f1-score   support

       False       0.76      0.77      0.76       408
        True       0.47      0.46      0.47       182

    accuracy                           0.67       590
   macro avg       0.62      0.61      0.61       590
weighted avg       0.67      0.67      0.67       590

violin




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 8: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: violin
              precision    recall  f1-score   support

       False       0.76      0.61      0.68       237
        True       0.79      0.88      0.83       394

    accuracy                           0.78       631
   macro avg       0.77      0.75      0.75       631
weighted avg       0.78      0.78      0.77       631

voice




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 13: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 16: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instrument: voice
              precision    recall  f1-score   support

       False       0.94      0.50      0.65       150
        True       0.74      0.98      0.85       224

    accuracy                           0.79       374
   macro avg       0.84      0.74      0.75       374
weighted avg       0.82      0.79      0.77       374



### Saving models and reports to disk
Saving the result, for potential future re-use.

In [9]:
# Directory to save models and reports
save_dir = Path('../models')
# Create the output directory up front so saving does not fail on a fresh checkout
save_dir.mkdir(parents=True, exist_ok=True)

# Iterate over each instrument and save the model and report
for instrument, trained_model in models.items():
    # Construct the file paths
    model_path = save_dir.joinpath(f'{instrument}_model.h5')
    report_path = save_dir.joinpath(f'{instrument}_report.csv')

    # Save the Keras model
    trained_model.save(model_path)
    print(f'Model saved to {model_path}')

    # Save the classification report. The report is written as plain text
    # (a header line followed by the report string) despite the .csv extension.
    with open(report_path, 'w') as report_file:
        report_file.write(f'Classification Report for {instrument}:\n')
        report_file.write(reports[instrument])
    print(f'Report saved to {report_path}')

  saving_api.save_model(


Model saved to ../models/accordion_model.h5
Report saved to ../models/accordion_report.csv
Model saved to ../models/banjo_model.h5
Report saved to ../models/banjo_report.csv
Model saved to ../models/bass_model.h5
Report saved to ../models/bass_report.csv
Model saved to ../models/cello_model.h5
Report saved to ../models/cello_report.csv
Model saved to ../models/clarinet_model.h5
Report saved to ../models/clarinet_report.csv
Model saved to ../models/cymbals_model.h5
Report saved to ../models/cymbals_report.csv
Model saved to ../models/drums_model.h5
Report saved to ../models/drums_report.csv
Model saved to ../models/flute_model.h5
Report saved to ../models/flute_report.csv
Model saved to ../models/guitar_model.h5
Report saved to ../models/guitar_report.csv
Model saved to ../models/mallet_percussion_model.h5
Report saved to ../models/mallet_percussion_report.csv
Model saved to ../models/mandolin_model.h5
Report saved to ../models/mandolin_report.csv
Model saved to ../models/organ_model.h5