In [1]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras

DATA_PATH = "data.json"

def load_data(data_path):
    """Read the dataset JSON file and return its contents as NumPy arrays.

    :param data_path: path to a JSON file holding the keys "mfcc",
        "labels" and "mapping"
    :return: tuple ``(X, y, z)`` of arrays built from the "mfcc", "labels"
        and "mapping" entries, in that order
    """
    with open(data_path, "r") as json_file:
        dataset = json.load(json_file)

    # Convert the entries in the same order in which callers unpack them.
    features, labels, mapping = (
        np.array(dataset[key]) for key in ("mfcc", "labels", "mapping")
    )
    return features, labels, mapping

def prepare_datasets(test_size, validation_size):
    """Load the dataset and split it into train, validation and test sets.

    :param test_size: forwarded as ``test_size`` to the first
        ``train_test_split`` call, which separates the test set from the
        whole dataset
    :param validation_size: forwarded as ``test_size`` to the second
        ``train_test_split`` call, which separates the validation set from
        the remaining training portion
    :return: tuple ``(X_train, X_validation, X_test, y_train, y_validation,
        y_test)``
    """
    # The label mapping returned by load_data is not needed for splitting.
    features, labels, _ = load_data(DATA_PATH)

    # First split: hold out the test set.
    features_rest, features_test, labels_rest, labels_test = train_test_split(
        features, labels, test_size=test_size
    )

    # Second split: take the validation set out of what is left.
    features_train, features_val, labels_train, labels_val = train_test_split(
        features_rest, labels_rest, test_size=validation_size
    )

    return (
        features_train,
        features_val,
        features_test,
        labels_train,
        labels_val,
        labels_test,
    )

# Architecture for instrument recognition using a CNN -> 2D convolutional layers, as the input is (T, MFCC)
def build_model(input_shape, num_classes=6):
    """Build the (uncompiled) CNN classifier.

    The network is three identical-looking stages of
    ``Conv2D(32, relu) -> MaxPooling2D(strides 2, same padding) ->
    BatchNormalization``, followed by ``Flatten``, ``Dense(64, relu)``,
    ``Dropout(0.3)`` and a softmax output layer.

    :param input_shape: shape of a single sample, passed as ``input_shape``
        to the first convolutional layer
    :param num_classes: number of units in the softmax output layer;
        defaults to 6, the value that used to be hard-coded
    :return: a ``keras.Sequential`` model that still has to be compiled
    """
    # (convolution kernel size, pooling window) for each stage, in order.
    conv_stages = (
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((2, 2), (2, 2)),
    )

    model = keras.Sequential()
    for stage, (kernel_size, pool_size) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {"input_shape": input_shape} if stage == 0 else {}
        model.add(
            keras.layers.Conv2D(
                32, kernel_size, activation="relu", **first_layer_kwargs
            )
        )
        model.add(
            keras.layers.MaxPooling2D(pool_size, strides=(2, 2), padding="same")
        )
        model.add(keras.layers.BatchNormalization())

    # Classification head.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation="relu"))
    model.add(keras.layers.Dropout(0.3))
    model.add(keras.layers.Dense(num_classes, activation="softmax"))
    return model

def predict(model, X, y, z):
    """Run the model on a single sample and return the predicted class index.

    Previously the prediction was computed and then thrown away (the
    function returned ``None``); the index is now returned so callers can
    use it, e.g. ``z[predict(model, X, y, z)]``.

    :param model: object exposing ``predict(batch)`` that returns one row of
        class scores per sample in the batch
    :param X: a single sample, without the leading batch axis
    :param y: expected label for the sample; accepted for interface
        compatibility and not used here
    :param z: label mapping; accepted for interface compatibility and not
        used here
    :return: index of the highest-scoring class, as a Python ``int``
    """
    # The model works on batches, so wrap the sample in a batch of size one.
    batch = X[np.newaxis, ...]
    scores = model.predict(batch)
    predicted_index = np.argmax(scores, axis=1)
    # The batch holds exactly one sample; unwrap its index.
    return int(predicted_index[0])
    
if __name__ == "__main__":
    # Full dataset plus label mapping, kept available at module level.
    # NOTE(review): prepare_datasets() reads the same file again on its own.
    X, y, z = load_data(DATA_PATH)

    # 25% of the data is held out for testing, then 20% of the remainder
    # for validation.
    splits = prepare_datasets(0.25, 0.2)
    X_train, X_validation, X_test, y_train, y_validation, y_test = splits

    # One sample is (time bins, MFCC coefficients) plus a single channel axis.
    input_shape = (X_train.shape[1], X_train.shape[2], 1)
    model = build_model(input_shape)

    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # Train while monitoring the validation split.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        batch_size=32,
        epochs=30,
    )

    # Final check on the held-out test split.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test error: {test_error}, test accuracy: {test_accuracy}")




Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test error: 0.43882042169570923, test accuracy: 0.8439834117889404
