# Instrument recognition machine learning project

In [None]:
import os
import tensorflow as tf
import numpy as np
import matplotlib as plt
%matplotlib inline 

from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam

## Parameters

In [None]:
# Root folder of the training set; label_set() uses its sub-directory names as labels.
trainPath = "../data/IRMAS-TrainingData"
# Common prefix of the testing folders; get_testing_dataset() appends 1, 2 and 3 to it.
testPath = "../data/IRMAS-TestingData-Part"


# Target number of samples per clip in data_to_list(); shorter clips are skipped.
SIZE = 44100
# Number of trailing examples split_training_data() holds out for validation.
VALIDATION_SIZE = 1000
# Length of the last axis each clip is reshaped to in data_to_list().
LSTM_RECURRENCE_SIZE = 40
# Passed to model.fit as `epochs` and `batch_size` respectively.
EPOCHS = 15
BATCH_SIZE = 1

## Utility functions

In [None]:
def paths_and_labels_to_dataset(audio_paths, labels):
    """Pair each decoded audio clip with its label in a single dataset.

    Args:
        audio_paths: Paths of the WAV files, one per example.
        labels: Labels in the same order as ``audio_paths``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(audio, label)`` tuples.
    """
    from_slices = tf.data.Dataset.from_tensor_slices
    waveforms = from_slices(audio_paths).map(path_to_audio)
    targets = from_slices(labels)
    return tf.data.Dataset.zip((waveforms, targets))


def path_to_audio(path):
    """Read a WAV file and decode it into a one-dimensional mono tensor.

    Args:
        path: Path of the WAV file to read.

    Returns:
        The decoded samples of the single requested channel, with the
        channel axis removed.
    """
    raw = tf.io.read_file(path)
    # Requesting one channel makes the decoded tensor (samples, 1).
    audio, _ = tf.audio.decode_wav(raw, desired_channels=1)
    # Remove only the channel axis: an unqualified squeeze drops every
    # size-1 axis, which would turn a one-sample clip into a scalar.
    return tf.squeeze(audio, axis=-1)


def get_audio_paths(directoryPath, files):
    """Build the full paths of the ``.wav`` entries among ``files``.

    Args:
        directoryPath: Directory the file names are relative to.
        files: File names to filter; anything not ending in ``.wav`` is ignored.

    Returns:
        A list of joined paths, in the order the names appear in ``files``.
    """
    wav_paths = []
    for name in files:
        if not name.endswith(".wav"):
            continue
        wav_paths.append(os.path.join(directoryPath, name))
    return wav_paths
    
    
def get_labels(directoryPath, files):
    """Read the ``.txt`` annotation files and multi-hot encode their labels.

    Each annotation file is split on whitespace and every token is treated
    as one label of the corresponding clip.

    Args:
        directoryPath: Directory the file names are relative to.
        files: File names to consider; anything not ending in ``.txt`` is ignored.

    Returns:
        A 2-D indicator array with one row per annotation file (in the order
        the names appear in ``files``) and one column per entry of
        ``sorted(label_set())``.
    """
    annotations = []
    for file in files:
        if not file.endswith(".txt"):
            continue
        # Context manager so the handle is closed instead of leaked.
        with open(os.path.join(directoryPath, file)) as handle:
            annotations.append(handle.read().split())

    # Fix the columns to the full, sorted label set. Fitting on the labels of
    # a single folder would make column count and meaning vary per folder and
    # disagree with the encoding built in training_paths_and_labels.
    mlb = MultiLabelBinarizer(classes=sorted(label_set()))
    return mlb.fit_transform(annotations)

        
def label_set():
    """List the names of the sub-directories of ``trainPath``.

    Returns:
        The directory names, in the order ``os.listdir`` reports them;
        plain files are left out.
    """
    return [
        entry
        for entry in os.listdir(trainPath)
        if os.path.isdir(os.path.join(trainPath, entry))
    ]

## Getting paths

In [None]:
def training_paths_and_labels(subFolder, label):
    """Collect the clips of one training folder with their one-hot label.

    Args:
        subFolder: Path of the folder holding the clips of a single label.
        label: Label shared by every clip in ``subFolder``.

    Returns:
        A tuple ``(audioPaths, labels)`` where ``labels`` has exactly one
        row per entry of ``audioPaths``.
    """
    audioPaths = get_audio_paths(subFolder, os.listdir(subFolder))

    mlb = MultiLabelBinarizer()
    # The first row carries the whole label set so the encoding has one column
    # per known label; it is sliced off again right after the transform.
    # Rows are counted from the audio files actually kept, not from every
    # directory entry, so stray non-wav files cannot desynchronise the lists.
    labels = mlb.fit_transform([label_set()] + [[label]] * len(audioPaths))[1:]
    return audioPaths, labels


def testing_paths_and_labels(subFolder):
    """Collect the clips of one testing folder with their encoded labels.

    Every ``.wav`` file is paired with the ``.txt`` annotation that has the
    same stem.

    Args:
        subFolder: Path of the folder holding clips and annotation files.

    Returns:
        A tuple ``(audioPaths, labels)`` whose entries correspond index by index.

    Raises:
        FileNotFoundError: If a clip has no annotation file next to it.
    """
    # os.listdir gives no ordering guarantee, so filtering clips and
    # annotations independently could pair a clip with another clip's labels.
    # Derive the annotation name from each clip name instead.
    wavFiles = sorted(
        item for item in os.listdir(subFolder) if item.endswith(".wav"))
    txtFiles = [os.path.splitext(item)[0] + ".txt" for item in wavFiles]

    audioPaths = get_audio_paths(subFolder, wavFiles)
    labels = get_labels(subFolder, txtFiles)
    return audioPaths, labels


def path_to_paths_and_labels(path, folderIsLabel=True):
    """Gather audio paths and labels from every sub-directory of ``path``.

    Args:
        path: Directory whose sub-directories contain the data.
        folderIsLabel: When true, each sub-directory name is used as the label
            of its clips (training layout); otherwise labels are read from the
            annotation files inside the sub-directory (testing layout).

    Returns:
        A tuple ``(audioPaths, labels)`` of two lists; ``labels`` holds one
        encoded label row per audio path.
    """
    audioPaths, labels = [], []

    for dir_ in os.listdir(path):
        dirPath = os.path.join(path, dir_)
        if not os.path.isdir(dirPath):
            # Plain files directly under `path` carry no data.
            continue

        if folderIsLabel:
            newAudioPaths, newLabels = training_paths_and_labels(dirPath, dir_)
        else:
            newAudioPaths, newLabels = testing_paths_and_labels(dirPath)

        audioPaths.extend(newAudioPaths)
        # extend() appends row by row without building a throwaway list.
        labels.extend(newLabels)

    return audioPaths, labels

## Generating datasets

In [None]:
def get_training_dataset():
    """Build the shuffled training dataset from ``trainPath``.

    Returns:
        A dataset of ``(audio, label)`` pairs, shuffled with a buffer as
        large as the dataset itself.
    """
    dataset = paths_and_labels_to_dataset(*path_to_paths_and_labels(trainPath))
    return dataset.shuffle(len(dataset))

def get_testing_dataset():
    """Build the shuffled testing dataset from the three testing folders.

    The folders are addressed as ``testPath`` followed by 1, 2 and 3.

    Returns:
        A dataset of ``(audio, label)`` pairs, shuffled with a buffer as
        large as the dataset itself.
    """
    allPaths, allLabels = [], []
    for part in (1, 2, 3):
        partPaths, partLabels = path_to_paths_and_labels(
            "{}{}".format(testPath, part), folderIsLabel=False)
        allPaths.extend(partPaths)
        allLabels.extend(partLabels)

    dataset = paths_and_labels_to_dataset(allPaths, allLabels)
    return dataset.shuffle(len(dataset))

def data_to_list(DS, size, recurrence_size=None):
    """Decimate every clip to ``size`` samples and cut it into fixed chunks.

    Clips shorter than ``size`` samples are skipped. Longer clips are
    subsampled with an integer stride, starting after a margin of half the
    leftover samples, and then reshaped to ``(1, -1, recurrence_size)``;
    samples that do not fill a whole chunk are dropped from the end.

    Args:
        DS: Object whose ``as_numpy_iterator()`` yields ``(audio, label)``
            pairs, ``audio`` being a NumPy array of samples.
        size: Number of samples to keep per clip before chunking.
        recurrence_size: Length of the last axis of each returned array;
            defaults to ``LSTM_RECURRENCE_SIZE``.

    Returns:
        A tuple ``(a, b)`` of two lists: the reshaped clips and their labels.
    """
    if recurrence_size is None:
        recurrence_size = LSTM_RECURRENCE_SIZE

    a, b = [], []
    for a_i, b_i in DS.as_numpy_iterator():
        if a_i.size < size:
            continue

        step_size = a_i.size // size
        margin = (a_i.size % size) // 2
        # Take exactly `size` strided samples. The former end bound of
        # `-1 - margin` returned one sample too few when the stride was 1 and
        # the leftover was even, which made clip lengths inconsistent.
        a_i = a_i[margin::step_size][:size]

        usable = a_i.size - a_i.size % recurrence_size
        a.append(a_i[:usable].reshape(1, -1, recurrence_size))
        b.append(b_i)

    return a, b

def split_training_data(DS, size):
    """Convert a dataset to arrays and hold out the tail for validation.

    Args:
        DS: Dataset handed to ``data_to_list``.
        size: Number of samples kept per clip, handed to ``data_to_list``.

    Returns:
        ``((a_train, b_train), (a_test, b_test))`` where the second pair
        holds the last ``VALIDATION_SIZE`` examples and the first pair the rest.
    """
    samples, targets = (np.array(part) for part in data_to_list(DS, size))

    fitting = slice(None, -VALIDATION_SIZE)
    holdout = slice(-VALIDATION_SIZE, None)

    return (samples[fitting], targets[fitting]), (samples[holdout], targets[holdout])

def test_data_to_numpy(DS, size):
    """Convert the testing dataset into a pair of NumPy arrays.

    Args:
        DS: Dataset handed to ``data_to_list``.
        size: Number of samples kept per clip, handed to ``data_to_list``.

    Returns:
        A tuple ``(a, b)``: the clips stacked into one ``float32`` array and
        the labels stacked into one array.
    """
    a, b = data_to_list(DS, size)

    # Build the float32 array directly; going through dtype=object first
    # boxes every sample as a Python object before casting it back.
    a = np.array(a, dtype='float32')
    b = np.array(b)

    return a, b

In [None]:
# Build both shuffled datasets from the folders configured in the parameters cell.
trainDS = get_training_dataset()
testDS = get_testing_dataset()
# Training arrays plus the validation split held out from the end of them.
(a_train, b_train), (a_test, b_test) = split_training_data(trainDS, SIZE)
# Inputs and labels of the separate testing set, used by model.evaluate.
a, b = test_data_to_numpy(testDS, SIZE)

## Deep learning model

In [None]:
def residual_block(x, filters, conv_num=3, activation="relu"):
    """Stack ``conv_num`` 1-D convolutions with a skip connection, then pool.

    Args:
        x: Input tensor of the block.
        filters: Number of filters of every convolution in the block.
        conv_num: Total number of kernel-size-3 convolutions on the main path.
        activation: Activation applied after each convolution except the
            last, and again after the skip connection is added.

    Returns:
        The block output after max pooling with pool size 2 and stride 2.
    """
    # Kernel-size-1 convolution on the skip path so it has `filters` channels.
    shortcut = Conv1D(filters, 1, padding="same")(x)

    main = x
    for _ in range(1, conv_num):
        main = Conv1D(filters, 3, padding="same")(main)
        main = Activation(activation)(main)

    # The last convolution is activated only after the shortcut is added.
    main = Conv1D(filters, 3, padding="same")(main)
    merged = Add()([main, shortcut])
    merged = Activation(activation)(merged)
    return MaxPool1D(pool_size=2, strides=2)(merged)



# Per-example input shape (batch axis excluded): two axes of unspecified
# length followed by LSTM_RECURRENCE_SIZE values, in line with the
# (1, -1, LSTM_RECURRENCE_SIZE) arrays produced by data_to_list.
inputs = Input(shape=(None, None, LSTM_RECURRENCE_SIZE), name="input")

# Convolutional LSTM front end with 256 filters and kernel size 3.
x = ConvLSTM1D(256, 3, padding="same")(inputs,)

# Four residual blocks; each one ends in a max pooling with stride 2.
x = residual_block(x, 256, conv_num=2)
x = residual_block(x, 512, conv_num=3)
x = residual_block(x, 1024, conv_num=3)
x = residual_block(x, 1024, conv_num=3)

x = AveragePooling1D(pool_size=3, strides=3)(x)

x = LSTM(128)(x)

# Fully connected head.
x = Dense(256, activation="relu")(x)
x = Dense(256, activation="relu")(x)
x = Dense(256, activation="relu")(x)

# One output unit per training label folder.
outputs = Dense(len(label_set()), activation="softmax", name="output")(x)


model = Model(inputs=inputs, outputs=outputs)

model.summary()

# NOTE(review): softmax with categorical cross-entropy models a single label
# per clip, while get_labels can encode several labels for one test clip —
# confirm this is the intended evaluation setup.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Fitting the model

In [None]:
# Train on the training split; the held-out tail from split_training_data is the validation data.
model.fit(a_train, b_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(a_test, b_test))

In [None]:
# Evaluate on the arrays built from the testing dataset by test_data_to_numpy.
model.evaluate(a, b)

In [None]:
# Persist the trained model under the path "model".
# NOTE(review): some Keras versions may require a file extension here — confirm against the installed version.
model.save("model")

## Testing data visualisations

In [None]:
# NOTE: this rebinds `x`, which the model-building cell used for layer outputs.
x = 10  # index of the excerpt to show in the plots below

#plt.figure(figsize=(80, 10))
#plt.scatter(range(0, a[x].size), a[x], s=1, alpha=1, );

In [None]:
#SIZE = 44100*3
#margin = (a_train[x].size%SIZE)//2
#audio = a_train[x][margin:-1-margin:int(a_train[x].size/SIZE)]

#plt.figure(figsize=(80, 10))
#plt.scatter(range(0, SIZE), audio, s=1, alpha=1, );

## Artifacts

In [None]:
"""inputs = Input(shape=(44100, 1), name="input")

x = Dense(64, activation='relu')(inputs)
x = Dense(16, activation='relu')(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
outputs = Dense(len(label_set()), activation='softmax', name='output')(x)

model = Model(inputs=inputs, outputs=outputs)

model.summary()

model = Sequential()
model.add(Dense(2048, input_shape=(44100,), activation="relu"))    # first dense layer, 32 hidden units
model.add(Dense(1024, activation="relu"))
model.add(Dense(512, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(11))                     # second dense layer
model.add(Activation('softmax'))         # output class probabilities

def residual_block(x, filters, conv_num=3, activation="relu"):
    # Shortcut
    s = Conv1D(filters, 1, padding="same")(x)
    for i in range(conv_num - 1):
        x = Conv1D(filters, 3, padding="same")(x)
        x = Activation(activation)(x)
    x = Conv1D(filters, 3, padding="same")(x)
    x = Add()([x, s])
    x = Activation(activation)(x)
    return MaxPool1D(pool_size=2, strides=2)(x)


def build_model(input_shape, num_classes):
    inputs = Input(shape=input_shape, name="input")

    x = residual_block(inputs, 16, 2)
    x = residual_block(x, 32, 2)
    x = residual_block(x, 64, 3)
    x = residual_block(x, 128, 3)
    x = residual_block(x, 128, 3)

    x = AveragePooling1D(pool_size=3, strides=3)(x)
    x = Flatten()(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(128, activation="relu")(x)

    outputs = Dense(num_classes, activation="softmax", name="output")(x)

    return Model(inputs=inputs, outputs=outputs)


model = build_model((44100, 1), len(label_set()))"""