# Instrument recognition machine learning project

In [1]:
import os
import tensorflow as tf
import numpy as np

from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam

## Utility functions

In [2]:
def paths_and_labels_to_dataset(audio_paths, labels):
    """Build a dataset yielding `(audio, label)` pairs.

    Every entry of `audio_paths` is decoded through `path_to_audio`, and the
    resulting audio dataset is zipped element-wise with a dataset built from
    `labels`.
    """
    decoded_audio = tf.data.Dataset.from_tensor_slices(audio_paths).map(path_to_audio)
    label_tensors = tf.data.Dataset.from_tensor_slices(labels)
    paired = (decoded_audio, label_tensors)
    return tf.data.Dataset.zip(paired)


def path_to_audio(path):
    """Load the WAV file at `path` and return its decoded samples.

    The file is decoded requesting 1 channel and 44100 samples; the decoded
    sample rate is discarded and size-1 dimensions are squeezed out of the
    audio tensor before it is returned.
    """
    raw_bytes = tf.io.read_file(path)
    samples, _sample_rate = tf.audio.decode_wav(
        raw_bytes, desired_channels=1, desired_samples=44100
    )
    return tf.squeeze(samples)


def get_audio_paths(directoryPath, files):
    """Return `directoryPath` joined with each entry of `files` ending in ".wav".

    Matching entries keep the order they have in `files`; every other entry
    is skipped.
    """
    wav_paths = []
    for name in files:
        if name.endswith(".wav"):
            wav_paths.append(os.path.join(directoryPath, name))
    return wav_paths
    
    
def get_labels(directoryPath, files):
    """Read the ".txt" annotation files and return their multi-hot labels.

    Every entry of `files` ending in ".txt" is opened under `directoryPath`;
    its whitespace-separated tokens form that clip's label set.

    The binarizer's classes are pinned to the sorted output of `label_set()`
    so the returned matrix always has one column per known instrument, in the
    same order regardless of which labels happen to occur in this directory.
    Fitting on the directory's own annotations alone would make the column
    count and order vary from folder to folder.

    Args:
        directoryPath: Directory containing the annotation files.
        files: File names (relative to `directoryPath`) to consider.

    Returns:
        A 2-D array with one row per ".txt" entry, in the order of `files`.
    """
    annotations = []
    for file in files:
        if not file.endswith(".txt"):
            continue
        # Context manager closes the handle promptly instead of leaking it.
        with open(os.path.join(directoryPath, file)) as annotation_file:
            annotations.append(annotation_file.read().split())

    mlb = MultiLabelBinarizer(classes=sorted(label_set()))
    return mlb.fit_transform(annotations)

        
def label_set(trainPath="../data/IRMAS-TrainingData"):
    """Return the names of the sub-directories of `trainPath`, sorted.

    Each sub-directory name is treated as one label. The result is sorted
    because `os.listdir` returns entries in arbitrary order, which would
    otherwise make the label order differ between runs or machines.

    Args:
        trainPath: Directory whose immediate sub-directories name the labels.
            Defaults to the training-data location used by this notebook.

    Returns:
        A sorted list of sub-directory names; plain files are ignored.
    """
    return sorted(
        entry
        for entry in os.listdir(trainPath)
        if os.path.isdir(os.path.join(trainPath, entry))
    )

## Getting paths

In [3]:
def training_paths_and_labels(subFolder, label):
    """Return the ".wav" paths in `subFolder` and a one-hot row of `label` for each.

    Args:
        subFolder: Directory whose ".wav" files all carry the same label.
        label: The label shared by every clip in `subFolder`.

    Returns:
        `(audioPaths, labels)` where `labels` has exactly one row per entry
        of `audioPaths`.
    """
    audioPaths = get_audio_paths(subFolder, os.listdir(subFolder))

    mlb = MultiLabelBinarizer()
    # The first sample lists every known label so the binarizer allocates one
    # column per class; that helper row is dropped again with [1:].
    # The row count follows the number of audio paths, not the number of
    # directory entries, so stray non-wav files cannot desynchronise the two.
    labels = mlb.fit_transform([label_set()] + [[label]] * len(audioPaths))[1:]
    return audioPaths, labels


def testing_paths_and_labels(subFolder):
    """Return the ".wav" paths in `subFolder` and the labels from their annotations.

    Each audio file is paired with the ".txt" file sharing its stem, and the
    wav names are sorted first. Filtering the raw `os.listdir` output twice
    and relying on position would pair clips with the wrong annotations
    whenever the listing order differs between the two extensions or a file
    has no counterpart.

    NOTE(review): assumes every "<stem>.wav" has a "<stem>.txt" next to it;
    a missing annotation now surfaces as an error from `get_labels` instead
    of silently shifting all following labels — confirm against the dataset.

    Returns:
        `(audioPaths, labels)` with one label row per audio path.
    """
    wavFiles = sorted(
        item for item in os.listdir(subFolder) if item.endswith(".wav")
    )
    annotationFiles = [os.path.splitext(wav)[0] + ".txt" for wav in wavFiles]

    audioPaths = get_audio_paths(subFolder, wavFiles)
    labels = get_labels(subFolder, annotationFiles)
    return audioPaths, labels


def path_to_paths_and_labels(path, folderIsLabel=True):
    """Collect audio paths and label rows from every sub-directory of `path`.

    Entries of `path` that are not directories are skipped. With
    `folderIsLabel` true, each sub-directory is handled by
    `training_paths_and_labels` using the directory name as the label;
    otherwise it is handled by `testing_paths_and_labels`.

    Returns:
        `(audioPaths, labels)` as two lists accumulated across sub-directories.
    """
    collectedPaths = []
    collectedLabels = []

    for entry in os.listdir(path):
        entryPath = os.path.join(path, entry)
        if os.path.isdir(entryPath):
            if folderIsLabel:
                folderPaths, folderLabels = training_paths_and_labels(entryPath, entry)
            else:
                folderPaths, folderLabels = testing_paths_and_labels(entryPath)
            collectedPaths.extend(folderPaths)
            # Extending row by row keeps each label as its own list element.
            collectedLabels.extend(folderLabels)

    return collectedPaths, collectedLabels

## Generating datasets

In [4]:
def get_training_dataset():
    """Build the shuffled training dataset.

    Paths and labels are gathered from "../data/IRMAS-TrainingData" with the
    folder name used as the label, turned into a dataset of
    `(audio, label)` pairs, and shuffled with a buffer as large as the
    dataset itself.
    """
    clipPaths, clipLabels = path_to_paths_and_labels("../data/IRMAS-TrainingData")
    dataset = paths_and_labels_to_dataset(clipPaths, clipLabels)
    return dataset.shuffle(len(dataset))

def split_training_data(trainDS, test_size=1000):
    """Materialise `trainDS` and split it into training and held-out arrays.

    The dataset is iterated exactly once so every audio clip stays paired
    with its own label. Mapping out the audio and the labels separately and
    iterating each mapped dataset on its own would run the upstream shuffle
    twice, yielding two unrelated orders and therefore random labels.

    Args:
        trainDS: Dataset yielding `(audio, label)` pairs.
        test_size: Number of trailing elements to hold out. Defaults to 1000.

    Returns:
        `((a_train, b_train), (a_test, b_test))` as NumPy arrays, where the
        `a_*` arrays hold audio and the `b_*` arrays hold labels.
    """
    pairs = list(trainDS.as_numpy_iterator())
    a = np.array([audio for audio, _ in pairs])
    b = np.array([label for _, label in pairs])

    # An explicit split index (rather than a negative slice) keeps
    # test_size=0 meaningful: everything goes to training, nothing is held out.
    split = max(len(pairs) - test_size, 0)
    a_train, a_test = a[:split], a[split:]
    b_train, b_test = b[:split], b[split:]

    return (a_train, b_train), (a_test, b_test)

def get_testing_dataset():
    """Build the shuffled testing dataset from the three testing-data parts.

    Paths and labels are gathered from the directories
    "../data/IRMAS-TestingData-Part1" through "...Part3" (labels are read
    from annotation files rather than folder names), combined into one
    dataset of `(audio, label)` pairs, and shuffled with a buffer as large
    as the dataset itself.
    """
    partPrefix = "../data/IRMAS-TestingData-Part"
    allPaths, allLabels = [], []

    for part in (1, 2, 3):
        partPaths, partLabels = path_to_paths_and_labels(
            partPrefix + str(part), folderIsLabel=False
        )
        allPaths.extend(partPaths)
        allLabels.extend(partLabels)

    dataset = paths_and_labels_to_dataset(allPaths, allLabels)
    return dataset.shuffle(len(dataset))

In [5]:
# Build both datasets, then materialise the training set as NumPy arrays split
# into a fitting portion (a_train, b_train) and a held-out portion (a_test, b_test).
trainDS = get_training_dataset()
testDS = get_testing_dataset()
(a_train, b_train), (a_test, b_test) = split_training_data(trainDS)
# Debug aid: uncomment to dump every (audio, label) pair of the training set.
#print(list(trainDS.as_numpy_iterator()))

## Deep learning model

In [6]:
"""inputs = Input(shape=(44100, 1), name="input")

x = Dense(64, activation='relu')(inputs)
x = Dense(16, activation='relu')(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
outputs = Dense(len(label_set()), activation='softmax', name='output')(x)

model = Model(inputs=inputs, outputs=outputs)

model.summary()

model = Sequential()
model.add(layers.Dense(128, input_shape=(44100,)))  # first dense layer, 32 hidden units
model.add(layers.Activation('relu'))                # activation layer
model.add(layers.Flatten())
model.add(layers.Dense(32))
model.add(layers.Dense(11))                     # second dense layer
model.add(layers.Activation('softmax'))         # output class probabilities"""

def residual_block(x, filters, conv_num=3, activation="relu"):
    """Apply one residual 1-D convolution block followed by 2x max-pooling.

    The shortcut branch is a kernel-size-1 convolution of the input. The main
    branch runs `conv_num` kernel-size-3 convolutions, with `activation`
    applied after every convolution except the last. The two branches are
    added, activated, and then max-pooled with pool size 2 and stride 2.
    """
    # Shortcut branch: projects the input to `filters` channels.
    shortcut = Conv1D(filters, kernel_size=1, padding="same")(x)

    main = x
    for _ in range(conv_num - 1):
        main = Conv1D(filters, kernel_size=3, padding="same")(main)
        main = Activation(activation)(main)
    # The final convolution is left un-activated until after the merge.
    main = Conv1D(filters, kernel_size=3, padding="same")(main)

    merged = Add()([main, shortcut])
    merged = Activation(activation)(merged)
    return MaxPool1D(pool_size=2, strides=2)(merged)


def build_model(input_shape, num_classes):
    """Assemble the residual 1-D CNN classifier.

    Five residual blocks (16, 32, 64, 128 and 128 filters) are stacked on the
    input, followed by average pooling, flattening, two ReLU dense layers
    (256 and 128 units) and a softmax output with `num_classes` units.

    Returns:
        An uncompiled Keras `Model` mapping the "input" layer to the "output" layer.
    """
    inputs = Input(shape=input_shape, name="input")

    # (filters, conv_num) for each residual block, in order.
    block_specs = ((16, 2), (32, 2), (64, 3), (128, 3), (128, 3))
    features = inputs
    for filters, conv_num in block_specs:
        features = residual_block(features, filters, conv_num)

    features = AveragePooling1D(pool_size=3, strides=3)(features)
    features = Flatten()(features)
    for units in (256, 128):
        features = Dense(units, activation="relu")(features)

    outputs = Dense(num_classes, activation="softmax", name="output")(features)

    return Model(inputs=inputs, outputs=outputs)


# One output unit per label returned by label_set(); each input has shape (44100, 1).
model = build_model((44100, 1), len(label_set()))

model.summary()

# NOTE(review): the optimizer is selected by its string name here, so the
# `Adam` class imported at the top of the notebook is not used by this call.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 44100, 1)]   0           []                               
                                                                                                  
 conv1d_1 (Conv1D)              (None, 44100, 16)    64          ['input[0][0]']                  
                                                                                                  
 activation (Activation)        (None, 44100, 16)    0           ['conv1d_1[0][0]']               
                                                                                                  
 conv1d_2 (Conv1D)              (None, 44100, 16)    784         ['activation[0][0]']             
                                                                                              

## Fitting the model

In [None]:
# Fit on the training portion produced by split_training_data.
model.fit(a_train, b_train, epochs=15, batch_size=32)

Epoch 1/15

In [None]:
# Score the model on the held-out slice of the training data (testDS is not used here).
model.evaluate(a_test, b_test, batch_size=1)

In [None]:
# Write the trained model to the relative path "model".
model.save("model")