## Creating Data Staging

In [2]:
import numpy as np
import librosa as lbr
import tensorflow.keras.backend as K

In [3]:
# Genre labels; their order fixes the column order of the one-hot label matrix.
GENRES = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

# STFT window length in samples, and the hop between windows (50% overlap).
WINDOW_SIZE = 2048
WINDOW_STRIDE = WINDOW_SIZE // 2

# Number of Mel bands in the spectrogram.
N_MELS = 128

# Keyword arguments forwarded to librosa's melspectrogram.
MEL_KWARGS = dict(
    n_fft=WINDOW_SIZE,
    hop_length=WINDOW_STRIDE,
    n_mels=N_MELS,
)



In [4]:
def get_layer_output_function(model, layer_name):
    '''
    Builds a callable that evaluates the output of one layer of a model.

    :param model: model containing a layer named 'input' and a layer named
        layer_name
    :param layer_name: name of the layer whose output should be returned
    :returns: function taking a batch x and returning that layer's output,
        evaluated with the learning phase set to 0 (test mode)
    '''
    source_tensor = model.get_layer('input').input
    target_tensor = model.get_layer(layer_name).output
    backend_fn = K.function([source_tensor, K.learning_phase()],
                            [target_tensor])

    def evaluate(x):
        # learning_phase = 0 means test
        return backend_fn([x, 0])[0]

    return evaluate

def load_track(filename, enforce_shape=None):
    '''
    Loads an audio file and computes its log Mel-scaled power spectrogram.

    :param filename: path of the audio file to load
    :param enforce_shape: optional (n_frames, n_mels) shape; when given, the
        feature matrix is zero-padded or truncated along the first (time)
        axis so that it has exactly enforce_shape[0] rows
    :returns: pair (features, duration) where features is the natural log of
        the transposed Mel spectrogram (one row per frame) and duration is the
        track length in seconds (sample count / sample rate)
    '''
    new_input, sample_rate = lbr.load(filename, mono=True)
    # Pass the signal as `y=`: librosa >= 0.10 made the audio arguments
    # keyword-only, so the positional form raises a TypeError there. Passing
    # `sr` keeps the Mel filterbank consistent with the rate actually loaded.
    features = lbr.feature.melspectrogram(
        y=new_input, sr=sample_rate, **MEL_KWARGS).T

    if enforce_shape is not None:
        if features.shape[0] < enforce_shape[0]:
            # Too short: pad with zero-valued frames at the end.
            delta_shape = (enforce_shape[0] - features.shape[0],
                    enforce_shape[1])
            features = np.append(features, np.zeros(delta_shape), axis=0)
        elif features.shape[0] > enforce_shape[0]:
            # Too long: keep only the leading frames.
            features = features[: enforce_shape[0], :]

    # Replace exact zeros (including the padding) so that log() stays finite.
    features[features == 0] = 1e-6
    return (np.log(features), float(new_input.shape[0]) / sample_rate)


## Creating Data Pickle

In [5]:
import sys
import numpy as np
from math import pi
from pickle import dump
import os
from optparse import OptionParser


In [6]:
# Total number of tracks to load; collect_data divides this evenly across
# GENRES (TRACK_COUNT // len(GENRES) tracks per genre).
TRACK_COUNT = 1000

In [13]:
def get_default_shape(dataset_path):
    '''
    Returns the feature-matrix shape of the reference track
    blues/blues.00000.au, which collect_data uses as the shape for every track.

    :param dataset_path: path to the dataset directory
    :returns: shape tuple of the reference track's feature matrix
    '''
    reference_track = os.path.join(dataset_path, 'blues/blues.00000.au')
    reference_features, _duration = load_track(reference_track)
    return reference_features.shape

def collect_data(dataset_path):
    '''
    Loads the GTZAN tracks and builds the feature and label arrays. A log
    Mel-scaled power spectrogram is computed for each track via load_track.

    :param dataset_path: path to the GTZAN dataset directory
    :returns: triple (x, y, track_paths) where x holds one feature matrix per
        track, y is the matching one-hot matrix of genre labels and
        track_paths is a dict mapping each row index of x and y to the
        absolute path of the corresponding track
    '''
    default_shape = get_default_shape(dataset_path)
    tracks_per_genre = TRACK_COUNT // len(GENRES)

    x = np.zeros((TRACK_COUNT,) + default_shape, dtype=np.float32)
    y = np.zeros((TRACK_COUNT, len(GENRES)), dtype=np.float32)
    track_paths = {}

    for genre_index, genre_name in enumerate(GENRES):
        # Rows of one genre are contiguous: genre g occupies rows
        # [g * tracks_per_genre, (g + 1) * tracks_per_genre).
        first_row = genre_index * tracks_per_genre
        for i in range(tracks_per_genre):
            file_name = '{}/{}.000{}.au'.format(genre_name,
                    genre_name, str(i).zfill(2))
            print('Processing', file_name)
            path = os.path.join(dataset_path, file_name)

            track_index = first_row + i
            features, _ = load_track(path, default_shape)
            x[track_index] = features
            y[track_index, genre_index] = 1
            track_paths[track_index] = os.path.abspath(path)

    return (x, y, track_paths)

In [14]:
# Input: directory holding the per-genre track folders.
dataset_path = '/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/genres'
# Output: pickle file that receives the extracted features and labels.
output_pkl_path = '/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/data.pkl'


In [15]:
# Extract features and labels for every track under dataset_path.
x, y, track_paths = collect_data(dataset_path)

Processing blues/blues.00000.au
Processing blues/blues.00001.au
Processing blues/blues.00002.au
Processing blues/blues.00003.au
Processing blues/blues.00004.au
Processing blues/blues.00005.au
Processing blues/blues.00006.au
Processing blues/blues.00007.au
Processing blues/blues.00008.au
Processing blues/blues.00009.au
Processing blues/blues.00010.au
Processing blues/blues.00011.au
Processing blues/blues.00012.au
Processing blues/blues.00013.au
Processing blues/blues.00014.au
Processing blues/blues.00015.au
Processing blues/blues.00016.au
Processing blues/blues.00017.au
Processing blues/blues.00018.au
Processing blues/blues.00019.au
Processing blues/blues.00020.au
Processing blues/blues.00021.au
Processing blues/blues.00022.au
Processing blues/blues.00023.au
Processing blues/blues.00024.au
Processing blues/blues.00025.au
Processing blues/blues.00026.au
Processing blues/blues.00027.au
Processing blues/blues.00028.au
Processing blues/blues.00029.au
Processing blues/blues.00030.au
Processi

Processing country/country.00028.au
Processing country/country.00029.au
Processing country/country.00030.au
Processing country/country.00031.au
Processing country/country.00032.au
Processing country/country.00033.au
Processing country/country.00034.au
Processing country/country.00035.au
Processing country/country.00036.au
Processing country/country.00037.au
Processing country/country.00038.au
Processing country/country.00039.au
Processing country/country.00040.au
Processing country/country.00041.au
Processing country/country.00042.au
Processing country/country.00043.au
Processing country/country.00044.au
Processing country/country.00045.au
Processing country/country.00046.au
Processing country/country.00047.au
Processing country/country.00048.au
Processing country/country.00049.au
Processing country/country.00050.au
Processing country/country.00051.au
Processing country/country.00052.au
Processing country/country.00053.au
Processing country/country.00054.au
Processing country/country.0

Processing hiphop/hiphop.00071.au
Processing hiphop/hiphop.00072.au
Processing hiphop/hiphop.00073.au
Processing hiphop/hiphop.00074.au
Processing hiphop/hiphop.00075.au
Processing hiphop/hiphop.00076.au
Processing hiphop/hiphop.00077.au
Processing hiphop/hiphop.00078.au
Processing hiphop/hiphop.00079.au
Processing hiphop/hiphop.00080.au
Processing hiphop/hiphop.00081.au
Processing hiphop/hiphop.00082.au
Processing hiphop/hiphop.00083.au
Processing hiphop/hiphop.00084.au
Processing hiphop/hiphop.00085.au
Processing hiphop/hiphop.00086.au
Processing hiphop/hiphop.00087.au
Processing hiphop/hiphop.00088.au
Processing hiphop/hiphop.00089.au
Processing hiphop/hiphop.00090.au
Processing hiphop/hiphop.00091.au
Processing hiphop/hiphop.00092.au
Processing hiphop/hiphop.00093.au
Processing hiphop/hiphop.00094.au
Processing hiphop/hiphop.00095.au
Processing hiphop/hiphop.00096.au
Processing hiphop/hiphop.00097.au
Processing hiphop/hiphop.00098.au
Processing hiphop/hiphop.00099.au
Processing jaz

Processing pop/pop.00036.au
Processing pop/pop.00037.au
Processing pop/pop.00038.au
Processing pop/pop.00039.au
Processing pop/pop.00040.au
Processing pop/pop.00041.au
Processing pop/pop.00042.au
Processing pop/pop.00043.au
Processing pop/pop.00044.au
Processing pop/pop.00045.au
Processing pop/pop.00046.au
Processing pop/pop.00047.au
Processing pop/pop.00048.au
Processing pop/pop.00049.au
Processing pop/pop.00050.au
Processing pop/pop.00051.au
Processing pop/pop.00052.au
Processing pop/pop.00053.au
Processing pop/pop.00054.au
Processing pop/pop.00055.au
Processing pop/pop.00056.au
Processing pop/pop.00057.au
Processing pop/pop.00058.au
Processing pop/pop.00059.au
Processing pop/pop.00060.au
Processing pop/pop.00061.au
Processing pop/pop.00062.au
Processing pop/pop.00063.au
Processing pop/pop.00064.au
Processing pop/pop.00065.au
Processing pop/pop.00066.au
Processing pop/pop.00067.au
Processing pop/pop.00068.au
Processing pop/pop.00069.au
Processing pop/pop.00070.au
Processing pop/pop.0

In [18]:
# Bundle the arrays and the row-index -> path mapping, then pickle them.
data = dict(x=x, y=y, track_paths=track_paths)
with open(output_pkl_path, 'wb') as f:
    dump(data, f)

## Training the Model

In [19]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Lambda, Dropout, Activation, \
        TimeDistributed, Convolution1D, MaxPooling1D, BatchNormalization
from sklearn.model_selection import train_test_split
import numpy as np
import pickle
from optparse import OptionParser
from sys import stderr, argv
import os



In [20]:
# Random state passed to train_test_split so the train/validation split is
# repeatable.
SEED = 42
# Number of convolution blocks stacked in train_model.
N_LAYERS = 3
# Kernel size of each Convolution1D layer.
FILTER_LENGTH = 5
# Number of filters in each Convolution1D layer.
CONV_FILTER_COUNT = 256
# Mini-batch size passed to model.fit.
BATCH_SIZE = 32
# Number of training epochs passed to model.fit.
EPOCH_COUNT = 100



In [21]:
def train_model(data, model_path):
    '''
    Builds and trains the 1-D convolutional genre classifier.

    The data is split 70/30 into training and validation sets (seeded with
    SEED). The network stacks N_LAYERS blocks of convolution, batch
    normalization, ReLU, max pooling and dropout, then applies a per-timestep
    dense layer, averages over the time axis and finishes with a softmax.

    :param data: dict with keys 'x' (feature array, indexed as
        tracks x frames x features) and 'y' (one-hot genre labels)
    :param model_path: file path where the best model seen so far (highest
        validation accuracy) is checkpointed
    :returns: the trained model in its state after the final epoch
    '''
    x = data['x']
    y = data['y']
    (x_train, x_val, y_train, y_val) = train_test_split(x, y, test_size=0.3,
            random_state=SEED)

    print('Building model...')

    n_features = x_train.shape[2]
    # The time dimension is left as None so tracks of any length are accepted.
    input_shape = (None, n_features)
    model_input = Input(input_shape, name='input')
    layer = model_input
    for i in range(N_LAYERS):
        # second convolutional layer names are used by extract_filters.py
        layer = Convolution1D(
                filters=CONV_FILTER_COUNT,
                kernel_size=FILTER_LENGTH,
                name='convolution_' + str(i + 1)
            )(layer)
        layer = BatchNormalization(momentum=0.9)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling1D(2)(layer)
        layer = Dropout(0.5)(layer)

    # Per-timestep genre scores, averaged over time before the softmax.
    layer = TimeDistributed(Dense(len(GENRES)))(layer)
    time_distributed_merge_layer = Lambda(
            function=lambda t: K.mean(t, axis=1),
            output_shape=lambda shape: (shape[0],) + shape[2:],
            name='output_merged'
        )
    layer = time_distributed_merge_layer(layer)
    layer = Activation('softmax', name='output_realtime')(layer)
    model_output = layer
    model = Model(model_input, model_output)
    # The learning rate is passed positionally: the keyword was renamed from
    # `lr` to `learning_rate` across Keras releases, but it has always been
    # the first positional argument.
    opt = Adam(0.001)
    model.compile(
            loss='categorical_crossentropy',
            optimizer=opt,
            # Requested as 'acc' so the logged name is 'acc'/'val_acc' on every
            # tf.keras version. With 'accuracy', TF 2.x logs 'val_accuracy' and
            # the callbacks below (which monitor 'val_acc') would do nothing.
            metrics=['acc']
        )

    print('Training...')
    model.fit(
        # `epochs` replaces the legacy `nb_epoch` keyword, which newer
        # tf.keras versions reject.
        x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCH_COUNT,
        validation_data=(x_val, y_val), verbose=1, callbacks=[
            ModelCheckpoint(
                model_path, save_best_only=True, monitor='val_acc', verbose=1
            ),
            ReduceLROnPlateau(
                monitor='val_acc', factor=0.5, patience=10, min_delta=0.01,
                verbose=1
            )
        ]
    )

    return model

In [22]:
# File that receives the best model checkpoint during training.
model_path = '/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5'

In [23]:

# Reload the feature/label bundle from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open(output_pkl_path, 'rb') as f:
    data = pickle.load(f)


In [None]:
# Train on the reloaded data; the best checkpoint is written to model_path.
train_model(data, model_path)

Building model...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Training...
Train on 700 samples, validate on 300 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 00001: val_acc improved from -inf to 0.46667, saving model to /Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5
Epoch 2/100
Epoch 00002: val_acc did not improve from 0.46667
Epoch 3/100
Epoch 00003: val_acc did not improve from 0.46667
Epoch 4/100
Epoch 00004: val_acc did not improve from 0.46667
Epoch 5/100
Epoch 00005: val_acc improved from 0.46667 to 0.57333, saving model to /Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5
Epoch 6/100
Epoch 00006: val_acc did not improve from 0.57333
Epoch 7/100
Epoch 00007: val_acc did not improve from 0.57333
Epoch 8/100
Epoch 00008: val_acc did not improve from 0.57333
Epoch 9/100
Epoch 00009: val