## Definition of genres

In [1]:
import numpy as np
import librosa as lbr
import tensorflow.keras.backend as K

In [2]:
GENRES = ['Electronic', 'Experimental', 'Folk', 'HipHop', 'Instrumental', 'International', 'Pop','Rock']
WINDOW_SIZE = 2048
WINDOW_STRIDE = WINDOW_SIZE // 2
N_MELS = 128
MEL_KWARGS = {
    'n_fft': WINDOW_SIZE,
    'hop_length': WINDOW_STRIDE,
    'n_mels': N_MELS
}


## Feature Engineering

In [3]:
def get_layer_output_function(model, layer_name):
    input = model.get_layer('input').input
    output = model.get_layer(layer_name).output
    f = K.function([input, K.learning_phase()], [output])
    return lambda x: f([x, 0])[0] # learning_phase = 0 means test

def load_track(filename, enforce_shape=None):
    new_input, sample_rate = lbr.load(filename, mono=True, duration=40.0)
    features = lbr.feature.melspectrogram(new_input, **MEL_KWARGS).T

    if enforce_shape is not None:
        if features.shape[0] < enforce_shape[0]:
            delta_shape = (enforce_shape[0] - features.shape[0],
                    enforce_shape[1])
            features = np.append(features, np.zeros(delta_shape), axis=0)
        elif features.shape[0] > enforce_shape[0]:
            features = features[: enforce_shape[0], :]

    features[features == 0] = 1e-6
    return (np.log(features), float(new_input.shape[0]) / sample_rate)


## Creating Data Pickle

In [4]:
import sys
import numpy as np
from math import pi
from pickle import dump
import os
from optparse import OptionParser

In [5]:
TRACK_COUNT = 10000

In [6]:
def get_default_shape(dataset_path):
    tmp_features, _ = load_track(os.path.join(dataset_path,
        'Electronic/Electronic.00000.mp3'))
    return tmp_features.shape

def collect_data(dataset_path):
    '''
    :param dataset_path: path to the dataset directory
    :returns: triple (x, y, track_paths) where x is a matrix containing
        extracted features, y is a one-hot matrix of genre labels and
        track_paths is a dict of absolute track paths indexed by row indices in
        the x and y matrices
    '''
    default_shape = get_default_shape(dataset_path)
    x = np.zeros((TRACK_COUNT,) + default_shape, dtype=np.float32)
    y = np.zeros((TRACK_COUNT, len(GENRES)), dtype=np.float32)
    track_paths = {}

    for (genre_index, genre_name) in enumerate(GENRES):
        for i in range(1000):            
            file_name = '{}/{}.{}.mp3'.format(genre_name,
                    genre_name, '%05d' % i)
#            print('Processing', file_name)
            
            path = os.path.join(dataset_path, file_name)
#            print(genre_index,i)
            track_index = genre_index  * 1000 + i
            
            x[track_index], _ = load_track(path, default_shape)
            y[track_index, genre_index] = 1
            track_paths[track_index] = os.path.abspath(path)

    return (x, y, track_paths)

In [7]:
dataset_path=os.path.join('/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/genres')
output_pkl_path=os.path.join('/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/data.pkl')


## Creating Data File

In [8]:
(x, y, track_paths) = collect_data(dataset_path)
data = {'x': x, 'y': y, 'track_paths': track_paths}

In [None]:
##Writing the Pickle/Data - This fails for files greater than 4GB
with open(output_pkl_path, 'wb') as f:
    dump(data, f)

## Training the C RNN Model

In [9]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Lambda, Dropout, Activation, \
        TimeDistributed, Convolution1D, MaxPooling1D, BatchNormalization
from sklearn.model_selection import train_test_split
import numpy as np
import pickle
from optparse import OptionParser
from sys import stderr, argv
import os



In [10]:
SEED = 42
N_LAYERS = 3
FILTER_LENGTH = 5
CONV_FILTER_COUNT = 256
BATCH_SIZE = 32
EPOCH_COUNT = 85



In [11]:
def train_model(data, model_path):
    x = data['x']
    y = data['y']
    (x_train, x_val, y_train, y_val) = train_test_split(x, y, test_size=0.3,
            random_state=SEED)

    print('Building model...')

    n_features = x_train.shape[2]
    input_shape = (None, n_features)
    model_input = Input(input_shape, name='input')
    layer = model_input
    for i in range(N_LAYERS):
        layer = Convolution1D(
                filters=CONV_FILTER_COUNT,
                kernel_size=FILTER_LENGTH,
                name='convolution_' + str(i + 1)
            )(layer)
        layer = BatchNormalization(momentum=0.9)(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling1D(2)(layer)
        layer = Dropout(0.5)(layer)

    layer = TimeDistributed(Dense(len(GENRES)))(layer)
    time_distributed_merge_layer = Lambda(
            function=lambda x: K.mean(x, axis=1), 
            output_shape=lambda shape: (shape[0],) + shape[2:],
            name='output_merged'
        )
    layer = time_distributed_merge_layer(layer)
    layer = Activation('softmax', name='output_realtime')(layer)
    model_output = layer
    model = Model(model_input, model_output)
    opt = Adam(lr=0.001)
    model.compile(
            loss='categorical_crossentropy',
            optimizer=opt,
            metrics=['accuracy']
        )

    print('Training...')
    model.fit(
        x_train, y_train, batch_size=BATCH_SIZE, nb_epoch=EPOCH_COUNT,
        validation_data=(x_val, y_val), verbose=1, callbacks=[
            ModelCheckpoint(
                model_path, save_best_only=True, monitor='val_acc', verbose=1
            ),
            ReduceLROnPlateau(
                monitor='val_acc', factor=0.5, patience=10, min_delta=0.01,
                verbose=1
            )
        ]
    )

    return model

In [12]:
model_path=os.path.join('/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5')
output_pkl_path=os.path.join('/Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/data.pkl')

In [None]:
##Loading the Pickle/Data - This fails for files greater than 4GB
with open(output_pkl_path, 'rb') as f:
      data = pickle.load(f)

In [15]:
train_model(data, model_path)

Building model...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Training...
Train on 7000 samples, validate on 3000 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/85
Epoch 00001: val_acc improved from -inf to 0.54500, saving model to /Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5
Epoch 2/85
Epoch 00002: val_acc did not improve from 0.54500
Epoch 3/85
Epoch 00003: val_acc improved from 0.54500 to 0.58033, saving model to /Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5
Epoch 4/85
Epoch 00004: val_acc did not improve from 0.58033
Epoch 5/85
Epoch 00005: val_acc did not improve from 0.58033
Epoch 6/85
Epoch 00006: val_acc did not improve from 0.58033
Epoch 7/85
Epoch 00007: val_acc did not improve from 0.58033
Epoch 8/85
Epoch 00008: val_acc improved from 0.58033 to 0.59533, saving model to /Users/jairom

Epoch 00026: val_acc did not improve from 0.62467

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 27/85
Epoch 00027: val_acc did not improve from 0.62467
Epoch 28/85
Epoch 00028: val_acc did not improve from 0.62467
Epoch 29/85
Epoch 00029: val_acc did not improve from 0.62467
Epoch 30/85
Epoch 00030: val_acc did not improve from 0.62467
Epoch 31/85
Epoch 00031: val_acc improved from 0.62467 to 0.64967, saving model to /Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5
Epoch 32/85
Epoch 00032: val_acc did not improve from 0.64967
Epoch 33/85
Epoch 00033: val_acc did not improve from 0.64967
Epoch 34/85
Epoch 00034: val_acc did not improve from 0.64967
Epoch 35/85
Epoch 00035: val_acc improved from 0.64967 to 0.65000, saving model to /Users/jairomelo/Desktop/ML/YORK/ML1020/Final Project/model.h5
Epoch 36/85
Epoch 00036: val_acc did not improve from 0.65000
Epoch 37/85
Epoch 00037: val_acc improved from 0.65000 to 0.65033, saving model t

Epoch 00054: val_acc did not improve from 0.66867
Epoch 55/85
Epoch 00055: val_acc did not improve from 0.66867
Epoch 56/85
Epoch 00056: val_acc did not improve from 0.66867

Epoch 00056: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 57/85
Epoch 00057: val_acc did not improve from 0.66867
Epoch 58/85
Epoch 00058: val_acc did not improve from 0.66867
Epoch 59/85
Epoch 00059: val_acc did not improve from 0.66867
Epoch 60/85
Epoch 00060: val_acc did not improve from 0.66867
Epoch 61/85
Epoch 00061: val_acc did not improve from 0.66867
Epoch 62/85
Epoch 00062: val_acc did not improve from 0.66867
Epoch 63/85
Epoch 00063: val_acc did not improve from 0.66867
Epoch 64/85
Epoch 00064: val_acc did not improve from 0.66867
Epoch 65/85
Epoch 00065: val_acc did not improve from 0.66867
Epoch 66/85
Epoch 00066: val_acc did not improve from 0.66867

Epoch 00066: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 67/85
Epoch 00067: val_acc did not impr

Epoch 84/85
Epoch 00084: val_acc did not improve from 0.66867
Epoch 85/85
Epoch 00085: val_acc did not improve from 0.66867


<tensorflow.python.keras.engine.training.Model at 0x7f9bfbd19d30>