In [21]:
# basic imports
import pandas as pd, matplotlib.pyplot as plt, numpy as np 
import IPython.display as ipd

In [22]:
# librosa - for CQT and visualisation
import librosa, librosa.display

In [23]:
# Interval for label storage
from intervaltree import Interval,IntervalTree

# Neural Network

## Import data

In [24]:
# import h5py to read in hdf5 files 
import h5py

filename ='musicnet.h5'
music_net = h5py.File(filename,'r')
type(music_net)

h5py._hl.files.File

In [25]:
# extract 10 songs from the database
source_list = list(music_net.keys())[0:10]
source_list

['id_1727',
 'id_1728',
 'id_1729',
 'id_1730',
 'id_1733',
 'id_1734',
 'id_1735',
 'id_1739',
 'id_1742',
 'id_1749']

## Preprocessing

In [26]:
# init lists for storing results
cqt_container = []
labels_container = []

print('initializing data extraction')
print('----------------------------')
for id_ in source_list:
    print('extracting data from',id_)
    # grab song id_1730
    song = music_net[id_]
    music = song['data'][()]
    raw_labels = song['labels'][()]

    # sampling rate
    sr = 44100
    sr_16k = 16000

    # resample to 16kHz
    music = librosa.resample(music,sr,sr_16k)

    # CQT parameters
    sr_16k = 16000 # downsampling to 16kHz
    hop_length = 512 # 32ms frames  
    bins_per_octave = 36 # corresponds to 7 octaves on the piano 
    n_bins = 252 # frequency resolution 

    # CQT 
    C = np.abs(librosa.cqt(music,sr = sr_16k , \
                     hop_length=hop_length, bins_per_octave = bins_per_octave,
                     window='hann',n_bins = n_bins))
    C = np.transpose(C)
    
    # append to container
    cqt_container.append(C)
    
    # set up params for labels 
    number_of_notes = 88 
    number_frames = C.shape[0]

    # binary labels 
    binary_labels = np.zeros((number_frames, number_of_notes))

    # init tree for slicing
    labels_tree = IntervalTree()
    for label in raw_labels: 
        pitch = label['note_id']
        start_time = label['start_time']
        end_time = label['end_time']
        # insert
        labels_tree.addi(start_time,end_time,pitch)

    # sample labels accordingly  
    wps = sr_16k/float(hop_length) # windows per second 
    for window in range(binary_labels.shape[0]):
        # check what notes are active
        labels = labels_tree.at(window*hop_length)
        # loop through all the labels 
        for label in labels:
            # keep the pitch in the range 0-87 
            pitch = label[2]-21 
            # if this pitch is active in this window 
            binary_labels[window,pitch] = 1
            
    labels_container.append(binary_labels)
print('----------------------------')
print('data extraction complete')

initializing data extraction
----------------------------
extracting data from id_1727
extracting data from id_1728
extracting data from id_1729
extracting data from id_1730
extracting data from id_1733
extracting data from id_1734
extracting data from id_1735
extracting data from id_1739
extracting data from id_1742
extracting data from id_1749
----------------------------
data extraction complete


In [56]:
# stack the containers vertically
X = np.vstack(cqt_container)
y = np.vstack(labels_container)
X.shape

(162790, 252)

In [28]:
# # save to txt files 
# np.save('preprocessed_cqt', X)
# np.save('preprocessed_labels', y)

In [None]:
# # load files
# X = np.load('preprocessed_cqt.npy')
# y = np.load('preprocessed_labels.npy')

## train-test split and normalization 

In [59]:
# split into training and test 
from sklearn.model_selection import train_test_split

# split into train, test and validation 
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)
# # need to set split = 0.25 to get 60-20-20 split 
X_train, X_val, y_train, y_val = train_test_split(X_train_val,y_train_val, test_size=0.25, random_state=42)

In [60]:
# Normalization 
max_train = X_train.max()
min_train = X_train.min()
max_val = X_val.max()
min_val = X_val.min()
max_test = X_test.max()
min_test = X_test.min()

X_val_norm = (X_val - min_train)/(max_train - min_train)
X_test_norm = (X_test - min_train)/(max_train - min_train)
X_train_norm = (X_train - min_train)/(max_train - min_train)

# Compute the mean
train_mean = np.mean(X_train_norm, axis = 0)

# Subtract it
X_train_norm = X_train_norm - train_mean
X_val_norm = X_val_norm - train_mean
X_test_norm = X_test_norm - train_mean

# reassign for easy interpretation 
X_train = X_train_norm
X_val = X_val_norm
X_test = X_test_norm

## Define model 

In [70]:
# import keras deep learning models  
from keras.layers import Dense, Dropout
from keras.models import Sequential 

# model parameters 
layers = 3
hidden_nodes = 256
num_col = 252
output_nodes = 88

# model type 
model = Sequential()

# input layer
model.add(Dense(num_col,kernel_initializer='normal', \
                activation='relu',input_shape=(num_col,)))

# hidden layers 
for i in range(layers): 
    model.add(Dense(hidden_nodes,activation='relu', kernel_initializer='normal'))
    
# output layer
model.add(Dense(output_nodes, activation='sigmoid',kernel_initializer='normal'))

# compile model - with accuracy,f1 measure metrics
model.compile(optimizer='adam',\
              loss='binary_crossentropy',
             metrics=['acc'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_74 (Dense)             (None, 252)               63756     
_________________________________________________________________
dense_75 (Dense)             (None, 256)               64768     
_________________________________________________________________
dense_76 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_77 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_78 (Dense)             (None, 88)                22616     
Total params: 282,724
Trainable params: 282,724
Non-trainable params: 0
_________________________________________________________________


## Fit the model

In [71]:
# early stop 
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# patience = 20 
early_stopping_monitor = EarlyStopping(patience=20 mode='auto', verbose=1, min_delta=0, monitor='val_loss')
# this will save the best model and will not overwrite it
model_save = ModelCheckpoint('AMT_DNN_prelim.hdf5', monitor='val_loss', save_best_only=True)

# model params
batch_size = 100
epochs = 1000

# fit the model
print('Fitting model to training data')
print('------------------------------')
model.fit(X_train,y_train,validation_data = (X_val, y_val), \
                    epochs = epochs, batch_size=batch_size,\
                    callbacks=[early_stopping_monitor,model_save], verbose = 0)

Fitting model to training data
------------------------------
Train on 97674 samples, validate on 32558 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
E

<keras.callbacks.History at 0x208607b8710>

### Predict with the model 

In [None]:
# # load keras model 
# from keras.models import load_model 
# model = load_model('AMT_trained_DNN_trial_model.h5')

In [73]:
# parameters 
batch_size = 100 

print('making predictions on test set')
print('------------------------------')
# predict on test set
predictions = model.predict(X_test, batch_size = batch_size, verbose=1)
# clean up predictions 
predictions = np.array(predictions).round()
predictions[predictions > 1] = 1
print('------------------------------')
print('predictions complete')

making predictions on test set
------------------------------
------------------------------
predictions complete


In [74]:
# init true positives etc. 
TP = 0
FN = 0
FP = 0

# calculating model accuracy 
TP = np.count_nonzero(np.logical_and( predictions == 1, y_test == 1 ))
FN = np.count_nonzero(np.logical_and( predictions == 0, y_test == 1 ))
FP = np.count_nonzero(np.logical_and( predictions == 1, y_test == 0 ))
if (TP + FN) > 0:
    R = TP/float(TP + FN)
    P = TP/float(TP + FP)
    A = 100*TP/float(TP + FP + FN)
    if P == 0 and R == 0:
        F = 0
    else: 
        F = 100*2*P*R/(P + R)
else: 
    A = 0
    F = 0
    R = 0
    P = 0

print('model precision:', P)
print('model recall:', R)
print('F-measure:', F)
print('Accuracy:', A)

model precision: 0.8091827483486596
model recall: 0.6125796646730072
F-measure: 69.72879464285714
Accuracy: 53.525868937570145


## Post-processing

In [75]:
# clean model in adjacent frames based on the threshold 
# a is indexing notes
for a in range(predictions.shape[1]):
# j is indexing the frames 
    for j in range(2,predictions.shape[0]-3):
        # [1 0* 0 1]
        if predictions[j-1,a] == 1 and predictions[j,a] == 0 and predictions[j+1,a] == 0 and predictions[j+2,a] == 1:
            predictions[j,a] = 1
            predictions[j+1,a] = 1
        # [0 0 1* 1 0 0]
        if predictions[j-2,a] == 0 and predictions[j-1,a] == 0 and predictions[j,a] == 1 and predictions[j+1,a] == 1 and predictions[j+2,a] == 0 and predictions[j+3,a] == 0:
            predictions[j,a] = 0
            predictions[j+1,a] = 0
        # [0 1* 0 0]
        if predictions[j-1,a] == 0 and predictions[j,a] == 1 and predictions[j+1,a] == 0 and predictions[j+2,a] == 0:
            predictions[j,a] = 0
        # [1 0* 1 1]
        if predictions[j-1,a] == 1 and predictions[j,a] == 0 and predictions[j+1,a] == 1 and predictions[j+2,a] == 1:
            predictions[j,a] = 1

In [76]:
# init true positives etc. 
TP = 0
FN = 0
FP = 0

# calculating model accuracy 
TP = np.count_nonzero(np.logical_and( predictions == 1, y_test == 1 ))
FN = np.count_nonzero(np.logical_and( predictions == 0, y_test == 1 ))
FP = np.count_nonzero(np.logical_and( predictions == 1, y_test == 0 ))
if (TP + FN) > 0:
    R = TP/float(TP + FN)
    P = TP/float(TP + FP)
    A = 100*TP/float(TP + FP + FN)
    if P == 0 and R == 0:
        F = 0
    else: 
        F = 100*2*P*R/(P + R)
else: 
    A = 0
    F = 0
    R = 0
    P = 0

print('model precision:', P)
print('model recall:', R)
print('F-measure:', F)
print('Accuracy:', A)

model precision: 0.7602754237288135
model recall: 0.07036964408275322
F-measure: 12.881629722695862
Accuracy: 6.8842143631358335
