In [30]:
from sklearn.neural_network import MLPClassifier
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain
from sklearn.metrics import jaccard_score, multilabel_confusion_matrix, precision_recall_fscore_support
import matplotlib.pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn import preprocessing
from skopt import BayesSearchCV
from sklearn.base import BaseEstimator, ClassifierMixin
import librosa
import librosa.display
import librosa.feature
import IPython.display as ipd

In [31]:
x = np.load('../Part1.npy')
Y_t = np.load('../train.npy/y_train.npy')
#X_train = (x-np.min(x))/(np.max(x)-np.min(x))
xt = np.load('../Part2.npy')
Y_test = np.load('../test.npy/Y_Test.npy')
#X_test = (xt-np.min(xt))/(np.max(xt)-np.min(xt))
x_add = np.load('../Part3.npy')
y_add = np.load('../additional.npy/Y_additional.npy')
#x_add = (x_add-np.min(x_add))/(np.max(x_add)-np.min(x_add))
Y_train = np.concatenate((Y_t, Y_test, y_add))
X_train = np.concatenate((x, xt, x_add))
#X_train = (X_train-np.min(X_train))/(np.max(X_train)-np.min(X_train))
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

In [32]:
X_train.shape

(2874, 17)

In [29]:
#IRMAS-TrainingData/cel/[cel][pop_roc]0143__3.wav
instruments = ['cel/','cla/','flu/','gac/','gel/','org/','pia/','sax/','tru/','vio/','voi/']
pathAudio = "../IRMAS-TrainingData/"
X_test = []
Y_test = []
cnt = 0
for i in instruments:
    label = np.zeros(11)
    label[cnt] = 1
    cnt +=1
    #print(pathAudio + i)
    files = librosa.util.find_files(pathAudio + i, ext=['wav'], recurse=False) 
    files = np.asarray(files)
    #print(files)
    for y in files:
        #print(test)
        y, sr = librosa.load(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        rolloff = np.mean(rolloff)
        bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        bw = np.mean(bw)
        ctr = librosa.feature.spectral_centroid(y=y, sr=sr)
        ctr = np.mean(ctr)
        zcr = librosa.feature.zero_crossing_rate(y=y)
        zcr = np.mean(zcr)
        mfcc = np.mean(mfcc, axis=1)
        mfcc = mfcc[:13]
        mfcc = np.append(mfcc, [rolloff, bw, ctr, zcr])
        X_test.append(mfcc)
        Y_test.append(label)

KeyboardInterrupt: 

In [33]:
X_test = np.load('X_single.npy')
Y_test = np.load('Y_single.npy')
scaler = preprocessing.StandardScaler().fit(X_test)
X_test = scaler.transform(X_test)

In [34]:
np.array(X_test).shape

(6705, 17)

In [35]:
mlp = MLPClassifier(activation = 'identity',hidden_layer_sizes = [100] ,solver = 'lbfgs',alpha = 0.00003, batch_size = 50,
                    learning_rate_init=0.058, max_iter = 114, warm_start=True)
chains = [ClassifierChain(mlp, order="random", random_state=i) for i in range(10)]
for chain in chains:
    chain.fit(X_train, Y_train)

In [36]:
Y_pred_chains = np.array([chain.predict(X_test) for chain in chains])
chain_jaccard_scores = [
    jaccard_score(Y_test, Y_pred_chain >= 0.5, average="samples")
    for Y_pred_chain in Y_pred_chains
]

Y_pred_ensemble = Y_pred_chains.mean(axis=0)

In [37]:
chain_jaccard_scores

[0.13044991300024855,
 0.11807109122545366,
 0.1218742232165051,
 0.13256276410638826,
 0.1184439473030077,
 0.1270444941585881,
 0.15244842157593835,
 0.11739995028585634,
 0.16711409395973154,
 0.1365647526721352]

In [38]:
Y_pred = np.where(Y_pred_ensemble>0.5,1,0)

In [39]:
Y_pred

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]])

In [40]:
m = multilabel_confusion_matrix(Y_test, Y_pred)

In [41]:
m

array([[[6299,   18],
        [ 388,    0]],

       [[6195,    5],
        [ 501,    4]],

       [[6253,    1],
        [ 451,    0]],

       [[5842,  226],
        [ 544,   93]],

       [[5421,  524],
        [ 585,  175]],

       [[6017,    6],
        [ 682,    0]],

       [[2549, 3435],
        [ 323,  398]],

       [[6027,   52],
        [ 625,    1]],

       [[6122,    6],
        [ 573,    4]],

       [[6038,   87],
        [ 572,    8]],

       [[4840, 1087],
        [ 640,  138]]], dtype=int64)

In [42]:
precision = [x[1,1]/(x[1,1]+x[0,1]) for x in m]

In [43]:
precision

[0.0,
 0.4444444444444444,
 0.0,
 0.29153605015673983,
 0.2503576537911302,
 0.0,
 0.10383511609705191,
 0.018867924528301886,
 0.4,
 0.08421052631578947,
 0.1126530612244898]

In [44]:
recall = np.array([x[1,1]/(x[1,1]+x[1,0]) for x in m])


In [45]:
recall

array([0.        , 0.00792079, 0.        , 0.14599686, 0.23026316,
       0.        , 0.5520111 , 0.00159744, 0.00693241, 0.0137931 ,
       0.17737789])

In [46]:
f1 = 2*(precision*recall/(precision+recall))
f1

  f1 = 2*(precision*recall/(precision+recall))


array([       nan, 0.0155642 ,        nan, 0.19456067, 0.23989034,
              nan, 0.17479139, 0.00294551, 0.01362862, 0.0237037 ,
       0.13779331])

In [47]:
ovr = OneVsRestClassifier(mlp)
ovr.fit(X_train, Y_train)

In [48]:
ovr_pred = ovr.predict(X_test)
M_ovr = multilabel_confusion_matrix(Y_test, ovr_pred)
M_ovr

array([[[6312,    5],
        [ 388,    0]],

       [[6197,    3],
        [ 500,    5]],

       [[6253,    1],
        [ 451,    0]],

       [[6049,   19],
        [ 618,   19]],

       [[5549,  396],
        [ 624,  136]],

       [[6015,    8],
        [ 682,    0]],

       [[5436,  548],
        [ 616,  105]],

       [[6063,   16],
        [ 626,    0]],

       [[6122,    6],
        [ 572,    5]],

       [[6081,   44],
        [ 578,    2]],

       [[5376,  551],
        [ 719,   59]]], dtype=int64)

In [49]:
p, r, f1, s = precision_recall_fscore_support(Y_test, ovr_pred)

In [50]:
p

array([0.        , 0.625     , 0.        , 0.5       , 0.2556391 ,
       0.        , 0.16079632, 0.        , 0.45454545, 0.04347826,
       0.09672131])

In [51]:
r

array([0.        , 0.00990099, 0.        , 0.02982732, 0.17894737,
       0.        , 0.14563107, 0.        , 0.00866551, 0.00344828,
       0.07583548])

In [52]:
f1

array([0.        , 0.01949318, 0.        , 0.0562963 , 0.21052632,
       0.        , 0.15283843, 0.        , 0.0170068 , 0.00638978,
       0.08501441])

In [53]:
s

array([388, 505, 451, 637, 760, 682, 721, 626, 577, 580, 778], dtype=int64)