In [5]:
from sklearn.neural_network import MLPClassifier
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain
from sklearn.metrics import jaccard_score, multilabel_confusion_matrix
import matplotlib.pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn import preprocessing
from skopt import BayesSearchCV
from sklearn.base import BaseEstimator, ClassifierMixin
import librosa
import librosa.display
import librosa.feature
import IPython.display as ipd

In [6]:
x = np.load('../Part1.npy')
Y_t = np.load('../train.npy/y_train.npy')
#X_train = (x-np.min(x))/(np.max(x)-np.min(x))
xt = np.load('../Part2.npy')
Y_test = np.load('../test.npy/Y_Test.npy')
#X_test = (xt-np.min(xt))/(np.max(xt)-np.min(xt))
x_add = np.load('../Part3.npy')
y_add = np.load('../additional.npy/Y_additional.npy')
#x_add = (x_add-np.min(x_add))/(np.max(x_add)-np.min(x_add))
Y_train = np.concatenate((Y_t, Y_test, y_add))
X_train = np.concatenate((x, xt, x_add))
#X_train = (X_train-np.min(X_train))/(np.max(X_train)-np.min(X_train))
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

In [7]:
X_train.shape

(2874, 17)

In [4]:
#IRMAS-TrainingData/cel/[cel][pop_roc]0143__3.wav
instruments = ['cel/','cla/','flu/','gac/','gel/','org/','pia/','sax/','tru/','vio/','voi/']
pathAudio = "../IRMAS-TrainingData/"
X_test = []
Y_test = []
cnt = 0
for i in instruments:
    label = np.zeros(11)
    label[cnt] = 1
    cnt +=1
    #print(pathAudio + i)
    files = librosa.util.find_files(pathAudio + i, ext=['wav'], recurse=False) 
    files = np.asarray(files)
    #print(files)
    for y in files:
        #print(test)
        y, sr = librosa.load(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        rolloff = np.mean(rolloff)
        bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        bw = np.mean(bw)
        ctr = librosa.feature.spectral_centroid(y=y, sr=sr)
        ctr = np.mean(ctr)
        zcr = librosa.feature.zero_crossing_rate(y=y)
        zcr = np.mean(zcr)
        mfcc = np.mean(mfcc, axis=1)
        mfcc = mfcc[:13]
        mfcc = np.append(mfcc, [rolloff, bw, ctr, zcr])
        X_test.append(mfcc)
        Y_test.append(label)

In [8]:
X_test = np.load('X_single.npy')
Y_test = np.load('Y_single.npy')

In [9]:
np.array(X_test).shape

(6705, 17)

In [10]:
mlp = MLPClassifier(activation = 'identity',hidden_layer_sizes = [100] ,solver = 'lbfgs',alpha = 0.00003, batch_size = 50,
                    learning_rate_init=0.058, max_iter = 114, warm_start=True)
chains = [ClassifierChain(mlp, order="random", random_state=i) for i in range(10)]
for chain in chains:
    chain.fit(X_train, Y_train)

In [11]:
Y_pred_chains = np.array([chain.predict(X_test) for chain in chains])
chain_jaccard_scores = [
    jaccard_score(Y_test, Y_pred_chain >= 0.5, average="samples")
    for Y_pred_chain in Y_pred_chains
]

Y_pred_ensemble = Y_pred_chains.mean(axis=0)

In [12]:
chain_jaccard_scores

[0.08036823976421291,
 0.08089236887894607,
 0.07936046305173822,
 0.07997017151379568,
 0.07856432655090373,
 0.07977877206065127,
 0.07885817265011895,
 0.07997052661482192,
 0.07988423706544512,
 0.08158623628422287]

In [13]:
Y_pred = np.where(Y_pred_ensemble>0.5,1,0)

In [14]:
Y_pred

array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])

In [15]:
m = multilabel_confusion_matrix(Y_test, Y_pred)

In [16]:
m

array([[[ 557, 5760],
        [  59,  329]],

       [[ 331, 5869],
        [  11,  494]],

       [[  94, 6160],
        [  11,  440]],

       [[6068,    0],
        [ 637,    0]],

       [[5875,   70],
        [ 746,   14]],

       [[   0, 6023],
        [   0,  682]],

       [[   0, 5984],
        [   0,  721]],

       [[6079,    0],
        [ 626,    0]],

       [[6128,    0],
        [ 577,    0]],

       [[6125,    0],
        [ 580,    0]],

       [[5246,  681],
        [ 775,    3]]], dtype=int64)

In [21]:
precision = [x[1,1]/(x[1,1]+x[0,1]) for x in m]

  precision = [x[1,1]/(x[1,1]+x[0,1]) for x in m]


In [22]:
precision

[0.054031860732468386,
 0.07763633506207764,
 0.06666666666666667,
 nan,
 0.16666666666666666,
 0.10171513795674869,
 0.1075316927665921,
 nan,
 nan,
 nan,
 0.0043859649122807015]

In [24]:
recall = np.array([x[1,1]/(x[1,1]+x[1,0]) for x in m])


In [25]:
recall

array([0.84793814, 0.97821782, 0.97560976, 0.        , 0.01842105,
       1.        , 1.        , 0.        , 0.        , 0.        ,
       0.00385604])

In [28]:
f1 = 2*(precision*recall/(precision+recall))
f1

array([0.10159024, 0.14385556, 0.12480499,        nan, 0.03317536,
       0.18464871, 0.1941826 ,        nan,        nan,        nan,
       0.00410397])

In [29]:
np.save(open('X_single.npy', 'wb'), X_test)

In [30]:
np.save(open('Y_single.npy', 'wb'), Y_test)