In [33]:
from sklearn.neural_network import MLPClassifier
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain
from sklearn.metrics import jaccard_score, multilabel_confusion_matrix, f1_score, precision_score
import matplotlib.pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from skopt import BayesSearchCV
from sklearn import preprocessing
from sklearn.base import BaseEstimator, ClassifierMixin
import pickle
from sklearn.svm import SVC

In [49]:
class ChainSVM(BaseEstimator, ClassifierMixin):
    def __init__(self, tol, reg, kernel, coef0, degree):
        super(ChainSVM, self).__init__()
        self.tol = tol
        self.reg = reg
        self.coef0 = coef0
        self.kernel = kernel
        self.degree = degree
        self.model = SVC(tol=self.tol, C = self.reg, class_weight='balanced', kernel = 'poly', coef0=self.coef0, degree = self.degree)
        self.chain = ClassifierChain(self.model, order="random", random_state=1)

    def fit(self, X_train, Y_train):
        #self.chain = ClassifierChain(self.model, order="random", random_state=1)
        return self.chain.fit(X_train, Y_train)
    
    def score(self, X_test, Y_test):
        Y_pred_chain = self.chain.predict(X_test)
        precision = precision_score(Y_test, Y_pred_chain>=0.5, average='samples')
        return precision 
        

In [50]:
x = np.load('../Part1.npy')
Y_t = np.load('../train.npy/y_train.npy')
#X_train = (x-np.min(x))/(np.max(x)-np.min(x))
xt = np.load('../Part2.npy')
Y_te = np.load('../test.npy/Y_Test.npy')
#X_test = (xt-np.min(xt))/(np.max(xt)-np.min(xt))
x_add = np.load('../Part3.npy')
y_add = np.load('../additional.npy/Y_additional.npy')
#x_add = (x_add-np.min(x_add))/(np.max(x_add)-np.min(x_add))
Y = np.concatenate((Y_t, Y_te, y_add))
X = np.concatenate((x, xt, x_add))
#X_train = (X_train-np.min(X_train))/(np.max(X_train)-np.min(X_train))
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [52]:
search_space = {
    'coef0':(0.0,1),
    'degree':(2,8)
}

test = ChainSVM(0.0001, 1, 'rbf', coef0=0.0, degree = 3)
opt = BayesSearchCV(test, search_space, n_iter=50, cv=7, n_jobs=-1)
opt.fit(X_train, Y_train)

In [53]:
print("Best parameters found: ", opt.best_params_)
print("Best score: ", opt.best_score_)

Best parameters found:  OrderedDict([('coef0', 0.7958225015092468), ('degree', 6)])
Best score:  0.40246470219713587


In [54]:
import warnings
warnings.filterwarnings("ignore")
svm = SVC(C=0.15, tol=0.041, kernel='poly', coef0=0.79, degree=6)
chains = [ClassifierChain(svm, order="random", random_state=i) for i in range(10)]
for chain in chains:
    chain.fit(X_train, Y_train)

Y_pred_chains = np.array([chain.predict(X_test) for chain in chains])
chain_jaccard_scores = [
    jaccard_score(Y_test, Y_pred_chain >= 0.5, average="samples")
    for Y_pred_chain in Y_pred_chains
]

Y_pred_max = Y_pred_chains.max(axis=0)

In [55]:
Y_pred_ensemble = Y_pred_chains.mean(axis=0)
Y_pred = np.where(Y_pred_ensemble>0.5,1,0)
m = multilabel_confusion_matrix(Y_test, Y_pred)
print(m)

[[[683   8]
  [ 15  13]]

 [[698   2]
  [ 16   3]]

 [[671   5]
  [ 40   3]]

 [[565  22]
  [ 88  44]]

 [[449  36]
  [162  72]]

 [[611  14]
  [ 75  19]]

 [[405  62]
  [103 149]]

 [[631  11]
  [ 61  16]]

 [[678   6]
  [ 32   3]]

 [[663   9]
  [ 33  14]]

 [[386  75]
  [147 111]]]


In [56]:
tp = sum(m[:,1,1])
fp = sum(m[:,0,1])
tn = sum(m[:,0,0])
fn = sum(m[:,1,0])
print(tp, fp, tn, fn)

447 250 6440 772


In [58]:
precision = np.array([x[1,1]/(x[1,1]+x[0,1]) for x in m])
recall = np.array([x[1,1]/(x[1,1]+x[1,0]) for x in m])
f1 = 2*precision*recall/(precision+recall)
print(precision)

[0.61904762 0.6        0.375      0.66666667 0.66666667 0.57575758
 0.70616114 0.59259259 0.33333333 0.60869565 0.59677419]


In [59]:
recall

array([0.46428571, 0.15789474, 0.06976744, 0.33333333, 0.30769231,
       0.20212766, 0.59126984, 0.20779221, 0.08571429, 0.29787234,
       0.43023256])

In [60]:
f1

array([0.53061224, 0.25      , 0.11764706, 0.44444444, 0.42105263,
       0.2992126 , 0.64362851, 0.30769231, 0.13636364, 0.4       ,
       0.5       ])

In [None]:
import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots(figsize=(10, 6))
x_ax = ["Cello","Clarinet", "Flute", "Guitar", "E-Guitar", "Organ", "Piano", "Saxophone", "Trumpet", "Violin", "Voice"]
x = np.arange(len(precision))  # the label locations
width = 0.2  # the width of the bars
rects1 = ax.bar(x - width, precision, width, label='Precision')
rects2 = ax.bar(x, recall, width, label='Recall')
rects3 = ax.bar(x + width, f1, width, label='F1-Score')
ax.set_ylabel('Scores')
ax.set_title('Precision, Recall, and F1-Score for each instrument with SVM, multi label')
ax.set_xticks(x)
ax.set_xticklabels([i for i in x_ax])
ax.legend()
fig.tight_layout()
plt.savefig('ScoresSVMmulti.png')
plt.show()