In [3]:
from sklearn.neural_network import MLPClassifier
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain
from sklearn.metrics import jaccard_score, multilabel_confusion_matrix
import matplotlib.pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from skopt import BayesSearchCV
from sklearn import preprocessing
from sklearn.base import BaseEstimator, ClassifierMixin

In [9]:
x = np.load('../Part1.npy')
Y_t = np.load('../train.npy/y_train.npy')
#X_train = (x-np.min(x))/(np.max(x)-np.min(x))
xt = np.load('../Part2.npy')
Y_te = np.load('../test.npy/Y_Test.npy')
#X_test = (xt-np.min(xt))/(np.max(xt)-np.min(xt))
x_add = np.load('../Part3.npy')
y_add = np.load('../additional.npy/Y_additional.npy')
#x_add = (x_add-np.min(x_add))/(np.max(x_add)-np.min(x_add))
Y = np.concatenate((Y_t, Y_te, y_add))
X = np.concatenate((x, xt, x_add))
#X_train = (X_train-np.min(X_train))/(np.max(X_train)-np.min(X_train))
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [11]:
class ChainMLP(BaseEstimator, ClassifierMixin):
    def __init__(self, hidden_layer_sizes, activation, solver, alpha, batch_size, learning_rate, learning_rate_init, 
                 max_iter, shuffle, warm_start):
        super(ChainMLP, self).__init__()
        self.activation = activation
        self.hidden_layer_sizes = hidden_layer_sizes
        self.alpha = alpha
        self.learning_rate_init = learning_rate_init
        self.max_iter = max_iter
        self.model = MLPClassifier(activation = activation, hidden_layer_sizes = hidden_layer_sizes ,solver = 'adam',
                                   alpha = alpha, batch_size = 200, learning_rate_init= learning_rate_init, max_iter = max_iter)
        self.chain = ClassifierChain(self.model, order="random", random_state=1)
        self.solver = solver
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.shuffle = shuffle
        self.warm_start = warm_start
    def fit(self, X_train, Y_train):
        #self.chain = ClassifierChain(self.model, order="random", random_state=1)
        return self.chain.fit(X_train, Y_train)
    
    def score(self, X_test, Y_test):
        Y_pred_chain = self.chain.predict(X_test)
        chain_jaccard_score = jaccard_score(Y_test, Y_pred_chain >= 0.5, average="samples")
        return chain_jaccard_score
        


In [12]:
search_space = {
    'hidden_layer_sizes': [(100), (200), (50)],#, (40, 20), (50, 30)],
    'activation': ['identity', 'logistic', 'relu', 'tanh'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': (1e-5, 1e-2, 'log-uniform'),
    'batch_size': [50, 100, 200, 500],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
    'learning_rate_init': (0.0001, 0.1, 'log-uniform'),
    'max_iter': (50, 200),
    'shuffle': [True, False],
    'warm_start':[True, False],
    
}

test = ChainMLP((100), activation = 'tanh', solver = 'adam', alpha= 0.001,learning_rate='constant', 
                learning_rate_init= 0.01, batch_size=200, max_iter = 200, shuffle = True, warm_start = True)
opt = BayesSearchCV(test, search_space, n_iter=50, cv=7, n_jobs=-1)
opt.fit(X_train, Y_train)

In [13]:
print("Best parameters found: ", opt.best_params_)
print("Best score: ", opt.best_score_)

Best parameters found:  OrderedDict([('activation', 'tanh'), ('alpha', 0.0004926311759039375), ('batch_size', 50), ('hidden_layer_sizes', 100), ('learning_rate', 'constant'), ('learning_rate_init', 0.0073926370411543825), ('max_iter', 144), ('shuffle', True), ('solver', 'lbfgs'), ('warm_start', True)])
Best score:  0.5140925307049085


In [14]:
import warnings
warnings.filterwarnings("ignore")
mlp = MLPClassifier(activation = 'tanh',hidden_layer_sizes = [100] ,solver = 'lbfgs',alpha = 0.00049, batch_size = 50, 
                    learning_rate='constant', learning_rate_init=0.007, max_iter = 144, warm_start=True, shuffle=True)
chains = [ClassifierChain(mlp, order="random", random_state=i) for i in range(10)]
for chain in chains:
    chain.fit(X_train, Y_train)

Y_pred_chains = np.array([chain.predict(X_test) for chain in chains])
chain_jaccard_scores = [
    jaccard_score(Y_test, Y_pred_chain >= 0.5, average="samples")
    for Y_pred_chain in Y_pred_chains
]

Y_pred_max = Y_pred_chains.max(axis=0)

In [15]:
Y_pred_ensemble = Y_pred_chains.mean(axis=0)
Y_pred = np.where(Y_pred_ensemble>0.5,1,0)

In [16]:
m = multilabel_confusion_matrix(Y_test, Y_pred)

In [19]:
print(m)

[[[682   3]
  [ 23  11]]

 [[699   2]
  [ 12   6]]

 [[666  12]
  [ 23  18]]

 [[557  29]
  [ 71  62]]

 [[420  61]
  [116 122]]

 [[617  14]
  [ 53  35]]

 [[420  48]
  [ 96 155]]

 [[619   5]
  [ 59  36]]

 [[666   8]
  [ 30  15]]

 [[666   6]
  [ 27  20]]

 [[362  86]
  [123 148]]]


In [32]:
precision = np.array([x[1,1]/(x[1,1]+x[0,1]) for x in m])
precision

array([0.78571429, 0.75      , 0.6       , 0.68131868, 0.66666667,
       0.71428571, 0.7635468 , 0.87804878, 0.65217391, 0.76923077,
       0.63247863])

In [20]:
tp = sum(m[:,1,1])
fp = sum(m[:,0,1])
tn = sum(m[:,0,0])
fn = sum(m[:,1,0])
print(tp, fp, tn, fn)

628 274 6374 633


In [24]:
precision_ovr = tp/(tp+fp)
precision_ovr

0.6962305986696231

In [25]:
recall_ovr = tp/(tp+fn)
recall_ovr

0.4980174464710547

In [29]:
f1_ovr = 2 *precision_ovr*recall_ovr/(precision_ovr+recall_ovr)
f1_ovr

0.5806749884419787

In [31]:
recall = np.array([x[1,1]/(x[1,1]+x[1,0]) for x in m])
recall

array([0.32352941, 0.33333333, 0.43902439, 0.46616541, 0.51260504,
       0.39772727, 0.61752988, 0.37894737, 0.33333333, 0.42553191,
       0.54612546])

In [33]:
f1 = 2*precision*recall/(precision+recall)
f1

array([0.45833333, 0.46153846, 0.50704225, 0.55357143, 0.57957245,
       0.51094891, 0.68281938, 0.52941176, 0.44117647, 0.54794521,
       0.58613861])