### MLP.
Θα προσπαθήσουμε να βελτιστοποιήσουμε τον MLP, αρχικά χωρίς τη χρήση του pipeline, για να μειώσουμε τον όγκο των δεδομένων μας και να προχωρήσουμε έπειτα έχοντας μια καλύτερη εικόνα για τον MLP στο dataset μας.

Παραθέτουμε τα βήματα που ακολουθήσαμε, κάνοντας διαδοχικά gridsearch στον MLP δοκιμάζοντες μικρό πεδίο για κάθε μια υπερπαράμετρο κάθε φορά, κρατόντας επίσης τον solver σταθερό.

In [26]:
import warnings 
warnings.filterwarnings('ignore')

In [27]:
import pandas as pd
df = pd.read_csv("spambase.data", header=None)

In [28]:
features_df = df.iloc[:,:-1]
labels_df = df.iloc[:,-1]

features = features_df.values
labels = labels_df.values

In [29]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)


In [30]:
import numpy as np

np.bincount(y_test)
print(X_train.shape)

(3220, 57)


In [31]:
from imblearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier


In [32]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report


In [33]:
X_train_small = X_train[:500]
y_train_small = y_train[:500]

In [34]:
np.arange(5, 200, 20)

array([  5,  25,  45,  65,  85, 105, 125, 145, 165, 185])

### Solver: lbfgs Hidden (5, 20, 5)

In [69]:
parameters = {
    'solver': ['lbfgs'], 
    'max_iter': [100, 250, 500], 
    'alpha': 10.0 ** -np.arange(1, 10), 
    'hidden_layer_sizes':np.arange(5, 20, 5)
}

clf = MLPClassifier()
clf_grid = GridSearchCV(clf, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid.fit(X_train, y_train)
preds = clf_grid.predict(X_test)

clf_grid.best_params_

{'alpha': 1e-05, 'hidden_layer_sizes': 5, 'max_iter': 500, 'solver': 'lbfgs'}

In [70]:
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.85      0.92      0.88       804
           1       0.87      0.77      0.82       577

   micro avg       0.86      0.86      0.86      1381
   macro avg       0.86      0.84      0.85      1381
weighted avg       0.86      0.86      0.86      1381



In [71]:
clf_grid.cv_results_

{'mean_fit_time': array([0.17285318, 0.50718083, 0.30823526, 0.25081916, 0.75075979,
        0.91024408, 0.26498489, 0.58632741, 0.77466297, 0.16014433,
        0.25803781, 0.65740466, 0.23397965, 0.39256482, 0.82270632,
        0.26306   , 0.48748102, 0.82649193, 0.18497777, 0.36977048,
        0.36535177, 0.21735244, 0.42585793, 1.03439798, 0.27901196,
        0.6823535 , 0.54022264, 0.16958141, 0.34554725, 0.61409039,
        0.21949263, 0.50696597, 0.87733564, 0.28866901, 0.57130446,
        1.01113029, 0.19120631, 0.39692016, 0.87489767, 0.23456736,
        0.42409329, 0.82799139, 0.28257093, 0.65595713, 1.1077848 ,
        0.17306418, 0.35108576, 0.53208728, 0.22659044, 0.52448153,
        0.80027795, 0.24053211, 0.62234535, 1.11396728, 0.21520123,
        0.42569952, 0.62821527, 0.22870455, 0.49904203, 0.79337902,
        0.28427725, 0.61906085, 0.79335442, 0.18580198, 0.72586689,
        0.78384986, 0.32610054, 0.53845234, 0.91241956, 0.28701019,
        0.55744629, 0.89179397,

### Solver: sgd Hidden: (5, 20, 5)

In [73]:
parameters = {
    'solver': ['sgd'], 
    'max_iter': [100, 250, 500], 
    'alpha': 10.0 ** -np.arange(1, 10), 
    'hidden_layer_sizes':np.arange(5, 20, 5)
}

clf_sgd_5 = MLPClassifier()
clf_grid_sgd_5 = GridSearchCV(clf_sgd_5, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_sgd_5.fit(X_train, y_train)
preds_sgd_5 = clf_grid_sgd_5.predict(X_test)

clf_grid_sgd_5.best_params_

{'alpha': 1e-09, 'hidden_layer_sizes': 15, 'max_iter': 100, 'solver': 'sgd'}

In [75]:
print(classification_report(y_test, preds_sgd_5))

              precision    recall  f1-score   support

           0       0.64      0.97      0.77       804
           1       0.87      0.24      0.38       577

   micro avg       0.67      0.67      0.67      1381
   macro avg       0.76      0.61      0.58      1381
weighted avg       0.74      0.67      0.61      1381



In [74]:
clf_grid_sgd_5.cv_results_

{'mean_fit_time': array([0.68477812, 0.96046143, 0.82078223, 0.90807247, 0.72081914,
        1.02378831, 0.68075471, 0.72190895, 0.88270788, 0.52333407,
        0.65991712, 0.85159054, 0.79281945, 1.09473057, 1.24307032,
        1.17957759, 1.02631526, 1.1186584 , 0.74728527, 1.14287677,
        1.12007885, 0.66945853, 0.73192353, 0.97793708, 0.609658  ,
        0.64240518, 0.67943993, 0.72856908, 0.79580965, 0.73014631,
        0.58781238, 1.09682736, 1.40231838, 0.69486766, 0.97424765,
        1.00130992, 0.71307631, 1.1442513 , 0.90070295, 1.03851085,
        0.85555153, 0.94284239, 0.89323916, 1.05483594, 0.78253031,
        0.85787621, 1.1010406 , 0.842904  , 0.72939367, 0.60787897,
        0.57976561, 0.69582925, 0.59650059, 0.84357538, 0.63581467,
        0.56808558, 0.93455114, 0.63651056, 0.84040065, 0.73560963,
        0.75319958, 0.72475052, 0.80595522, 0.44858494, 0.7269083 ,
        0.63550029, 0.60099158, 0.56091137, 0.90849872, 0.79301844,
        1.03839064, 1.2503161 ,

### Solver: adam Hidden: (5, 20, 5)

In [76]:
parameters = {
    'solver': ['adam'], 
    'max_iter': [100, 250, 500], 
    'alpha': 10.0 ** -np.arange(1, 10), 
    'hidden_layer_sizes':np.arange(5, 20, 5)
}

clf_adam_5 = MLPClassifier()
clf_grid_adam_5 = GridSearchCV(clf_adam_5, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_adam_5.fit(X_train, y_train)
preds_adam_5 = clf_grid_adam_5.predict(X_test)

clf_grid_adam_5.best_params_

{'alpha': 0.0001, 'hidden_layer_sizes': 5, 'max_iter': 500, 'solver': 'adam'}

In [79]:
print(classification_report(y_test, preds_adam_5))

              precision    recall  f1-score   support

           0       0.94      0.93      0.93       804
           1       0.90      0.91      0.91       577

   micro avg       0.92      0.92      0.92      1381
   macro avg       0.92      0.92      0.92      1381
weighted avg       0.92      0.92      0.92      1381



In [80]:
clf_grid_adam_5.cv_results_

{'mean_fit_time': array([0.92378874, 1.73742061, 2.17437677, 1.06666274, 1.44062552,
        1.51012855, 1.1312274 , 1.65650301, 1.60708342, 1.04422088,
        2.12999454, 2.42491674, 1.1293766 , 1.7424829 , 1.90403819,
        1.01180925, 1.62412448, 1.33791313, 0.8674366 , 1.9254684 ,
        1.7870708 , 0.94403868, 2.0827951 , 1.6462194 , 1.01649561,
        1.33007741, 1.16462016, 0.83605452, 2.18697996, 2.53781366,
        1.17822804, 1.94819398, 2.45540633, 1.10668907, 1.85419354,
        2.052214  , 0.9925055 , 1.90802779, 1.82902861, 0.98212333,
        1.80750194, 1.37370462, 1.04242301, 1.30747538, 1.28082199,
        0.86919374, 1.77672544, 2.76764493, 0.97174363, 1.38802519,
        1.43951325, 1.7533802 , 1.66257591, 1.15909543, 0.88979177,
        2.00892882, 1.9979218 , 0.89685278, 1.32760057, 1.49822469,
        0.98823504, 1.72399836, 1.25640426, 0.87246685, 1.43646278,
        2.07275777, 0.94227047, 1.47481456, 1.85114303, 1.06707125,
        1.20326338, 1.31153741,

### Activation (adam)

In [81]:
parameters = {
    'solver': ['adam'], 
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'max_iter': [200, 250, 350], 
    'alpha': [0.1, 0.2, 0.5], 
    'hidden_layer_sizes':[15, 20, 25]
}

clf_adam_a = MLPClassifier()
clf_grid_adam_a = GridSearchCV(clf_adam_a, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_adam_a.fit(X_train, y_train)
preds_adam_a = clf_grid_adam_a.predict(X_test)

clf_grid_adam_a.best_params_

{'activation': 'tanh',
 'alpha': 0.1,
 'hidden_layer_sizes': 20,
 'max_iter': 200,
 'solver': 'adam'}

In [82]:
print(classification_report(y_test, preds_adam_a))

              precision    recall  f1-score   support

           0       0.96      0.95      0.95       804
           1       0.93      0.94      0.93       577

   micro avg       0.94      0.94      0.94      1381
   macro avg       0.94      0.94      0.94      1381
weighted avg       0.94      0.94      0.94      1381



In [83]:
clf_grid_adam_a.cv_results_

{'mean_fit_time': array([0.89848418, 1.15791769, 1.33276052, 0.94871311, 1.49691896,
        1.24431763, 0.954987  , 0.89309826, 0.90882797, 1.3282835 ,
        1.08655791, 1.21596465, 1.00915699, 0.99103537, 1.08751488,
        1.0405952 , 0.98004341, 0.84319072, 1.24614968, 1.09512672,
        1.16654248, 1.27549844, 1.18498187, 1.31430063, 1.06871958,
        0.90997033, 1.26124859, 2.15063477, 1.83889627, 2.12764635,
        1.85116076, 1.98036456, 2.50744772, 2.21916056, 1.9935873 ,
        1.69681001, 1.8900424 , 2.39238734, 1.85780945, 2.00699778,
        2.02562146, 2.07990079, 2.20139461, 2.41681433, 2.60352998,
        2.27241607, 2.15607281, 2.47823367, 1.91661496, 2.24858975,
        1.76624832, 1.89790759, 2.44450488, 1.88767052, 1.60498834,
        1.45899148, 1.33922687, 1.55556378, 1.7407794 , 1.58714471,
        1.87071075, 1.33098178, 1.72870307, 1.36321278, 1.26121564,
        1.28296704, 1.3735034 , 1.50121708, 1.17867951, 1.5578567 ,
        1.95815144, 1.63449278,

### Activation (adam) 2

In [85]:
parameters = {
    'solver': ['adam'], 
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'max_iter': [200, 250, 350], 
    'alpha': [0.1, 0.2, 0.5], 
    'hidden_layer_sizes':[20, 25, 30]
}

clf_adam_a2 = MLPClassifier()
clf_grid_adam_a2 = GridSearchCV(clf_adam_a2, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_adam_a2.fit(X_train, y_train)
preds_adam_a2 = clf_grid_adam_a2.predict(X_test)

clf_grid_adam_a2.best_params_


{'activation': 'tanh',
 'alpha': 0.1,
 'hidden_layer_sizes': 30,
 'max_iter': 350,
 'solver': 'adam'}

In [86]:
print(classification_report(y_test, preds_adam_a2))

              precision    recall  f1-score   support

           0       0.95      0.93      0.94       804
           1       0.91      0.94      0.92       577

   micro avg       0.94      0.94      0.94      1381
   macro avg       0.93      0.94      0.93      1381
weighted avg       0.94      0.94      0.94      1381



In [87]:
clf_grid_adam_a2.cv_results_

{'mean_fit_time': array([0.85258799, 0.87053175, 0.97899022, 1.01850939, 0.77794094,
        0.92951503, 0.92091079, 0.70059414, 0.75130768, 1.07197399,
        0.89975848, 0.91944041, 0.78684802, 0.92854357, 0.9396668 ,
        0.86427984, 0.82504959, 0.8061482 , 0.87863274, 1.007658  ,
        0.83292122, 0.74683537, 0.82701054, 0.84012904, 0.85407619,
        1.08131404, 1.09631543, 2.38651824, 1.96686559, 1.90283237,
        1.70364747, 1.65628204, 1.52658095, 2.32554259, 3.09857903,
        2.5700006 , 2.83942204, 2.42979965, 1.96772013, 1.76176996,
        1.79391727, 1.70383501, 2.36652346, 1.93038802, 1.72825975,
        1.64488258, 1.43417559, 1.62853241, 1.76108427, 1.63991551,
        1.7272006 , 2.14722037, 1.79968777, 1.66917171, 1.37771087,
        1.48580856, 1.32811756, 1.81930394, 1.98490071, 1.50514722,
        1.36196246, 1.50500922, 1.64367347, 1.38138032, 1.14922338,
        1.49875965, 1.47103238, 1.64745417, 1.66023703, 1.47723675,
        1.2872828 , 1.54678278,

### Activation (lbfs)

In [88]:
parameters = {
    'solver': ['lbfgs'], 
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'max_iter': [400, 500, 600], 
    'alpha': 10.0 ** -np.arange(7, 10), 
    'hidden_layer_sizes':[15, 20, 25]
}

clf_lbfs_a = MLPClassifier()
clf_grid_lbfs_a = GridSearchCV(clf_lbfs_a, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_lbfs_a.fit(X_train, y_train)
preds_lbfs_a = clf_grid_lbfs_a.predict(X_test)

clf_grid_lbfs_a.best_params_

{'activation': 'logistic',
 'alpha': 1e-08,
 'hidden_layer_sizes': 15,
 'max_iter': 600,
 'solver': 'lbfgs'}

In [102]:
print(classification_report(y_test, preds_lbfs_a))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94       804
           1       0.92      0.91      0.91       577

   micro avg       0.93      0.93      0.93      1381
   macro avg       0.93      0.93      0.93      1381
weighted avg       0.93      0.93      0.93      1381



In [103]:
clf_grid_lbfs_a.cv_results_

{'mean_fit_time': array([0.66557751, 0.86610193, 1.12905326, 1.70032744, 1.30409236,
        0.70079341, 0.66649122, 0.64963145, 0.93162417, 0.6169908 ,
        0.40814261, 0.82408133, 0.63242497, 0.46258488, 0.35828962,
        1.1802135 , 1.03906236, 0.71550283, 0.43020139, 0.69228215,
        0.62328072, 0.54553528, 0.50966215, 0.47302737, 0.97011237,
        0.54158254, 0.56196342, 1.38420835, 1.63701854, 1.89225941,
        1.54493895, 1.71462617, 2.35061307, 1.90507197, 2.36134095,
        2.8251163 , 1.30541406, 1.61181202, 1.94681997, 1.53995848,
        1.92810221, 2.10760403, 1.89273782, 2.54200201, 2.88141975,
        1.31342192, 1.62502155, 2.08275452, 2.03225389, 2.51768279,
        2.08134594, 1.97808952, 2.95296774, 2.99861259, 1.22101378,
        1.54464574, 1.90197926, 1.51005964, 1.92147093, 2.2239346 ,
        1.82300167, 2.26080427, 2.71717091, 1.26115165, 1.5850482 ,
        1.89056721, 1.49226694, 1.92751923, 2.24479852, 1.82749486,
        2.25991735, 2.75162601,

### Activation (lbfs) iter

In [92]:
parameters = {
    'solver': ['lbfgs'], 
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'max_iter': [500, 600, 700], 
    'alpha': 10.0 ** -np.arange(7, 10), 
    'hidden_layer_sizes':[15, 20, 25]
}

clf_lbfs_a_i = MLPClassifier()
clf_grid_lbfs_a_i = GridSearchCV(clf_lbfs_a_i, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_lbfs_a_i.fit(X_train, y_train)
preds_lbfs_a_i = clf_grid_lbfs_a_i.predict(X_test)

clf_grid_lbfs_a_i.best_params_

{'activation': 'logistic',
 'alpha': 1e-09,
 'hidden_layer_sizes': 20,
 'max_iter': 700,
 'solver': 'lbfgs'}

In [93]:
print(classification_report(y_test, preds_lbfs_a_i))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94       804
           1       0.91      0.92      0.92       577

   micro avg       0.93      0.93      0.93      1381
   macro avg       0.93      0.93      0.93      1381
weighted avg       0.93      0.93      0.93      1381



In [94]:
clf_grid_lbfs_a_i.cv_results_

{'mean_fit_time': array([0.6203692 , 0.56324   , 0.84631276, 0.74451294, 0.88136272,
        0.94061842, 0.73151264, 1.27658091, 1.09315658, 0.76185055,
        0.72734318, 0.81749949, 0.59872937, 0.63184175, 0.29798365,
        0.70989532, 1.05738835, 1.54537406, 0.84135742, 0.32115669,
        0.47522058, 0.69098449, 0.91079063, 0.70199361, 0.79432979,
        0.70804887, 0.79320626, 1.67782345, 2.31482244, 3.2860714 ,
        3.5613719 , 2.98311448, 2.8718627 , 2.50207376, 2.97504501,
        3.45218234, 1.74477019, 2.0345408 , 2.39577665, 2.03477659,
        2.45078373, 2.89676957, 2.5376524 , 3.06834254, 3.60547147,
        1.72269211, 2.05734119, 2.14823003, 1.98285136, 2.43304291,
        2.81264701, 2.47463293, 2.99334626, 3.21934228, 1.60996647,
        1.9514534 , 2.2973474 , 1.93868575, 2.33450661, 2.72841291,
        2.3903049 , 2.88479042, 3.35447326, 1.6489284 , 1.9552484 ,
        2.32995682, 1.96004496, 2.3216845 , 2.71896572, 2.412183  ,
        2.88053737, 3.28169684,

### Activation (lbfs) iter2

In [95]:
parameters = {
    'solver': ['lbfgs'], 
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'max_iter': [700, 800, 900], 
    'alpha': 10.0 ** -np.arange(7, 10), 
    'hidden_layer_sizes':[15, 20, 25]
}

clf_lbfs_a_i2 = MLPClassifier()
clf_grid_lbfs_a_i2 = GridSearchCV(clf_lbfs_a_i2, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_lbfs_a_i2.fit(X_train, y_train)
preds_lbfs_a_i2 = clf_grid_lbfs_a_i2.predict(X_test)

clf_grid_lbfs_a_i2.best_params_

{'activation': 'logistic',
 'alpha': 1e-07,
 'hidden_layer_sizes': 15,
 'max_iter': 800,
 'solver': 'lbfgs'}

In [97]:
print(classification_report(y_test, preds_lbfs_a_i2))

              precision    recall  f1-score   support

           0       0.95      0.96      0.95       804
           1       0.94      0.93      0.93       577

   micro avg       0.95      0.95      0.95      1381
   macro avg       0.94      0.94      0.94      1381
weighted avg       0.95      0.95      0.95      1381



In [98]:
clf_grid_lbfs_a_i2.cv_results_

{'mean_fit_time': array([0.74561291, 1.691467  , 1.39930725, 1.11413465, 0.77563825,
        2.86014366, 1.14797091, 1.71880236, 1.837256  , 1.48011985,
        2.11058989, 1.76366997, 1.04980183, 0.4900528 , 1.68242273,
        1.31190562, 0.61815062, 1.79279356, 0.91355662, 0.77737193,
        1.30475063, 1.061624  , 1.17103825, 1.96969109, 0.90335221,
        0.956598  , 1.80714602, 2.26588097, 2.14304352, 2.86948581,
        2.6897172 , 3.17422066, 3.4912847 , 3.30375428, 3.78413796,
        6.20027719, 3.66748853, 2.92332788, 5.54943485, 4.44703465,
        3.46069894, 4.22770567, 4.58296385, 5.04004745, 5.20671058,
        3.30415053, 3.43838105, 2.86343832, 3.26505618, 4.06399341,
        5.18325324, 4.93233256, 5.99337873, 6.09672766, 3.74825282,
        3.60791202, 3.70239439, 3.41199756, 3.89473519, 4.05460286,
        3.98050747, 4.53673382, 5.85241046, 3.15264883, 2.61793499,
        3.22109475, 2.86828861, 4.09641299, 3.97728662, 3.4025866 ,
        4.45877838, 5.64482756,

In [100]:
parameters = {
    'solver': ['lbfgs', 'sgd', 'adam'], 
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'max_iter': [700, 800, 900], 
    'alpha': 10.0 ** -np.arange(7, 10), 
    'hidden_layer_sizes':[15, 20, 25]
}

clf_h1 = MLPClassifier()
clf_grid_h1 = GridSearchCV(clf_h1, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_h1.fit(X_train, y_train)
preds_h1 = clf_grid_h1.predict(X_test)

clf_grid_h1.best_params_

{'activation': 'logistic',
 'alpha': 1e-07,
 'hidden_layer_sizes': 25,
 'max_iter': 800,
 'solver': 'adam'}

In [101]:
print(classification_report(y_test, preds_h1))

              precision    recall  f1-score   support

           0       0.95      0.96      0.96       804
           1       0.95      0.94      0.94       577

   micro avg       0.95      0.95      0.95      1381
   macro avg       0.95      0.95      0.95      1381
weighted avg       0.95      0.95      0.95      1381



### Almost final (adam)

In [104]:
parameters = {
    'solver': ['adam'], 
    'activation': ['logistic', 'tanh'],
    'max_iter': [700, 800, 900], 
    'alpha': 10.0 ** -np.arange(7, 10), 
    'hidden_layer_sizes':[20, 25, 30],
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}

clf_f1 = MLPClassifier()
clf_grid_f1 = GridSearchCV(clf_f1, parameters, cv=5, scoring="f1_micro", n_jobs=3)
clf_grid_f1.fit(X_train, y_train)
preds_f1 = clf_grid_f1.predict(X_test)

clf_grid_f1.best_params_

{'activation': 'tanh',
 'alpha': 1e-08,
 'hidden_layer_sizes': 30,
 'learning_rate': 'invscaling',
 'max_iter': 800,
 'solver': 'adam'}

In [105]:
print(classification_report(y_test, preds_f1))

              precision    recall  f1-score   support

           0       0.96      0.94      0.95       804
           1       0.92      0.94      0.93       577

   micro avg       0.94      0.94      0.94      1381
   macro avg       0.94      0.94      0.94      1381
weighted avg       0.94      0.94      0.94      1381



### Almost final (lbfgs)

In [24]:
parameters = {
    'solver': ['lbfgs'], 
    'activation': ['logistic'],
    'max_iter': [1500], 
    'alpha': 10.0 ** -np.arange(8, 9), 
    'hidden_layer_sizes':[20],
    'learning_rate': ['invscaling']
}

clf_f2 = MLPClassifier()
clf_grid_f2 = GridSearchCV(clf_f2, parameters, cv=5, scoring="f1_micro", n_jobs=3)
clf_grid_f2.fit(X_train, y_train)
preds_f2 = clf_grid_f2.predict(X_test)

clf_grid_f2.best_params_

{'activation': 'logistic',
 'alpha': 1e-08,
 'hidden_layer_sizes': 20,
 'learning_rate': 'invscaling',
 'max_iter': 1500,
 'solver': 'lbfgs'}

In [25]:
print(classification_report(y_test, preds_f2))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95       804
           1       0.93      0.93      0.93       577

   micro avg       0.94      0.94      0.94      1381
   macro avg       0.94      0.94      0.94      1381
weighted avg       0.94      0.94      0.94      1381



## sgd.

In [21]:
parameters = {
    'solver': ['sgd'], 
    'max_iter': [400, 450, 500], 
    'alpha': 10.0 ** -np.arange(10, 11), 
    'hidden_layer_sizes':[70, 80, 90],
    'activation': ['identity', 'tanh', 'relu'],
    'learning_rate': ['adaptive']
}

clf_sgd_f_5 = MLPClassifier()
clf_grid_sgd_f_5 = GridSearchCV(clf_sgd_f_5, parameters, cv=5, scoring="f1_micro", n_jobs=-1)
clf_grid_sgd_f_5.fit(X_train, y_train)
preds_sgd_f_5 = clf_grid_sgd_f_5.predict(X_test)

clf_grid_sgd_f_5.best_params_

{'activation': 'identity',
 'alpha': 1e-10,
 'hidden_layer_sizes': 70,
 'learning_rate': 'adaptive',
 'max_iter': 400,
 'solver': 'sgd'}

In [22]:
print(classification_report(y_test, preds_sgd_f_5))

              precision    recall  f1-score   support

           0       0.87      0.92      0.89       804
           1       0.88      0.81      0.84       577

   micro avg       0.87      0.87      0.87      1381
   macro avg       0.87      0.86      0.87      1381
weighted avg       0.87      0.87      0.87      1381



Ο sgd δεν ξεπερνάει score μεγαλύτερο του 0.88, τον αφήνουμε.

Παρόλο που παρακάμψαμε το βήμα του pipeline, βλέπουμε πως έχουμε ήδη αρκετά υψηλά scores (~0.94) στους adams και lbfgs με τις υπερπαραμέτρους στις οποίες καταλήξαμε.