In [2]:
import numpy as np
import pandas as pd
# library for splitting training-testing
from sklearn.model_selection import train_test_split
# library for classification
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from sklearn import pipeline

In [3]:
x_train = pd.read_feather("Data/x_train__IterativeImputer_genFeats.ftr")
x_test = pd.read_feather("Data/x_test__IterativeImputer_genFeats.ftr")

x_tr = pd.read_csv("Preds/preds_train.csv")
x_val_1 = pd.read_csv("Preds/preds_val.csv")
x_test_1 = pd.read_csv("Preds/preds_test.csv")

y_train = pd.read_csv("Data/train_y.csv")
y_test = pd.read_csv("Data/Sample_Output.csv", names = ['ID', 'Target'], header=None)

In [4]:
x_train_1, x_val, y_train_1, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=59, stratify=y_train)

In [5]:
x_train_1 = pd.concat([x_train_1.reset_index(drop=True), x_tr], axis=1)
x_val = pd.concat([x_val.reset_index(drop=True), x_val_1], axis=1)
x_test = pd.concat([x_test.reset_index(drop=True), x_test_1], axis=1)

In [6]:
model = MLPClassifier(random_state=59,
                     verbose=True,
                      max_iter=20,
                      learning_rate_init=0.001,
                      learning_rate='adaptive',
                      hidden_layer_sizes=(60,40),
                      alpha=0.0001
                 )

In [7]:
scl = StandardScaler()
pca = PCA(n_components=100)

pipe = pipeline.Pipeline(steps=[ ('model', model)], verbose=True)

In [12]:
%%time

pipe.fit(x_tr, y_train_1.values.reshape(-1))

Iteration 1, loss = 0.85793166
Iteration 2, loss = 0.85536574
Iteration 3, loss = 0.85520284
Iteration 4, loss = 0.85514079
Iteration 5, loss = 0.85513262
Iteration 6, loss = 0.85500428
Iteration 7, loss = 0.85494225
Iteration 8, loss = 0.85480428
Iteration 9, loss = 0.85480711
Iteration 10, loss = 0.85476698
Iteration 11, loss = 0.85479345
Iteration 12, loss = 0.85467595
Iteration 13, loss = 0.85469700
Iteration 14, loss = 0.85460683
Iteration 15, loss = 0.85459031
Iteration 16, loss = 0.85457687
Iteration 17, loss = 0.85452126
Iteration 18, loss = 0.85450686
Iteration 19, loss = 0.85454410
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
[Pipeline] ............. (step 1 of 1) Processing model, total=  49.1s
CPU times: user 3min 10s, sys: 5.49 s, total: 3min 16s
Wall time: 49.1 s


Pipeline(steps=[('model',
                 MLPClassifier(hidden_layer_sizes=(60, 40),
                               learning_rate='adaptive', max_iter=20,
                               random_state=59, verbose=True))],
         verbose=True)

Best: 0.4513, (60, 40)

In [13]:
preds_tr = pipe.predict(x_tr,)
preds_val = pipe.predict(x_val_1)

In [14]:
accuracy_score(y_train_1, preds_tr)

0.7410658435791999

In [15]:
accuracy_score(y_val, preds_val)

0.7410694829815361

In [16]:
x_tr

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9_11_diff,10_11_diff,12_mean,12_std,12_13_diff,12_14_diff,12_15_diff,13_14_diff,13_15_diff,14_15_diff
0,0.993990,0.869190,0.927896,0.996551,0.001928,0.009432,0.014073,0.000381,0.001614,0.033698,...,0.032340,0.033570,0.028741,0.040524,-0.085212,-0.020635,0.000757,0.064577,0.085969,0.021392
1,0.995758,0.838207,0.957323,0.995639,0.001157,0.027222,0.005914,0.000254,0.001155,0.063905,...,0.062632,0.021723,0.022300,0.032690,-0.068737,-0.011838,-0.000905,0.056899,0.067832,0.010934
2,0.993795,0.776918,0.926793,0.998510,0.000965,0.013918,0.008806,0.000051,0.001850,0.088968,...,0.088550,0.030505,0.039521,0.055776,-0.116807,-0.030087,0.002370,0.086719,0.119176,0.032457
3,0.996476,0.841127,0.875440,0.997124,0.001540,0.009748,0.021301,0.000404,0.000871,0.043268,...,0.042352,0.052090,0.039694,0.049773,-0.104744,-0.049140,-0.000443,0.055604,0.104301,0.048697
4,0.146044,0.998731,0.998285,0.978373,0.201281,0.000318,0.000185,0.001230,0.415091,0.000402,...,-0.005424,-0.005276,0.063421,0.116291,0.237035,0.236604,0.223012,-0.000431,-0.014023,-0.013592
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390071,0.998741,0.975003,0.992792,0.998238,0.000363,0.004669,0.000488,0.000412,0.000415,0.011200,...,0.010608,0.001993,0.003625,0.004027,-0.008648,-0.003655,-0.000277,0.004993,0.008371,0.003378
390072,0.002772,0.990292,0.936780,0.293642,0.904154,0.000613,0.010191,0.077472,0.058628,0.003809,...,-0.318939,-0.294878,0.092758,0.142773,0.029160,0.009287,-0.271691,-0.019873,-0.300851,-0.280978
390073,0.000048,0.981781,0.966017,0.279546,0.801821,0.000808,0.011563,0.103407,0.146660,0.006044,...,-0.342671,-0.334288,0.084790,0.123944,0.040105,0.043480,-0.216860,0.003375,-0.256965,-0.260340
390074,0.000050,0.995027,0.922230,0.217918,0.756714,0.000106,0.009783,0.095127,0.167308,0.001775,...,-0.367388,-0.338657,0.108573,0.142617,0.072834,0.038446,-0.241865,-0.034388,-0.314700,-0.280312


In [31]:
probs_tr = pipe.predict_proba(x_train_1)
probs_val = pipe.predict_proba(x_val)

In [32]:
np.save("Preds/NN_val", probs_val)
np.save("Preds/NN_tr", probs_tr)

probs_test = pipe.predict_proba(x_test)
np.save("Preds/NN_test", probs_test)