In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle
import scipy.sparse

X = scipy.sparse.load_npz('new_data/X_sparse.npz')#np.genfromtxt('X.csv', delimiter = ',')
Y = np.genfromtxt('new_data/Y.csv', delimiter = ',')

In [64]:
X.shape

(100000, 5000)

In [65]:
Y = Y[1:]
Y.shape

(100000,)

In [66]:
Y

array([1., 1., 0., ..., 1., 0., 0.])

In [67]:
Y.shape

(100000,)

In [68]:
X_comp, X_test, Y_comp, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
Xtr, Xva, Ytr, Yva = train_test_split(X_comp, Y_comp, test_size=0.2)
Xtr, Ytr = shuffle(Xtr, Ytr)

In [69]:
Xtr.shape

(64000, 5000)

In [70]:
Xva.shape

(16000, 5000)

In [71]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import time

In [88]:
#mlp = MLPClassifier(max_iter=1000)
parameters1 = {
    'hidden_layer_sizes': [(100,100,100),(100,150,50),(150,100,50),(200,100),(200,50,50),(150,150),(300,)]
}

parameters2 = {
    'activation': ['relu','identity','logistic','tanh']
}

parameters3 = {
    'solver': ['adam','lbfgs','sgd']
}

parameters4 = {
    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001]
}

parameters5 = {
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}

"\nparameter_space = {\n    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)],\n    'activation': ['relu','identity','logistic','tanh'],\n    'solver': ['adam','lbfgs','sgd'],\n    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001],\n    'learning_rate': ['constant', 'invscaling', 'adaptive']\n}\n"

In [89]:
print("parameters1 training starting...")
starting_time = time.time()
clf1 = GridSearchCV(MLPClassifier(max_iter=1000), parameters1, cv=5)
clf1.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf1.best_params_)

#all results
means = clf1.cv_results_['mean_test_score']
stds = clf1.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf1.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters1 training starting...
training finished, took 464.78902292251587 seconds
Best parameters found: 
 {'hidden_layer_sizes': (200, 50, 50)}
0.677 (+/-0.032) for {'hidden_layer_sizes': (100, 100, 100)}
0.672 (+/-0.007) for {'hidden_layer_sizes': (100, 150, 50)}
0.677 (+/-0.025) for {'hidden_layer_sizes': (150, 100, 50)}
0.677 (+/-0.016) for {'hidden_layer_sizes': (200, 100)}
0.678 (+/-0.019) for {'hidden_layer_sizes': (200, 50, 50)}
0.678 (+/-0.016) for {'hidden_layer_sizes': (150, 150)}
0.674 (+/-0.014) for {'hidden_layer_sizes': (300,)}


In [90]:
best_hidden_layer_sizes = clf1.best_params_['hidden_layer_sizes']
print("best hidden_layer_sizes: ", best_hidden_layer_sizes)

best hidden_layer_sizes:  (200, 50, 50)


In [91]:
print("parameters2 training starting...")
starting_time = time.time()
clf2 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes), parameters2, cv=5)
clf2.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf2.best_params_)

#all results
means = clf2.cv_results_['mean_test_score']
stds = clf2.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf2.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters2 training starting...
training finished, took 277.8222677707672 seconds
Best parameters found: 
 {'activation': 'relu'}
0.668 (+/-0.026) for {'activation': 'relu'}
0.649 (+/-0.032) for {'activation': 'identity'}
0.627 (+/-0.131) for {'activation': 'logistic'}
0.651 (+/-0.018) for {'activation': 'tanh'}


In [92]:
best_activation = clf2.best_params_['activation']
print("best_activation: ", best_activation)

best_activation:  relu


In [93]:
print("parameters3 training starting...")
starting_time = time.time()
clf3 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation), parameters3, cv=5)
clf3.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf3.best_params_)

#all results
means = clf3.cv_results_['mean_test_score']
stds = clf3.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf3.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters3 training starting...
training finished, took 588.1112458705902 seconds
Best parameters found: 
 {'solver': 'lbfgs'}
0.674 (+/-0.019) for {'solver': 'adam'}
0.687 (+/-0.023) for {'solver': 'lbfgs'}
0.580 (+/-0.168) for {'solver': 'sgd'}


In [94]:
best_solver = clf3.best_params_['solver']
print("best_solver: ", best_solver)

best_solver:  lbfgs


In [95]:
print("parameters4 training starting...")
starting_time = time.time()
clf4 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation, solver=best_solver), parameters4, cv=5)
clf4.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf4.best_params_)

#all results
means = clf4.cv_results_['mean_test_score']
stds = clf4.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf4.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters4 training starting...
training finished, took 2514.4083154201508 seconds
Best parameters found: 
 {'alpha': 0.0001}
0.682 (+/-0.022) for {'alpha': 0.0001}
0.667 (+/-0.031) for {'alpha': 0.05}
0.677 (+/-0.022) for {'alpha': 0.1}
0.681 (+/-0.017) for {'alpha': 0.01}
0.677 (+/-0.016) for {'alpha': 0.001}


In [96]:
best_alpha = clf4.best_params_['alpha']
print("best_alpha: ", best_alpha)

best_alpha:  0.0001


In [97]:
print("parameters5 training starting...")
starting_time = time.time()
clf5 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation, solver=best_solver, alpha=best_alpha), parameters5, cv=5)
clf5.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf5.best_params_)

#all results
means = clf5.cv_results_['mean_test_score']
stds = clf5.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf5.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters5 training starting...
training finished, took 480.82027649879456 seconds
Best parameters found: 
 {'learning_rate': 'constant'}
0.687 (+/-0.017) for {'learning_rate': 'constant'}
0.679 (+/-0.011) for {'learning_rate': 'invscaling'}
0.675 (+/-0.031) for {'learning_rate': 'adaptive'}


In [98]:
best_learning_rate = clf5.best_params_['learning_rate']
print("best_learning_rate: ", best_learning_rate)

best_learning_rate:  constant


In [99]:
optimal_mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, 
                            activation=best_activation, solver=best_solver, alpha=best_alpha, 
                            learning_rate=best_learning_rate)
optimal_mlp.fit(Xtr, Ytr)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(200, 50, 50), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [100]:
optimal_mlp_roc = roc_auc_score(Yva, optimal_mlp.predict_proba(Xva)[:,1])
print("optimal_mlp_roc: ", optimal_mlp_roc)
print("training error:", 1 - optimal_mlp.score(Xtr, Ytr))
print("validation error:", 1 - optimal_mlp.score(Xva, Yva))

optimal_mlp_roc:  0.7693450460363473
training error: 0.013921874999999972
validation error: 0.28174999999999994
