In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle

X = np.genfromtxt('X.csv', delimiter = ',')
Y = np.genfromtxt('Y.csv', delimiter = ',')

In [2]:
X.shape

(99990, 220)

In [3]:
Y.shape

(99990,)

In [4]:
X_comp, X_test, Y_comp, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
Xtr, Xva, Ytr, Yva = train_test_split(X_comp, Y_comp, test_size=0.2)
Xtr, Ytr = shuffle(Xtr, Ytr)

In [5]:
Xtr.shape

(63993, 220)

In [6]:
Xva.shape

(15999, 220)

In [7]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import time

In [8]:
#mlp = MLPClassifier(max_iter=1000)
parameters1 = {
    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)]
}

parameters2 = {
    'activation': ['relu','identity','logistic','tanh']
}

parameters3 = {
    'solver': ['adam','lbfgs','sgd']
}

parameters4 = {
    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001]
}

parameters5 = {
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}
'''
parameter_space = {
    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)],
    'activation': ['relu','identity','logistic','tanh'],
    'solver': ['adam','lbfgs','sgd'],
    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001],
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}
'''

"\nparameter_space = {\n    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)],\n    'activation': ['relu','identity','logistic','tanh'],\n    'solver': ['adam','lbfgs','sgd'],\n    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001],\n    'learning_rate': ['constant', 'invscaling', 'adaptive']\n}\n"

In [30]:
print("parameters1 training starting...")
starting_time = time.time()
clf1 = GridSearchCV(MLPClassifier(max_iter=1000), parameters1, cv=5)
clf1.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf1.best_params_)

#all results
means = clf1.cv_results_['mean_test_score']
stds = clf1.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf1.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters1 training starting...
training finished, took 54.64198160171509 seconds
Best parameters found: 
 {'hidden_layer_sizes': (100,)}
0.644 (+/-0.037) for {'hidden_layer_sizes': (50, 50, 50)}
0.644 (+/-0.032) for {'hidden_layer_sizes': (50, 100, 50)}
0.659 (+/-0.038) for {'hidden_layer_sizes': (100,)}


In [37]:
best_hidden_layer_sizes = clf1.best_params_['hidden_layer_sizes']
print("best hidden_layer_sizes: ", best_hidden_layer_sizes)

best hidden_layer_sizes:  (100,)


In [38]:
print("parameters2 training starting...")
starting_time = time.time()
clf2 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes), parameters2, cv=5)
clf2.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf2.best_params_)

#all results
means = clf2.cv_results_['mean_test_score']
stds = clf2.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf2.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters2 training starting...
training finished, took 45.97746539115906 seconds
Best parameters found: 
 {'activation': 'identity'}
0.651 (+/-0.037) for {'activation': 'relu'}
0.670 (+/-0.030) for {'activation': 'identity'}
0.667 (+/-0.019) for {'activation': 'logistic'}
0.666 (+/-0.023) for {'activation': 'tanh'}


In [39]:
best_activation = clf2.best_params_['activation']
print("best_activation: ", best_activation)

best_activation:  identity


In [40]:
print("parameters3 training starting...")
starting_time = time.time()
clf3 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation), parameters3, cv=5)
clf3.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf3.best_params_)

#all results
means = clf3.cv_results_['mean_test_score']
stds = clf3.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf3.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters3 training starting...
training finished, took 47.59204983711243 seconds
Best parameters found: 
 {'solver': 'sgd'}
0.665 (+/-0.024) for {'solver': 'adam'}
0.666 (+/-0.026) for {'solver': 'lbfgs'}
0.668 (+/-0.017) for {'solver': 'sgd'}


In [41]:
best_solver = clf3.best_params_['solver']
print("best_solver: ", best_solver)

best_solver:  sgd


In [9]:
print("parameters4 training starting...")
starting_time = time.time()
clf4 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=(100,), activation='identity', solver='sgd'), parameters4, cv=5)
clf4.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf4.best_params_)

#all results
means = clf4.cv_results_['mean_test_score']
stds = clf4.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf4.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters4 training starting...
training finished, took 202.60059785842896 seconds
Best parameters found: 
 {'alpha': 0.01}
0.671 (+/-0.025) for {'alpha': 0.0001}
0.672 (+/-0.031) for {'alpha': 0.05}
0.668 (+/-0.017) for {'alpha': 0.1}
0.673 (+/-0.024) for {'alpha': 0.01}
0.669 (+/-0.023) for {'alpha': 0.001}


In [10]:
best_alpha = clf4.best_params_['alpha']
print("best_alpha: ", best_alpha)

best_alpha:  0.01


In [11]:
print("parameters5 training starting...")
starting_time = time.time()
clf5 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=(100,), activation='identity', solver='sgd', alpha=best_alpha), parameters5, cv=5)
clf5.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf5.best_params_)

#all results
means = clf5.cv_results_['mean_test_score']
stds = clf5.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf5.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters5 training starting...
training finished, took 85.35491991043091 seconds
Best parameters found: 
 {'learning_rate': 'adaptive'}
0.667 (+/-0.028) for {'learning_rate': 'constant'}
0.498 (+/-0.084) for {'learning_rate': 'invscaling'}
0.671 (+/-0.023) for {'learning_rate': 'adaptive'}


In [14]:
best_learning_rate = clf5.best_params_['learning_rate']
print("best_learning_rate: ", best_learning_rate)

best_learning_rate:  adaptive


In [35]:
'''mlp_classifier_roc = roc_auc_score(Yva, clf.predict_proba(Xva)[:,1])
print("mlp_classifier_roc: ", mlp_classifier_roc)

print("training error:", 1 - clf.score(Xtr, Ytr))
print("validation error:", 1 - clf.score(Xva, Yva))'''

mlp_classifier_roc:  0.7497837633723505
training error: 0.3237697873204882
validation error: 0.31351959497468596


In [15]:
optimal_mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes=(100,), activation='identity', solver='sgd', alpha=best_alpha, learning_rate=best_learning_rate)
optimal_mlp.fit(Xtr[:5000], Ytr[:5000])

optimal_mlp_roc = roc_auc_score(Yva, optimal_mlp.predict_proba(Xva)[:,1])
print("optimal_mlp_roc: ", optimal_mlp_roc)
print("training error:", 1 - optimal_mlp.score(Xtr, Ytr))
print("validation error:", 1 - optimal_mlp.score(Xva, Yva))

optimal_mlp_roc:  0.737568815540788
training error: 0.32062881877705374
validation error: 0.3233327082942684


In [None]:
optimal_mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes=(100,), activation='identity', solver='sgd', alpha=best_alpha, learning_rate=best_learning_rate)
optimal_mlp.fit(Xtr, Ytr)

optimal_mlp_roc = roc_auc_score(Yva, optimal_mlp.predict_proba(Xva)[:,1])
print("optimal_mlp_roc: ", optimal_mlp_roc)
print("training error:", 1 - optimal_mlp.score(Xtr, Ytr))
print("validation error:", 1 - optimal_mlp.score(Xva, Yva))