In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle
import scipy.sparse

X = scipy.sparse.load_npz('../new_data/X_sparse.npz')#np.genfromtxt('X.csv', delimiter = ',')
Y = np.genfromtxt('../new_data/Y.csv', delimiter = ',')

In [2]:
X.shape

(100000, 5000)

In [4]:
Y = Y[1:]
Y.shape

(100000,)

In [5]:
Y[:10]

array([1., 1., 0., 1., 1., 0., 1., 1., 1., 1.])

In [26]:
Y_df = pd.DataFrame(Y)
Y_df.to_csv('../new_data/Y.csv', index = False)

In [14]:
Y.shape

(100000,)

In [6]:
X_comp, X_test, Y_comp, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
Xtr, Xva, Ytr, Yva = train_test_split(X_comp, Y_comp, test_size=0.2)
# Xtr, Ytr = shuffle(Xtr, Ytr)

In [7]:
Xtr.shape

(64000, 5000)

In [8]:
Xva.shape

(16000, 5000)

In [9]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import time

In [13]:
#mlp = MLPClassifier(max_iter=1000)
parameters1 = {
    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)]
}

parameters2 = {
    'activation': ['relu','identity','logistic','tanh']
}

parameters3 = {
    'solver': ['adam','lbfgs','sgd']
}

parameters4 = {
    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001]
}

parameters5 = {
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}
'''
parameter_space = {
    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)],
    'activation': ['relu','identity','logistic','tanh'],
    'solver': ['adam','lbfgs','sgd'],
    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001],
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}
'''

"\nparameter_space = {\n    'hidden_layer_sizes': [(50,50,50),(50,100,50),(100,)],\n    'activation': ['relu','identity','logistic','tanh'],\n    'solver': ['adam','lbfgs','sgd'],\n    'alpha': [0.0001, 0.05, 0.1, 0.01, 0.001],\n    'learning_rate': ['constant', 'invscaling', 'adaptive']\n}\n"

In [14]:
print("parameters1 training starting...")
starting_time = time.time()
clf1 = GridSearchCV(MLPClassifier(max_iter=1000), parameters1, cv=5)
clf1.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf1.best_params_)

#all results
means = clf1.cv_results_['mean_test_score']
stds = clf1.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf1.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters1 training starting...
training finished, took 183.28552889823914 seconds
Best parameters found: 
 {'hidden_layer_sizes': (50, 50, 50)}
0.676 (+/-0.019) for {'hidden_layer_sizes': (50, 50, 50)}
0.673 (+/-0.029) for {'hidden_layer_sizes': (50, 100, 50)}
0.670 (+/-0.014) for {'hidden_layer_sizes': (100,)}


In [15]:
best_hidden_layer_sizes = clf1.best_params_['hidden_layer_sizes']
print("best hidden_layer_sizes: ", best_hidden_layer_sizes)

best hidden_layer_sizes:  (50, 50, 50)


In [16]:
print("parameters2 training starting...")
starting_time = time.time()
clf2 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes), parameters2, cv=5)
clf2.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf2.best_params_)

#all results
means = clf2.cv_results_['mean_test_score']
stds = clf2.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf2.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters2 training starting...
training finished, took 146.32048296928406 seconds
Best parameters found: 
 {'activation': 'relu'}
0.668 (+/-0.022) for {'activation': 'relu'}
0.646 (+/-0.020) for {'activation': 'identity'}
0.653 (+/-0.025) for {'activation': 'logistic'}
0.649 (+/-0.031) for {'activation': 'tanh'}


In [17]:
best_activation = clf2.best_params_['activation']
print("best_activation: ", best_activation)

best_activation:  relu


In [18]:
print("parameters3 training starting...")
starting_time = time.time()
clf3 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation), parameters3, cv=5)
clf3.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf3.best_params_)

#all results
means = clf3.cv_results_['mean_test_score']
stds = clf3.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf3.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters3 training starting...
training finished, took 99.28910970687866 seconds
Best parameters found: 
 {'solver': 'adam'}
0.676 (+/-0.027) for {'solver': 'adam'}
0.673 (+/-0.025) for {'solver': 'lbfgs'}
0.587 (+/-0.170) for {'solver': 'sgd'}


In [19]:
best_solver = clf3.best_params_['solver']
print("best_solver: ", best_solver)

best_solver:  adam


In [20]:
print("parameters4 training starting...")
starting_time = time.time()
clf4 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation, solver=best_solver), parameters4, cv=5)
clf4.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf4.best_params_)

#all results
means = clf4.cv_results_['mean_test_score']
stds = clf4.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf4.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters4 training starting...




training finished, took 99.13655018806458 seconds
Best parameters found: 
 {'alpha': 0.1}
0.673 (+/-0.024) for {'alpha': 0.0001}
0.676 (+/-0.018) for {'alpha': 0.05}
0.681 (+/-0.021) for {'alpha': 0.1}
0.669 (+/-0.025) for {'alpha': 0.01}
0.671 (+/-0.018) for {'alpha': 0.001}


In [21]:
best_alpha = clf4.best_params_['alpha']
print("best_alpha: ", best_alpha)

best_alpha:  0.1


In [22]:
print("parameters5 training starting...")
starting_time = time.time()
clf5 = GridSearchCV(MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation, solver=best_solver, alpha=best_alpha), parameters5, cv=5)
clf5.fit(Xtr[:5000], Ytr[:5000])
end_time = time.time()
print("training finished, took {} seconds".format(end_time - starting_time))

#best parameter set
print("Best parameters found: \n", clf5.best_params_)

#all results
means = clf5.cv_results_['mean_test_score']
stds = clf5.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf5.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

parameters5 training starting...




training finished, took 53.06690692901611 seconds
Best parameters found: 
 {'learning_rate': 'invscaling'}
0.677 (+/-0.025) for {'learning_rate': 'constant'}
0.685 (+/-0.030) for {'learning_rate': 'invscaling'}
0.681 (+/-0.028) for {'learning_rate': 'adaptive'}


In [23]:
best_learning_rate = clf5.best_params_['learning_rate']
print("best_learning_rate: ", best_learning_rate)

best_learning_rate:  invscaling


In [24]:
optimal_mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes=best_hidden_layer_sizes, 
                            activation=best_activation, solver=best_solver, alpha=best_alpha, 
                            learning_rate=best_learning_rate)
optimal_mlp.fit(Xtr, Ytr)

MLPClassifier(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(50, 50, 50), learning_rate='invscaling',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [25]:
optimal_mlp_roc = roc_auc_score(Yva, optimal_mlp.predict_proba(Xva)[:,1])
print("optimal_mlp_roc: ", optimal_mlp_roc)
print("training acc:", optimal_mlp.score(Xtr, Ytr))
print("validation acc:", optimal_mlp.score(Xva, Yva))

optimal_mlp_roc:  0.7691234544700865
training acc: 0.97709375
validation acc: 0.70525
