In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import (roc_auc_score, balanced_accuracy_score)
import numpy 
import time

In [16]:
seed = 777
numpy.random.seed(seed)

kfold = KFold(n_splits=10, shuffle=True)


layers = [(1,),(4,),(12,),(1,3),(4,3),(12,3)]

In [23]:
scores = []
datasets = ['n_250.csv', 'n_1000.csv', 'n_10000.csv']
for dset in datasets:
    print(f"starting {dset}")
    data = numpy.genfromtxt(dset, delimiter=',')
    inputs = data[:,:2]
    targets = data[:,2]
    for layer in layers:
        fold_number = 1
        for train, test in kfold.split(inputs, targets):
            scoredict = {}
            scoredict['dataset'] = dset
            scoredict['model'] = layer
            scoredict['fold'] = fold_number
            x_train = inputs[train]
            y_train = targets[train]
            x_test = inputs[test]
            y_test = targets[test]

            s = time.time()
            model = MLPClassifier(layer,activation='tanh',solver='sgd', max_iter=500, early_stopping=True)
            model.fit(x_train, y_train)
            e = time.time()
            scoredict['training_time'] = e-s
            y_pred = model.predict(x_test)

            scoredict['roc auc'] = roc_auc_score(y_test, y_pred)
            scoredict['balanced accuracy'] = balanced_accuracy_score(y_test, y_pred)
            scores.append(scoredict)
            fold_number += 1

    



starting n_250.csv
starting n_1000.csv
starting n_10000.csv


In [24]:
print(scores)

 'fold': 9, 'training_time': 0.13050365447998047, 'roc auc': 0.5, 'balanced accuracy': 0.5}, {'dataset': 'n_250.csv', 'model': (4, 3), 'fold': 10, 'training_time': 0.28348803520202637, 'roc auc': 0.5, 'balanced accuracy': 0.5}, {'dataset': 'n_250.csv', 'model': (12, 3), 'fold': 1, 'training_time': 0.36350011825561523, 'roc auc': 0.5, 'balanced accuracy': 0.5}, {'dataset': 'n_250.csv', 'model': (12, 3), 'fold': 2, 'training_time': 0.4324979782104492, 'roc auc': 0.6428571428571428, 'balanced accuracy': 0.6428571428571428}, {'dataset': 'n_250.csv', 'model': (12, 3), 'fold': 3, 'training_time': 0.40850090980529785, 'roc auc': 0.5714285714285714, 'balanced accuracy': 0.5714285714285714}, {'dataset': 'n_250.csv', 'model': (12, 3), 'fold': 4, 'training_time': 0.42149853706359863, 'roc auc': 0.5, 'balanced accuracy': 0.5}, {'dataset': 'n_250.csv', 'model': (12, 3), 'fold': 5, 'training_time': 0.4609990119934082, 'roc auc': 0.5, 'balanced accuracy': 0.5}, {'dataset': 'n_250.csv', 'model': (12, 