# Imports

In [1]:
import pandas as pd
import numpy as np

# Data Cleaning

## Selecting Relevant Fields
the dataset will be loaded and transformed and relevant dimensions will remain

In [2]:
#TODO: remove the nrows argument when done testing
#TODO: randomly select examples
df = pd.read_csv('database.csv', nrows=5000)[[
    'NAME_CONTRACT_STATUS',
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'CNT_CHILDREN',
    'AMT_INCOME_TOTAL',
    'AMT_CREDIT',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'DAYS_BIRTH',
    'DAYS_EMPLOYED',
    'OCCUPATION_TYPE',
    'CNT_FAM_MEMBERS'
]]

df.head(10)

Unnamed: 0,NAME_CONTRACT_STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,Approved,F,N,Y,0,171000.0,491580.0,State servant,Secondary / secondary special,Married,House / apartment,-14548,-1187,Medicine staff,2.0
1,Approved,F,Y,Y,1,175500.0,29700.0,State servant,Higher education,Married,House / apartment,-11081,-3244,High skill tech staff,3.0
2,Approved,F,N,Y,0,135000.0,48600.0,Working,Secondary / secondary special,Married,House / apartment,-12939,-629,Sales staff,2.0
3,Approved,M,N,N,0,180000.0,196740.0,Working,Secondary / secondary special,Married,Rented apartment,-8945,-672,Sales staff,2.0
4,Refused,M,Y,N,0,225000.0,774229.5,Pensioner,Secondary / secondary special,Married,House / apartment,-23919,365243,,2.0
5,Refused,M,Y,N,1,225000.0,36166.5,Working,Secondary / secondary special,Married,House / apartment,-15173,-3397,Drivers,3.0
6,Approved,F,N,Y,1,90000.0,120582.0,Pensioner,Secondary / secondary special,Married,House / apartment,-18834,365243,,3.0
7,Approved,M,N,N,0,135000.0,30550.5,Working,Secondary / secondary special,Married,House / apartment,-9950,-146,Laborers,2.0
8,Approved,F,N,N,0,54000.0,112500.0,Pensioner,Secondary / secondary special,Widow,House / apartment,-23154,365243,,1.0
9,Approved,M,N,Y,0,315000.0,26811.0,Commercial associate,Secondary / secondary special,Married,House / apartment,-17154,-4006,Drivers,2.0


## Reductions to Binary Variables

The target variable, NAME_CONTRACT_STATUS, will be reduced from one of 4 possible values, to one of two generic but still correct values – for example, the dataset distingushes between cancelled and rejected and granted loans, however we will only distinguish between granted and not granted loans. Values that are binary but that don't use the binary alphabet will be transformed to use the binary alphabet as well.

Defining a function that will return a copy of the dataframe with reduced fields.

In [3]:
def reduce(df, name, value):
    if type(name) != str:
        raise Exception('only one dimension is reduced at a time')
    idx_name = df.columns.get_loc(name)
    reduced = [(1 if df[name][i] == value else 0) for i in range(len(df))]
    df_reduced = df.drop(labels=[name], axis=1)
    df_reduced.insert(loc=idx_name, column=name, value=reduced)
    return df_reduced

In [4]:
df_reduced = df
df_reduced = reduce(df_reduced, 'NAME_CONTRACT_STATUS', 'Approved') # 1 if approved else 0
df_reduced = reduce(df_reduced, 'CODE_GENDER', 'M') # 1 if male else 0
df_reduced = reduce(df_reduced, 'FLAG_OWN_CAR', 'Y') # 1 if owns car else 0
df_reduced = reduce(df_reduced, 'FLAG_OWN_REALTY', 'Y') # 1 if owns property else 0

df_reduced.head(10)

Unnamed: 0,NAME_CONTRACT_STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,1,0,0,1,0,171000.0,491580.0,State servant,Secondary / secondary special,Married,House / apartment,-14548,-1187,Medicine staff,2.0
1,1,0,1,1,1,175500.0,29700.0,State servant,Higher education,Married,House / apartment,-11081,-3244,High skill tech staff,3.0
2,1,0,0,1,0,135000.0,48600.0,Working,Secondary / secondary special,Married,House / apartment,-12939,-629,Sales staff,2.0
3,1,1,0,0,0,180000.0,196740.0,Working,Secondary / secondary special,Married,Rented apartment,-8945,-672,Sales staff,2.0
4,0,1,1,0,0,225000.0,774229.5,Pensioner,Secondary / secondary special,Married,House / apartment,-23919,365243,,2.0
5,0,1,1,0,1,225000.0,36166.5,Working,Secondary / secondary special,Married,House / apartment,-15173,-3397,Drivers,3.0
6,1,0,0,1,1,90000.0,120582.0,Pensioner,Secondary / secondary special,Married,House / apartment,-18834,365243,,3.0
7,1,1,0,0,0,135000.0,30550.5,Working,Secondary / secondary special,Married,House / apartment,-9950,-146,Laborers,2.0
8,1,0,0,0,0,54000.0,112500.0,Pensioner,Secondary / secondary special,Widow,House / apartment,-23154,365243,,1.0
9,1,1,0,1,0,315000.0,26811.0,Commercial associate,Secondary / secondary special,Married,House / apartment,-17154,-4006,Drivers,2.0


## One-Hot Encoding
defining a function that returns a copy of the input dataframe with a specific dimension one-hot encoded

In [5]:
def one_hot_encode(df, name, prefix=''):
    if type(name) != str:
        raise Exception('one hot encoding applies to one dimension at a time')
    if len(df) == 0:
        raise Exception('dataframe is empty')
    
    df = df.copy()
    values = df[name].unique()
    
    #for each unique value, we create a new column where df[row][new column] is 1 if the value of df[row][value] == new column
    for v in values:
        one_hot_column = [(1 if df[name][i] == v else 0) for i in range(len(df))]
        df.insert(loc=len(df.loc[0]), column=prefix + str(v), value=one_hot_column)

    return df.drop(labels=[name], axis=1)

performing one-hot encoding on any dimension whose values are one of a set of string values

In [6]:
df_encoded = df_reduced
df_encoded = one_hot_encode(df_encoded, 'NAME_INCOME_TYPE')
df_encoded = one_hot_encode(df_encoded, 'NAME_EDUCATION_TYPE')
df_encoded = one_hot_encode(df_encoded, 'NAME_FAMILY_STATUS')
df_encoded = one_hot_encode(df_encoded, 'NAME_HOUSING_TYPE')
df_encoded = one_hot_encode(df_encoded, 'OCCUPATION_TYPE')
df_encoded = one_hot_encode(df_encoded, 'CNT_CHILDREN', prefix='CNT_CHILDREN_')
df_encoded = one_hot_encode(df_encoded, 'CNT_FAM_MEMBERS', prefix='CNT_CHILDREN_')
df_all_features = df_encoded

#sanity check the dataframe before any work on it begins
df_all_features.head(10)

Unnamed: 0,NAME_CONTRACT_STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,AMT_INCOME_TOTAL,AMT_CREDIT,DAYS_BIRTH,DAYS_EMPLOYED,State servant,Working,...,CNT_CHILDREN_3,CNT_CHILDREN_5,CNT_CHILDREN_4,CNT_CHILDREN_2.0,CNT_CHILDREN_3.0,CNT_CHILDREN_1.0,CNT_CHILDREN_4.0,CNT_CHILDREN_5.0,CNT_CHILDREN_7.0,CNT_CHILDREN_6.0
0,1,0,0,1,171000.0,491580.0,-14548,-1187,1,0,...,0,0,0,1,0,0,0,0,0,0
1,1,0,1,1,175500.0,29700.0,-11081,-3244,1,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,1,135000.0,48600.0,-12939,-629,0,1,...,0,0,0,1,0,0,0,0,0,0
3,1,1,0,0,180000.0,196740.0,-8945,-672,0,1,...,0,0,0,1,0,0,0,0,0,0
4,0,1,1,0,225000.0,774229.5,-23919,365243,0,0,...,0,0,0,1,0,0,0,0,0,0
5,0,1,1,0,225000.0,36166.5,-15173,-3397,0,1,...,0,0,0,0,1,0,0,0,0,0
6,1,0,0,1,90000.0,120582.0,-18834,365243,0,0,...,0,0,0,0,1,0,0,0,0,0
7,1,1,0,0,135000.0,30550.5,-9950,-146,0,1,...,0,0,0,1,0,0,0,0,0,0
8,1,0,0,0,54000.0,112500.0,-23154,365243,0,0,...,0,0,0,0,0,1,0,0,0,0
9,1,1,0,1,315000.0,26811.0,-17154,-4006,0,0,...,0,0,0,1,0,0,0,0,0,0


# Training the models

## Split the dataframe into X and y as numpy arrays

normalize the data

In [7]:
from sklearn import preprocessing
to_scale = [
    'AMT_INCOME_TOTAL',
    'AMT_CREDIT',
    'DAYS_BIRTH',
    'DAYS_EMPLOYED'
]
subframe = df_all_features[to_scale].copy()
scaler = preprocessing.StandardScaler().fit(subframe.values)
subframe = scaler.transform(subframe.values)
df_all_features[to_scale] = subframe
df_final = df_all_features

df_final.head(6)

Unnamed: 0,NAME_CONTRACT_STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,AMT_INCOME_TOTAL,AMT_CREDIT,DAYS_BIRTH,DAYS_EMPLOYED,State servant,Working,...,CNT_CHILDREN_3,CNT_CHILDREN_5,CNT_CHILDREN_4,CNT_CHILDREN_2.0,CNT_CHILDREN_3.0,CNT_CHILDREN_1.0,CNT_CHILDREN_4.0,CNT_CHILDREN_5.0,CNT_CHILDREN_7.0,CNT_CHILDREN_6.0
0,1,0,0,1,-0.007997,0.823271,0.359057,-0.471875,1,0,...,0,0,0,1,0,0,0,0,0,0
1,1,0,1,1,0.04206,-0.618186,1.144166,-0.486204,1,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,1,-0.408452,-0.559202,0.723418,-0.467988,0,1,...,0,0,0,1,0,0,0,0,0,0
3,1,1,0,0,0.092117,-0.09688,1.627867,-0.468287,0,1,...,0,0,0,1,0,0,0,0,0,0
4,0,1,1,0,0.592686,1.705377,-1.763023,2.08077,0,0,...,0,0,0,1,0,0,0,0,0,0
5,0,1,1,0,0.592686,-0.598005,0.217525,-0.48727,0,1,...,0,0,0,0,1,0,0,0,0,0


convert the dataframe into a numpy tensor

In [8]:
data = np.array(df_final)

#undersample the target 1's so that the dataset is exactly balanced
num_zero = len(data[data[:,0]==0])
num_one = num_zero

#balance the amount of approvals and rejections
data = np.vstack((
    data[data[:,0]==1][:num_one,:],
    data[data[:,0]==0]
))
np.random.shuffle(data)

X = data[:,1:]
y = data[:,0].astype(np.int32)

## Creating the test class
We will create a class that collects everything we need to build and to analyze a model. The class will point to our data, and to a model building algorithm, and will provide functionality for presenting results.

In [9]:
from sklearn.model_selection import KFold

class Test:
    def __init__(self, name, X, y, algorithm, args):
        self.name = name
        self.X = X  #pointer, not copy
        self.y = y
        self.algorithm = algorithm
        self.args = args
        
        self.predictions=None
        self.accs_train=None
        self.accs_test=None
        self.best_acc_index=None
    
    def get_args(self):
        return dict(self.args)
    
    def get_training_accuracies(self):
        if self.accs_train is None:
            raise Exception('test has not yet been run')
        return list(self.accs_train)

    def get_best_accuracy_training(self):
        return self.get_training_accuracies()[self.best_acc_index]

    def get_testing_accuracies(self):
        if self.accs_test is None:
            raise Exception('test has not yet been run')
        return list(self.accs_test)
    
    def get_best_accuracy(self):
        return self.get_testing_accuracies()[self.best_acc_index]

    def get_predictions(self):
        if self.predictions is None:
            raise Exception('test has not yet been run')
        return list(self.predictions)
    
    def get_best_prediction(self):
        return self.get_predictions()[self.best_acc_index]
    
    #obtain accuracy using k-fold cross validation
    def run(self, k_fold_splits=5):

        predictions = []
        accs_train = []
        accs_test = []
        best_acc_test = 0
        best_acc_index = 0
        
        kfold_model = KFold(n_splits=k_fold_splits, random_state=None, shuffle=False)
        
        i = 0
        for train_index, test_index in kfold_model.split(self.X):
            X_train = self.X[train_index]
            y_train = self.y[train_index]
            X_test = self.X[test_index]
            y_test = self.y[test_index]
            model = self.algorithm(**self.args)
            model.fit(X_train, y_train)
            accs_train.append(model.score(X_train,y_train))
            acc_test = model.score(X_test,y_test)
            accs_test.append(acc_test)
            if acc_test > best_acc_test:
                best_acc_test = acc_test
                best_acc_index = i
            predictions.append(model.predict(X_test))
            i += 1
        
        self.accs_test = accs_test
        self.predictions = predictions
        self.accs_train = accs_train
        self.best_acc_index = best_acc_index

    
    def display(self):
        print(f'\n\n{self.name}\n' + '='*len(self.name))
        acc_train = self.get_best_accuracy_training()
        acc_test = self.get_best_accuracy()
        prediction = self.get_best_prediction()
        print(f'Mean prediction: {np.round(np.mean(np.array(prediction)), 2)}')
        print(f'Average training accuracy: {acc_train}')
        print(f'Average test accuracy: {acc_test}')

## Support Vector Machine Modelling
The motivation behind support vector machines is that we are building a line of best fit between two datasets, where "best" is defined by an objective function of distance between our line of best fit and between critical points, called support vectors, of these datasets. Support vectors are the closest points to a line of best fit. Our best fit line is also a decision boundary.

In [10]:
from sklearn import svm

### Tests comparing linear kernel instantiations with different C values

In [11]:
svm_linear_tests = [
    Test(
        name=f'SVM linear with C of {i}',
        X=X,
        y=y,
        algorithm=svm.SVC,
        args={'kernel': 'linear', 'C': i}
    )
    for i in [0.001, 0.01, 0.1, 1.0/len(X), 1, 10]
]

for test in svm_linear_tests:
    test.run()
    test.display()



SVM linear with C of 0.001
Mean prediction: 0.62
Average training accuracy: 0.6098300970873787
Average test accuracy: 0.6189320388349514


SVM linear with C of 0.01
Mean prediction: 0.74
Average training accuracy: 0.6110436893203883
Average test accuracy: 0.6043689320388349


SVM linear with C of 0.1
Mean prediction: 0.69
Average training accuracy: 0.620752427184466
Average test accuracy: 0.5946601941747572


SVM linear with C of 0.0004854368932038835
Mean prediction: 0.49
Average training accuracy: 0.6274271844660194
Average test accuracy: 0.5679611650485437


SVM linear with C of 1
Mean prediction: 0.6
Average training accuracy: 0.6134708737864077
Average test accuracy: 0.5970873786407767


SVM linear with C of 10
Mean prediction: 0.6
Average training accuracy: 0.616504854368932
Average test accuracy: 0.5898058252427184


### Tests comparing instantions of the RBF kernel with different C

In [12]:
svm_rbf_tests = [
    Test(
        name=f'SVM rbf of with C of {i}',
        X=X,
        y=y,
        algorithm=svm.SVC,
        args={'kernel': 'rbf', 'C': i}
    )
    for i in [0.001, 0.01, 0.1, 1.0/len(X), 1, 10]
]

for test in svm_rbf_tests:
    test.run()
    test.display()



SVM rbf of with C of 0.001
Mean prediction: 0.84
Average training accuracy: 0.5898058252427184
Average test accuracy: 0.5995145631067961


SVM rbf of with C of 0.01
Mean prediction: 0.84
Average training accuracy: 0.5898058252427184
Average test accuracy: 0.5995145631067961


SVM rbf of with C of 0.1
Mean prediction: 0.73
Average training accuracy: 0.6080097087378641
Average test accuracy: 0.6067961165048543


SVM rbf of with C of 0.0004854368932038835
Mean prediction: 0.84
Average training accuracy: 0.5898058252427184
Average test accuracy: 0.5995145631067961


SVM rbf of with C of 1
Mean prediction: 0.59
Average training accuracy: 0.6747572815533981
Average test accuracy: 0.6237864077669902


SVM rbf of with C of 10
Mean prediction: 0.51
Average training accuracy: 0.8173543689320388
Average test accuracy: 0.6043689320388349


### Tests comparing different degrees of the polynomial kernel

In [13]:
pairs = []
for degree in range(1,6):
    for C in [0.01, 0.1, 1.0/len(X), 1]:
        pairs.append((degree, C))

svm_poly_tests = [
    Test(
        name=f'SVM poly of degree {degree} and with C of {C}',
        X=X,
        y=y,
        algorithm=svm.SVC,
        args={'kernel': 'poly', 'C': C, 'degree': degree}
    )
    for (degree, C) in pairs
]

for test in svm_poly_tests:
    test.run()
    test.display()



SVM poly of degree 1 and with C of 0.01
Mean prediction: 0.37
Average training accuracy: 0.5970873786407767
Average test accuracy: 0.5898058252427184


SVM poly of degree 1 and with C of 0.1
Mean prediction: 0.79
Average training accuracy: 0.6037621359223301
Average test accuracy: 0.6019417475728155


SVM poly of degree 1 and with C of 0.0004854368932038835
Mean prediction: 0.99
Average training accuracy: 0.5109223300970874
Average test accuracy: 0.5072815533980582


SVM poly of degree 1 and with C of 1
Mean prediction: 0.69
Average training accuracy: 0.6195388349514563
Average test accuracy: 0.5946601941747572


SVM poly of degree 2 and with C of 0.01
Mean prediction: 0.77
Average training accuracy: 0.5849514563106796
Average test accuracy: 0.587378640776699


SVM poly of degree 2 and with C of 0.1
Mean prediction: 0.77
Average training accuracy: 0.6067961165048543
Average test accuracy: 0.5946601941747572


SVM poly of degree 2 and with C of 0.0004854368932038835
Mean prediction: 1

## Logistic Regression Modelling
The motivation behind Logistic Regression modelling is that we attempt to *explain*, by defining a **likely** decision boundary, why a specific dataset is split the way it is. Our objective function is best when it is most likely the explanation for the given dataset split. We avoid overfitting by introducing a penalty function that is some function of weights that is then scaled by a ∆.

In [20]:
from sklearn import linear_model
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

In [21]:
logreg_l1_tests = [
    Test(
        name=f'Logreg l1 penalty with ∆={i}',
        X=X,
        y=y,
        algorithm=linear_model.LogisticRegression,
        args={'penalty':'l1', 'solver':'saga', 'C':i}
    )
    for i in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
]

for test in logreg_l1_tests:
    test.run()
    test.display()



Logreg l1 penalty with ∆=0.0001
Mean prediction: 0.0
Average training accuracy: 0.4969660194174757
Average test accuracy: 0.5121359223300971


Logreg l1 penalty with ∆=0.001
Mean prediction: 0.0
Average training accuracy: 0.4969660194174757
Average test accuracy: 0.5121359223300971


Logreg l1 penalty with ∆=0.01
Mean prediction: 0.7
Average training accuracy: 0.5855582524271845
Average test accuracy: 0.6237864077669902


Logreg l1 penalty with ∆=0.1
Mean prediction: 0.59
Average training accuracy: 0.6140776699029126
Average test accuracy: 0.6262135922330098


Logreg l1 penalty with ∆=1
Mean prediction: 0.61
Average training accuracy: 0.6183252427184466
Average test accuracy: 0.6067961165048543


Logreg l1 penalty with ∆=10
Mean prediction: 0.55
Average training accuracy: 0.6158980582524272
Average test accuracy: 0.5800970873786407


Logreg l1 penalty with ∆=100
Mean prediction: 0.6
Average training accuracy: 0.6225728155339806
Average test accuracy: 0.5849514563106796


Logreg l1 pe

In [22]:
logreg_l2_tests = [
    Test(
        name=f'Logreg l2 penalty with ∆={i}',
        X=X,
        y=y,
        algorithm=linear_model.LogisticRegression,
        args={'penalty':'l2', 'solver':'saga', 'C':i}
    )
    for i in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
]

for test in logreg_l2_tests:
    test.run()
    test.display()



Logreg l2 penalty with ∆=0.0001
Mean prediction: 0.68
Average training accuracy: 0.5989077669902912
Average test accuracy: 0.6213592233009708


Logreg l2 penalty with ∆=0.001
Mean prediction: 0.67
Average training accuracy: 0.6080097087378641
Average test accuracy: 0.6310679611650486


Logreg l2 penalty with ∆=0.01
Mean prediction: 0.6
Average training accuracy: 0.6146844660194175
Average test accuracy: 0.6140776699029126


Logreg l2 penalty with ∆=0.1
Mean prediction: 0.62
Average training accuracy: 0.6219660194174758
Average test accuracy: 0.6140776699029126


Logreg l2 penalty with ∆=1
Mean prediction: 0.55
Average training accuracy: 0.6146844660194175
Average test accuracy: 0.587378640776699


Logreg l2 penalty with ∆=10
Mean prediction: 0.6
Average training accuracy: 0.6219660194174758
Average test accuracy: 0.587378640776699


Logreg l2 penalty with ∆=100
Mean prediction: 0.6
Average training accuracy: 0.6225728155339806
Average test accuracy: 0.5849514563106796


Logreg l2 pen

## Neural Network Modelling
The motvation behind neural network modelling is that we attempt to define abstractions that are functions of other abstractions that are functions of our input. At a high level, a neural network is a set of functions that are defined as computation graphs. To improve our function, we find the gradient of our augmented error function that includes a penalty function, just as in logistic regression.

In [17]:
#code reference: https://scikit-learn.org/stable/modules/neural_networks_supervised.html
from sklearn.neural_network import MLPClassifier

In [18]:
nn_basic_tests = [
    Test(
        name='NN',
        X=X,
        y=y,
        algorithm=MLPClassifier,
        args={'solver': 'lbfgs', 'alpha': 1e-5, 'hidden_layer_sizes': (5,2), 'random_state': 1}
    )
]

for test in nn_basic_tests:
    test.run()
    test.display()



NN
==
Mean prediction: 0.61
Average training accuracy: 0.7184466019417476
Average test accuracy: 0.5849514563106796


In [19]:
#TODO:
#weight init
#regularization and lambda
#activation
nn_layer_tests = [
    Test(
        name=f'NN with hidden layers {structure}',
        X=X,
        y=y,
        algorithm=MLPClassifier,
        args={'solver': 'lbfgs', 'alpha': 1e-5, 'hidden_layer_sizes': structure, 'random_state': 1}
    )
    for structure in [
        (6,4),
        (6,4,2),
        (10,8),
        (10,8,6),
        (10,8,6,4),
        (20,16,10,8),
    ]
]

for test in nn_layer_tests:
    test.run()
    test.display()



NN with hidden layers (6, 4)
Mean prediction: 0.62
Average training accuracy: 0.7081310679611651
Average test accuracy: 0.5849514563106796


NN with hidden layers (6, 4, 2)
Mean prediction: 0.45
Average training accuracy: 0.6887135922330098
Average test accuracy: 0.5825242718446602


NN with hidden layers (10, 8)
Mean prediction: 0.62
Average training accuracy: 0.7669902912621359
Average test accuracy: 0.5898058252427184


NN with hidden layers (10, 8, 6)
Mean prediction: 0.52
Average training accuracy: 0.8216019417475728
Average test accuracy: 0.5970873786407767


NN with hidden layers (10, 8, 6, 4)
Mean prediction: 0.48
Average training accuracy: 0.8349514563106796
Average test accuracy: 0.5922330097087378


NN with hidden layers (20, 16, 10, 8)
Mean prediction: 0.46
Average training accuracy: 0.883495145631068
Average test accuracy: 0.5728155339805825
