In [1]:
import pandas as pd
import scipy.stats as sps
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
import math

In [2]:
data = pd.read_csv("train_test_final.csv", index_col=0)

In [3]:
data.head()

Unnamed: 0,card_ps_3d_total,card-state_ps_3d_total,card-state_ps_14d_total,card-state_ps_3d_maximum,card-state_ps_1d_total,card-state_ps_7d_maximum,card-state_ps_30d_total,card_ps_0d_total,card-state_ps_1d_maximum,card-merchant_ps_3d_total,...,card-state_ps_14d_average,card-merchant_ps_1d_total,card_ps_30d_average,card-state_ps_7d_median,merch#_ps_3d_total,card-state_ps_0d_maximum,card-state_ps_14d_median,card-state_ps_0d_total,card-state_ps_0d_average,fraud
3338,0.239431,0.0,0.405804,0.0,0.0,1.057503,0.293871,0.0,0.0,0.0,...,1.423042,0.0,0.224369,1.608665,0.087707,0.0,1.481152,0.0,0.0,0
3339,0.109461,0.052926,0.122207,0.015794,0.0,0.016556,0.066435,0.0,0.0,0.050124,...,-0.002238,0.0,-0.582122,0.007515,0.131778,0.0,0.00011,0.0,0.0,0
3340,1.303699,0.207725,0.60677,0.452024,0.097819,0.39999,0.455039,0.0,0.35736,0.096802,...,0.461099,0.099204,0.32896,0.551246,0.044923,0.0,0.547433,0.0,0.0,0
3341,0.11017,0.054255,0.123333,0.015794,0.001374,0.016556,0.067337,0.001415,0.005603,0.051484,...,-0.00224,0.001394,-0.582219,0.007515,0.13267,0.007521,0.00011,0.002375,0.012225,0
3342,-0.02535,0.0,-0.013172,0.0,0.0,0.0,-0.042135,0.0,0.0,0.0,...,-0.010154,0.0,-0.602674,0.0,0.004678,0.0,-0.006842,0.0,0.0,0


In [4]:
data.dtypes

card_ps_3d_total              float64
card-state_ps_3d_total        float64
card-state_ps_14d_total       float64
card-state_ps_3d_maximum      float64
card-state_ps_1d_total        float64
card-state_ps_7d_maximum      float64
card-state_ps_30d_total       float64
card_ps_0d_total              float64
card-state_ps_1d_maximum      float64
card-merchant_ps_3d_total     float64
card_ps_0d_maximum            float64
card-state_ps_3d_average      float64
card-state_ps_1d_average      float64
card-state_ps_1d_median       float64
card-zip_ps_30d_total         float64
card-state_ps_7d_average      float64
card-zip_ps_3d_maximum        float64
card-state_ps_3d_median       float64
card-merchant_ps_30d_total    float64
card-zip_ps_1d_total          float64
card-zip_ps_1d_maximum        float64
card-state_ps_14d_average     float64
card-merchant_ps_1d_total     float64
card_ps_30d_average           float64
card-state_ps_7d_median       float64
merch#_ps_3d_total            float64
card-state_p

In [5]:
from sklearn.model_selection import KFold

In [6]:
from sklearn.neural_network import MLPClassifier

In [7]:
# hidden_layer_size range(5,21,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.01)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.01 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.01 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.01 alpha, FDR at 3%: 0.6942857142857143
In testing set, with 5 nodes and 0.01 alpha, FDR at 3%: 0.710204081632653
In training set, with 6 nodes and 0.01 alpha, FDR at 3%: 0.7150649350649351
In testing set, with 6 nodes and 0.01 alpha, FDR at 3%: 0.7163265306122448
In training set, with 7 nodes and 0.01 alpha, FDR at 3%: 0.7023376623376623
In testing set, with 7 nodes and 0.01 alpha, FDR at 3%: 0.7244897959183673
In training set, with 8 nodes and 0.01 alpha, FDR at 3%: 0.7038961038961039
In testing set, with 8 nodes and 0.01 alpha, FDR at 3%: 0.7061224489795918
In training set, with 9 nodes and 0.01 alpha, FDR at 3%: 0.6966233766233766
In testing set, with 9 nodes and 0.01 alpha, FDR at 3%: 0.6959183673469388
In training set, with 10 nodes and 0.01 alpha, FDR at 3%: 0.7218181818181819
In testing set, with 10 nodes and 0.01 alpha, FDR at 3%: 0.7326530612244898
In training set, with 11 nodes and 0.01 alpha, FDR at 3%: 0.7277922077922079
In testing set, 

In [8]:
# hidden_layer_size range(5,21,1) & alpha = 0.05

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.05)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud
    
        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.05 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.05 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.05 alpha, FDR at 3%: 0.6625974025974026
In testing set, with 5 nodes and 0.05 alpha, FDR at 3%: 0.6755102040816326
In training set, with 6 nodes and 0.05 alpha, FDR at 3%: 0.6618181818181819
In testing set, with 6 nodes and 0.05 alpha, FDR at 3%: 0.6755102040816326
In training set, with 7 nodes and 0.05 alpha, FDR at 3%: 0.6688311688311688
In testing set, with 7 nodes and 0.05 alpha, FDR at 3%: 0.6795918367346939
In training set, with 8 nodes and 0.05 alpha, FDR at 3%: 0.6667532467532468
In testing set, with 8 nodes and 0.05 alpha, FDR at 3%: 0.6816326530612244
In training set, with 9 nodes and 0.05 alpha, FDR at 3%: 0.6735064935064935
In testing set, with 9 nodes and 0.05 alpha, FDR at 3%: 0.7061224489795919
In training set, with 10 nodes and 0.05 alpha, FDR at 3%: 0.6701298701298701
In testing set, with 10 nodes and 0.05 alpha, FDR at 3%: 0.6775510204081633
In training set, with 11 nodes and 0.05 alpha, FDR at 3%: 0.6797402597402598
In testing set,

In [9]:
# hidden_layer_size range(5,21,1) & alpha = 0.1

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.1)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud
    
        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.1 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.1 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.1 alpha, FDR at 3%: 0.6592207792207792
In testing set, with 5 nodes and 0.1 alpha, FDR at 3%: 0.6653061224489796
In training set, with 6 nodes and 0.1 alpha, FDR at 3%: 0.6610389610389611
In testing set, with 6 nodes and 0.1 alpha, FDR at 3%: 0.6714285714285715
In training set, with 7 nodes and 0.1 alpha, FDR at 3%: 0.6550649350649351
In testing set, with 7 nodes and 0.1 alpha, FDR at 3%: 0.6714285714285714
In training set, with 8 nodes and 0.1 alpha, FDR at 3%: 0.6566233766233767
In testing set, with 8 nodes and 0.1 alpha, FDR at 3%: 0.6693877551020408
In training set, with 9 nodes and 0.1 alpha, FDR at 3%: 0.6592207792207792
In testing set, with 9 nodes and 0.1 alpha, FDR at 3%: 0.6755102040816326
In training set, with 10 nodes and 0.1 alpha, FDR at 3%: 0.6587012987012987
In testing set, with 10 nodes and 0.1 alpha, FDR at 3%: 0.6775510204081632
In training set, with 11 nodes and 0.1 alpha, FDR at 3%: 0.6584415584415584
In testing set, with 11 node

In [7]:
# hidden_layer_size range(5,21,1) & alpha = 0.02

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.02)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud
    
        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.02 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.02 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.02 alpha, FDR at 3%: 0.6799999999999999
In testing set, with 5 nodes and 0.02 alpha, FDR at 3%: 0.689795918367347
In training set, with 6 nodes and 0.02 alpha, FDR at 3%: 0.6610389610389611
In testing set, with 6 nodes and 0.02 alpha, FDR at 3%: 0.6632653061224489
In training set, with 7 nodes and 0.02 alpha, FDR at 3%: 0.692987012987013
In testing set, with 7 nodes and 0.02 alpha, FDR at 3%: 0.6877551020408164
In training set, with 8 nodes and 0.02 alpha, FDR at 3%: 0.6903896103896103
In testing set, with 8 nodes and 0.02 alpha, FDR at 3%: 0.7
In training set, with 9 nodes and 0.02 alpha, FDR at 3%: 0.6953246753246753
In testing set, with 9 nodes and 0.02 alpha, FDR at 3%: 0.7040816326530612
In training set, with 10 nodes and 0.02 alpha, FDR at 3%: 0.7057142857142857
In testing set, with 10 nodes and 0.02 alpha, FDR at 3%: 0.7183673469387755
In training set, with 11 nodes and 0.02 alpha, FDR at 3%: 0.6903896103896103
In testing set, with 11 nodes an

In [8]:
# hidden_layer_size range(5,21,1) & alpha = 0.04

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.04)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud
    
        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.04 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.04 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.04 alpha, FDR at 3%: 0.6672727272727274
In testing set, with 5 nodes and 0.04 alpha, FDR at 3%: 0.673469387755102
In training set, with 6 nodes and 0.04 alpha, FDR at 3%: 0.67012987012987
In testing set, with 6 nodes and 0.04 alpha, FDR at 3%: 0.6979591836734694
In training set, with 7 nodes and 0.04 alpha, FDR at 3%: 0.677142857142857
In testing set, with 7 nodes and 0.04 alpha, FDR at 3%: 0.7
In training set, with 8 nodes and 0.04 alpha, FDR at 3%: 0.6748051948051947
In testing set, with 8 nodes and 0.04 alpha, FDR at 3%: 0.6857142857142857
In training set, with 9 nodes and 0.04 alpha, FDR at 3%: 0.6729870129870129
In testing set, with 9 nodes and 0.04 alpha, FDR at 3%: 0.6979591836734694
In training set, with 10 nodes and 0.04 alpha, FDR at 3%: 0.6787012987012987
In testing set, with 10 nodes and 0.04 alpha, FDR at 3%: 0.7020408163265305
In training set, with 11 nodes and 0.04 alpha, FDR at 3%: 0.6779220779220779
In testing set, with 11 nodes and 

In [7]:
# hidden_layer_size range(5,21,1) & alpha = 0.06

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.06)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud
    
        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.06 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.06 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.06 alpha, FDR at 3%: 0.6633766233766233
In testing set, with 5 nodes and 0.06 alpha, FDR at 3%: 0.689795918367347
In training set, with 6 nodes and 0.06 alpha, FDR at 3%: 0.6615584415584416
In testing set, with 6 nodes and 0.06 alpha, FDR at 3%: 0.6714285714285714
In training set, with 7 nodes and 0.06 alpha, FDR at 3%: 0.6766233766233767
In testing set, with 7 nodes and 0.06 alpha, FDR at 3%: 0.6857142857142857
In training set, with 8 nodes and 0.06 alpha, FDR at 3%: 0.6688311688311688
In testing set, with 8 nodes and 0.06 alpha, FDR at 3%: 0.6734693877551019
In training set, with 9 nodes and 0.06 alpha, FDR at 3%: 0.6727272727272726
In testing set, with 9 nodes and 0.06 alpha, FDR at 3%: 0.6795918367346938
In training set, with 10 nodes and 0.06 alpha, FDR at 3%: 0.6719480519480518
In testing set, with 10 nodes and 0.06 alpha, FDR at 3%: 0.6857142857142857
In training set, with 11 nodes and 0.06 alpha, FDR at 3%: 0.6763636363636364
In testing set, 

In [7]:
# hidden_layer_size range(5,21,1) & alpha = 0.08

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0

    for i in range(5):
        
        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()
        
        NN = MLPClassifier(
            hidden_layer_sizes=(n,),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.08)
        
        NN.fit(x_train,y_train)
        
        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud
    
        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud
        
        i += 1
        
    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with {n} nodes and 0.08 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with {n} nodes and 0.08 alpha, FDR at 3%:', avg_fdr_test)
    
    n += 1

In training set, with 5 nodes and 0.08 alpha, FDR at 3%: 0.6605194805194805
In testing set, with 5 nodes and 0.08 alpha, FDR at 3%: 0.6714285714285715
In training set, with 6 nodes and 0.08 alpha, FDR at 3%: 0.6597402597402597
In testing set, with 6 nodes and 0.08 alpha, FDR at 3%: 0.6632653061224489
In training set, with 7 nodes and 0.08 alpha, FDR at 3%: 0.6662337662337662
In testing set, with 7 nodes and 0.08 alpha, FDR at 3%: 0.6714285714285715
In training set, with 8 nodes and 0.08 alpha, FDR at 3%: 0.6685714285714285
In testing set, with 8 nodes and 0.08 alpha, FDR at 3%: 0.6836734693877551
In training set, with 9 nodes and 0.08 alpha, FDR at 3%: 0.6690909090909091
In testing set, with 9 nodes and 0.08 alpha, FDR at 3%: 0.6877551020408164
In training set, with 10 nodes and 0.08 alpha, FDR at 3%: 0.6646753246753246
In testing set, with 10 nodes and 0.08 alpha, FDR at 3%: 0.6877551020408162
In training set, with 11 nodes and 0.08 alpha, FDR at 3%: 0.6690909090909091
In testing set,

In [8]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,5),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.01)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.6854545454545454
In testing set, with 1st 5 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.6979591836734695
In training set, with 1st 6 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.7420779220779221
In testing set, with 1st 6 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.7428571428571429
In training set, with 1st 7 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.7402597402597403
In testing set, with 1st 7 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.7408163265306122
In training set, with 1st 8 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.750909090909091
In testing set, with 1st 8 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.753061224489796
In training set, with 1st 9 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.7584415584415585
In testing set, with 1st 9 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.7510204081632653
In training set, with 1st 10 nodes, 2nd 5 nodes and 0.01 alpha, FDR at 3%: 0.

In [9]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,10),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.01)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.6922077922077922
In testing set, with 1st 5 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.6979591836734695
In training set, with 1st 6 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7431168831168831
In testing set, with 1st 6 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.753061224489796
In training set, with 1st 7 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7431168831168831
In testing set, with 1st 7 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7285714285714285
In training set, with 1st 8 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7581818181818182
In testing set, with 1st 8 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7489795918367348
In training set, with 1st 9 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7457142857142858
In testing set, with 1st 9 nodes, 2nd 10 nodes and 0.01 alpha, FDR at 3%: 0.7510204081632652
In training set, with 1st 10 nodes, 2nd 10 nodes and 0.01 alpha, F

In [10]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,15),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.01)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7083116883116883
In testing set, with 1st 5 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7102040816326531
In training set, with 1st 6 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.732987012987013
In testing set, with 1st 6 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.746938775510204
In training set, with 1st 7 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7345454545454546
In testing set, with 1st 7 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7428571428571428
In training set, with 1st 8 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7503896103896104
In testing set, with 1st 8 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7693877551020408
In training set, with 1st 9 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7514285714285714
In testing set, with 1st 9 nodes, 2nd 15 nodes and 0.01 alpha, FDR at 3%: 0.7489795918367347
In training set, with 1st 10 nodes, 2nd 15 nodes and 0.01 alpha, FD

In [None]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,20),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.01)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7363636363636363
In testing set, with 1st 5 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7285714285714285
In training set, with 1st 6 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7150649350649351
In testing set, with 1st 6 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7428571428571429
In training set, with 1st 7 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7519480519480519
In testing set, with 1st 7 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7489795918367348
In training set, with 1st 8 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7592207792207792
In testing set, with 1st 8 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7653061224489794
In training set, with 1st 9 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7548051948051947
In testing set, with 1st 9 nodes, 2nd 20 nodes and 0.01 alpha, FDR at 3%: 0.7571428571428572
In training set, with 1st 10 nodes, 2nd 20 nodes and 0.01 alpha, 

In [7]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,5),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.1)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6483116883116884
In testing set, with 1st 5 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6653061224489796
In training set, with 1st 6 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6714285714285714
In testing set, with 1st 6 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6979591836734694
In training set, with 1st 7 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6664935064935066
In testing set, with 1st 7 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.673469387755102
In training set, with 1st 8 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6745454545454546
In testing set, with 1st 8 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.7163265306122449
In training set, with 1st 9 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6766233766233766
In testing set, with 1st 9 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6979591836734694
In training set, with 1st 10 nodes, 2nd 5 nodes and 0.1 alpha, FDR at 3%: 0.6880519480

In [8]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,10),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.1)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6724675324675324
In testing set, with 1st 5 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6938775510204083
In training set, with 1st 6 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.655064935064935
In testing set, with 1st 6 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.673469387755102
In training set, with 1st 7 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6532467532467533
In testing set, with 1st 7 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6632653061224489
In training set, with 1st 8 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6607792207792207
In testing set, with 1st 8 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6755102040816326
In training set, with 1st 9 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.6696103896103895
In testing set, with 1st 9 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.7020408163265306
In training set, with 1st 10 nodes, 2nd 10 nodes and 0.1 alpha, FDR at 3%: 0.

In [9]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,15),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.1)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6483116883116883
In testing set, with 1st 5 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6653061224489796
In training set, with 1st 6 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6584415584415585
In testing set, with 1st 6 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6877551020408162
In training set, with 1st 7 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6690909090909091
In testing set, with 1st 7 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6979591836734694
In training set, with 1st 8 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6664935064935065
In testing set, with 1st 8 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.7081632653061224
In training set, with 1st 9 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6846753246753245
In testing set, with 1st 9 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 0.6938775510204083
In training set, with 1st 10 nodes, 2nd 15 nodes and 0.1 alpha, FDR at 3%: 

In [10]:
# 2 layers & hidden_layer_size range(5,11,1) & alpha = 0.01

for n in range(5,21,1):
    fdr_train = 0
    fdr_test = 0
        
    for i in range(5):

        kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
        for train_index, test_index in kfolds.split(data):
            y_train = data.iloc[train_index, -1].to_numpy()
            y_test = data.iloc[test_index, -1].to_numpy()
            x_train = data.iloc[train_index, 0:-1].to_numpy()
            x_test = data.iloc[test_index, 0:-1].to_numpy()

        NN = MLPClassifier(
            hidden_layer_sizes=(n,20),
            activation='relu',
            learning_rate='adaptive',
            max_iter=10000,
            learning_rate_init=.01,
            alpha=.1)

        NN.fit(x_train,y_train)

        NN_train_pred = NN.predict_proba(x_train)[:,1]
        ind = np.argsort(NN_train_pred)
        NN_y_train_sorted = np.take_along_axis(y_train, ind, axis=0)
        NN_train_pred_sorted = np.take_along_axis(NN_train_pred, ind, axis=0)
        NN_y_train_sorted_3per = NN_y_train_sorted[-int(len(NN_y_train_sorted)*0.03):-1]
        NN_y_train_sorted_3per_fraud = NN_y_train_sorted_3per[NN_y_train_sorted_3per==1]
        y_train_fraud = len(y_train[y_train==1])
        fdr_train += len(NN_y_train_sorted_3per_fraud)/y_train_fraud

        NN_test_pred = NN.predict_proba(x_test)[:,1]
        ind = np.argsort(NN_test_pred)
        NN_y_test_sorted = np.take_along_axis(y_test, ind, axis=0)
        NN_test_pred_sorted = np.take_along_axis(NN_test_pred, ind, axis=0)
        NN_y_test_sorted_3per = NN_y_test_sorted[-int(len(NN_y_test_sorted)*0.03):-1]
        NN_y_test_sorted_3per_fraud = NN_y_test_sorted_3per[NN_y_test_sorted_3per==1]
        y_test_fraud = len(y_test[y_test==1])
        fdr_test += len(NN_y_test_sorted_3per_fraud)/y_test_fraud

        i += 1

    avg_fdr_train = fdr_train/5
    avg_fdr_test = fdr_test/5

    print(f'In training set, with 1st {n} nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_train)
    print(f'In testing set, with 1st {n} nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%:', avg_fdr_test)

    n += 1

In training set, with 1st 5 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6561038961038961
In testing set, with 1st 5 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6795918367346939
In training set, with 1st 6 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6581818181818182
In testing set, with 1st 6 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6693877551020407
In training set, with 1st 7 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6683116883116884
In testing set, with 1st 7 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.7020408163265306
In training set, with 1st 8 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6851948051948051
In testing set, with 1st 8 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.7183673469387756
In training set, with 1st 9 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.6844155844155844
In testing set, with 1st 9 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0.726530612244898
In training set, with 1st 10 nodes, 2nd 20 nodes and 0.1 alpha, FDR at 3%: 0