In [18]:
# Install Keras from the notebook via a shell command.
# NOTE(review): the version is unpinned, yet the import cell uses the module paths
# keras.layers.normalization and keras.wrappers.scikit_learn — confirm which Keras
# release provides both and pin it here.
!pip install keras



In [19]:
import pandas as pd
import numpy as np
import tensorflow as tf
import ast

from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD, Nadam, RMSprop
from keras.layers.normalization import BatchNormalization

from sklearn.model_selection import RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [20]:
# Load the resampled table and split it into the feature matrix and the label vector.
response = 'TARGET'

resampled_df = pd.read_csv('resampled_df_40')

# The first column is discarded before modelling.
# NOTE(review): presumably it is an index column written out when the file was saved — confirm.
first_column = resampled_df.columns[0]
resampled_df = resampled_df.drop(columns=[first_column])

# Every remaining column except the response is used as a predictor.
predictors = resampled_df.columns[resampled_df.columns != response].tolist()
X_resampled = resampled_df.loc[:, predictors]
y_resampled = resampled_df[response]

In [21]:
# Hyper-parameter search space for the (currently commented-out) RandomizedSearchCV run.
# The neuron counts, activation, dropout, optimizer, hidden_layers, lr and
# kernel_initializer keys match the arguments of build_network; batch_size and
# epochs are not build_network arguments.
# NOTE(review): batch_size/epochs are presumably forwarded by KerasClassifier to fit — confirm.
#
# The candidate layer widths are drawn from a dedicated, seeded generator so that the
# grid is identical after every kernel restart; the global, unseeded np.random.randint
# produced a different search space on each run. Using a local RandomState also leaves
# NumPy's global random state untouched.
grid_rng = np.random.RandomState(42)

keras_grid = {'lr': np.linspace(0.001, 0.01, 15),
     'first_neuron': grid_rng.randint(100, 500, 5),    # 5 widths in [100, 500)
     'second_neuron': grid_rng.randint(100, 300, 5),   # 5 widths in [100, 300)
     'third_neuron': grid_rng.randint(100, 300, 5),    # 5 widths in [100, 300)
     'hidden_layers':[1,2,3],
     'batch_size': [50, 100, 200],
     'epochs': [20, 30, 40, 50, 60, 70, 80, 90,100],
     'dropout': np.linspace(0, 0.5, 10),
     # 'weight_regularizer':np.linspace(0.01, 0.5, 10),
     'optimizer': [Nadam, RMSprop, SGD],                # optimizer classes, instantiated inside build_network
     'activation':['relu', 'elu', 'tanh'],
     'kernel_initializer':['glorot_uniform', 'normal', 'VarianceScaling', 'lecun_uniform']
    }

In [22]:
# Width of the input layer: one unit per predictor column.
n_features = X_resampled.shape[1]

def build_network(first_neuron, second_neuron, third_neuron, activation, dropout, optimizer, hidden_layers, lr, kernel_initializer):
    """Build and compile a feed-forward binary classifier.

    Every hidden layer is a Dense -> BatchNormalization -> Activation -> Dropout
    stack. The output is a single Dense unit followed by BatchNormalization and a
    sigmoid activation.

    Parameters
    ----------
    first_neuron, second_neuron, third_neuron : int
        Widths of the first, second and third hidden layers. The second is only
        used when ``hidden_layers`` is 2 or 3, the third only when it is 3.
    activation : str
        Activation applied in every hidden layer.
    dropout : float
        Dropout rate applied after every hidden layer.
    optimizer : callable
        Optimizer class; it is instantiated here as ``optimizer(lr=lr)``.
    hidden_layers : int
        Number of hidden layers. Any value other than 2 or 3 yields a single
        hidden layer.
    lr : float
        Learning rate passed to the optimizer.
    kernel_initializer : str
        Initializer used for every Dense layer, including the output layer.

    Returns
    -------
    The compiled Sequential model (loss: binary cross-entropy).
    """
    # Decide which hidden-layer widths are stacked, in order.
    widths = [first_neuron]
    if hidden_layers in (2, 3):
        widths.append(second_neuron)
    if hidden_layers == 3:
        widths.append(third_neuron)

    net = Sequential()
    for position, width in enumerate(widths):
        if position == 0:
            # Only the first layer needs to be told the input dimensionality;
            # it reads the module-level n_features.
            net.add(Dense(width, input_dim=n_features, kernel_initializer=kernel_initializer))
        else:
            net.add(Dense(width, kernel_initializer=kernel_initializer))
        net.add(BatchNormalization())
        net.add(Activation(activation))
        net.add(Dropout(dropout))

    # Output head: one unit, batch-normalised, squashed by a sigmoid.
    net.add(Dense(1, kernel_initializer=kernel_initializer))
    net.add(BatchNormalization())
    net.add(Activation('sigmoid'))

    net.compile(loss='binary_crossentropy', optimizer=optimizer(lr = lr))
    return net

In [23]:
# Set the JOBLIB_TEMP_FOLDER environment variable to /tmp for this kernel.
# NOTE(review): presumably so that joblib-backed parallel work (e.g. n_jobs=-1 in the
# random search) writes its temporary files to /tmp — confirm.
%env JOBLIB_TEMP_FOLDER=/tmp

env: JOBLIB_TEMP_FOLDER=/tmp


In [24]:
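# Uncomment and run this cell to regenerate the random-search results; it writes them to 'NN_random_results2', which the next cell reads back.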

# %%time
# from sklearn.model_selection import StratifiedKFold
# kf = StratifiedKFold(n_splits = 2)

# estimator = KerasClassifier(build_fn = build_network, verbose = 0)
# random = RandomizedSearchCV(estimator, keras_grid, n_iter = 60, scoring='roc_auc', cv = kf, return_train_score=False, n_jobs=-1, verbose = True)

# results_nn = random.fit(X_resampled, y_resampled)
# nn_random = pd.DataFrame(results_nn.cv_results_)
# nn_random.to_csv('NN_random_results2')

In [25]:
# Load the saved random-search results, best mean test score first, with a fresh
# 0..n-1 index so that row 0 is the best candidate.
NN_random_results = (
    pd.read_csv('NN_random_results2')
    .sort_values('mean_test_score', ascending = False)
    .reset_index(drop = True)
)

In [26]:
import re
def reg_sub(string_example):
  """Remove the ``'optimizer': <...>`` entry from a stringified parameter dict.

  The entry's value is an object repr such as ``<class '...'>``, which is not a
  Python literal; it has to be removed before the string can be parsed with
  ``ast.literal_eval``.

  Parameters
  ----------
  string_example : str
      Text of a parameter dict, e.g. ``"{'lr': 0.01, 'optimizer': <class 'x.SGD'>}"``.

  Returns
  -------
  str
      The same text without the optimizer entry. Text that contains no optimizer
      entry is returned unchanged.
  """
  # Entry followed by another key: remove it through its trailing comma. The match
  # is lazy so it stops at the FIRST '>,' — a greedy '.+' would run to the last
  # '>,' in the string and silently delete any later entries in between.
  my_string = re.sub(r"'optimizer.+?>,", "", string_example)
  # Entry in last (or only) position: there is no trailing comma, so remove the
  # entry together with the comma that precedes it, leaving a well-formed dict.
  my_string = re.sub(r",?\s*'optimizer.+?>(?=\s*\}\s*$)", "", my_string)
  return my_string

In [27]:
# Rebuild the classifier from the best row of the random search (row 0 after sorting).
estimator = KerasClassifier(build_fn = build_network, verbose = 0)

best_params_repr = NN_random_results['params'][0]
# reg_sub removes the optimizer entry so the remaining text is a valid Python literal.
best_params = ast.literal_eval(reg_sub(best_params_repr))

# The optimizer was part of the tuned parameters, so reuse the one recorded for the
# best row instead of always substituting SGD. It is stored as a class repr
# (e.g. "<class '...SGD'>"), so recover it from the last dotted component of the name.
# If the repr is missing or names an unknown class, fall back to SGD.
optimizer_lookup = {opt.__name__: opt for opt in (SGD, Nadam, RMSprop)}
optimizer_match = re.search(r"'optimizer'\s*:\s*<class '(?:[\w.]*\.)?(\w+)'>", best_params_repr)
best_optimizer = optimizer_lookup.get(optimizer_match.group(1), SGD) if optimizer_match else SGD

NN_clf = estimator.set_params(optimizer = best_optimizer, **best_params)

In [28]:
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import confusion_matrix, make_scorer
from sklearn.model_selection import StratifiedKFold

def true_neg_rate(y_true, y_pred):
  """Return the true-negative rate, TN / (TN + FP).

  Row 0 of the confusion matrix holds the samples whose true label sorts first;
  that label is treated as the negative class. Column 0 of that row counts the
  samples predicted as that label, column 1 the ones predicted as the next label.
  """
  negatives_row = confusion_matrix(y_true, y_pred)[0]
  true_neg, false_pos = negatives_row[0], negatives_row[1]
  return true_neg/(true_neg + false_pos)

# Scorers passed to cross_validate; each key comes back as 'test_<key>' in its output.
c_mat = {
    'TNR': make_scorer(true_neg_rate),   # custom scorer built from true_neg_rate
    'TPR': 'recall',
    'Acc': 'accuracy',
    'AUC': 'roc_auc',
}

In [32]:
%%time
columns = ['TNR', 'TPR', 'Accuracy', 'AUC', 'Loss']
results = pd.DataFrame(0.0, columns=columns, index = ['NN_Random_clf']) 
kf = StratifiedKFold(n_splits = 3)

m_scores = cross_validate(NN_clf, X_resampled, y_resampled, cv = kf, scoring=c_mat, return_train_score= False, verbose=True)
results.loc['NN_Random_clf','TNR'] = np.mean(m_scores['test_TNR'])
results.loc['NN_Random_clf', 'TPR'] = np.mean(m_scores['test_TPR'])
results.loc['NN_Random_clf', 'Accuracy'] = np.mean(m_scores['test_Acc'])
results.loc['NN_Random_clf', 'AUC'] = np.mean(m_scores['test_AUC'])
results.loc['NN_Random_clf', 'Loss'] = -(1-results.loc['NN_Random_clf','TNR'])*2417 -(1-results.loc['NN_Random_clf', 'TPR'])*1124
    
results = results.round(3)
results.to_csv('NN_results.csv')

CPU times: user 30min 59s, sys: 6min 28s, total: 37min 27s
Wall time: 12min 44s


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 12.7min finished


In [33]:
# Display the rounded one-row summary (TNR, TPR, Accuracy, AUC, Loss) for NN_Random_clf.
results

Unnamed: 0,TNR,TPR,Accuracy,AUC,Loss
NN_Random_clf,0.802,0.53,0.69,0.741,-1007.934
