In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime
%matplotlib inline

In [3]:
# Load the dataset from CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='data_balanced_cv.csv')

In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
# Per-column standardiser; the library defaults are spelled out for the reader
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [6]:
# Learn the scaling statistics from every column except the target 'Y'
scaler.fit(df.drop(columns='Y'))

StandardScaler()

In [7]:
# The scaler was already fitted on these same feature columns in the previous cell,
# so only apply the transform here; calling fit_transform repeated the identical fit.
scaled_features = scaler.transform(df.drop(columns='Y'))

In [8]:
# Feature matrix: use the scaled array directly. The former DataFrame round-trip was
# undone immediately by `.values`, and its `df.columns[:-1]` labels silently assumed
# that 'Y' is the last column (the scaling step above drops 'Y' by name instead).
X = scaled_features
# Target vector taken from the 'Y' column
y = df['Y'].values

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, cross_validate

In [10]:
def create_network():
    """Build and compile the feed-forward binary classifier.

    Architecture: one ReLU hidden layer with half as many units as there are
    input features (never fewer than one), followed by batch normalisation and
    a single sigmoid output unit. The feature count is read from the
    module-level array ``X``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        RMSprop optimizer and accuracy as the tracked metric.
    """
    n_features = X.shape[1]

    # Dense needs an integer unit count. True division (`/`) produced a float,
    # which only works where Keras coerces it; floor division makes the
    # truncation explicit, and max() avoids a zero-unit layer for one feature.
    hidden_units = max(1, n_features // 2)

    # create model
    model = Sequential()

    # hidden layer
    model.add(Dense(hidden_units, input_dim=n_features, kernel_initializer='normal', activation='relu'))
    model.add(BatchNormalization())

    # output layer
    model.add(Dense(1, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    # Return compiled network
    return model

In [11]:
# Expose the Keras network through the scikit-learn estimator interface;
# create_network is handed over as the model factory.
ann = KerasClassifier(
    build_fn=create_network,
    epochs=100,
    batch_size=500,
    verbose=0,
)

In [12]:
# Stratified 10-fold cross-validation repeated 3 times, with a fixed seed for the splits
cv = RepeatedStratifiedKFold(
    n_repeats=3,
    n_splits=10,
    random_state=69,
)

In [13]:
# Fit the scaler inside every CV split so that statistics of the held-out fold never
# influence training (scaling on the full dataset before splitting leaks test data).
# X was standardised on the whole dataset earlier; standardising it again on each
# training fold gives the same values as scaling the raw features on that fold alone
# (for columns that vary within the fold), so X can be passed in unchanged.
from sklearn.pipeline import make_pipeline

cv_model = make_pipeline(StandardScaler(), ann)

t0 = datetime.now()
scores = cross_validate(
    cv_model,
    X,
    y,
    scoring=['accuracy', 'roc_auc', 'precision', 'recall', 'f1'],
    cv=cv,
    n_jobs=-1,
)
t1 = datetime.now()

In [14]:
# (display label, key in `scores`) for each reported metric; keeping them in one table
# means the report format is written once instead of being copy-pasted per metric.
metric_labels = [
    ('Accuracy', 'test_accuracy'),
    ('ROC AUC', 'test_roc_auc'),
    ('Precision', 'test_precision'),
    ('Recall', 'test_recall'),
    ('F1 Score', 'test_f1'),
]

for label, key in metric_labels:
    pct = scores[key] * 100  # per-split scores expressed as percentages
    print('Mean %s: %.2f%% Std Dev: (+/- %.3f%%)' % (label, np.mean(pct), np.std(pct)))

# Wall-clock duration of the cross-validation run
print("Execution Time: ", (t1 - t0))

Mean Accuracy: 75.87% Std Dev: (+/- 0.602%)
Mean ROC AUC: 83.20% Std Dev: (+/- 0.591%)
Mean Precision: 76.77% Std Dev: (+/- 1.037%)
Mean Recall: 70.39% Std Dev: (+/- 1.617%)
Mean F1 Score: 73.42% Std Dev: (+/- 0.809%)
Execution Time:  0:02:04.426371
