In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import os
from datetime import datetime
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
os.chdir(r'C:\Users\alanw\OneDrive\Varsity\Research\Code')

In [3]:
# Loading  data
df_test = pd.read_csv('data_test_set.csv')
df_train = pd.read_csv('data_balanced_train.csv')

# Data Preparation
When using Neural Network and Deep Learning based systems, it is usually a good idea to Standardize your data,

In [4]:
from sklearn.preprocessing import StandardScaler

Scaling Data

In [5]:
scaler_train = StandardScaler()
scaler_test = StandardScaler()

In [6]:
scaler_train.fit(df_train.drop('Y',axis=1))
scaler_test.fit(df_test.drop('Y',axis=1))

StandardScaler()

In [7]:
scaled_features_train = scaler_train.fit_transform(df_train.drop('Y',axis=1))
scaled_features_test = scaler_test.fit_transform(df_test.drop('Y',axis=1))

In [8]:
df_feat_train = pd.DataFrame(scaled_features_train,columns=df_train.columns[:-1])
df_feat_test = pd.DataFrame(scaled_features_test,columns=df_test.columns[:-1])

Setting up datasets

In [9]:
X_train = df_feat_train
y_train = df_train['Y']

In [10]:
X_test = df_feat_test
y_test = df_test['Y']

# Setting Up and Training The Model

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [12]:
def create_network():

    # create model
    model = Sequential()

    # output layer
    model.add(Dense(30, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))

    # hidden layer
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
    
    # Return compiled network
    return model

In [13]:
# Early stop on validation accuracy
es = EarlyStopping(monitor = 'val_loss', patience = 25)

# Save the best model as best_banknote_model.hdf5
mc = ModelCheckpoint('best_simple_ann_balanced.hdf5', monitor = 'val_accuracy', save_best_only = True)

In [14]:
epochs = 50

In [15]:
t0 = datetime.now()

In [16]:
# Wrap Keras model so it can be used by scikit-learn
ann = KerasClassifier(build_fn=create_network, 
                                 epochs=epochs, 
                                 verbose=0)

In [17]:
ann.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test), callbacks=[es,mc] ,verbose=0) 

<tensorflow.python.keras.callbacks.History at 0x2cf1ad11d60>

In [18]:
t1 = datetime.now()

In [19]:
# losses = pd.DataFrame(ann.history.history)

In [20]:
# losses[['loss','val_loss']].plot()

In [21]:
# losses[['accuracy', 'val_accuracy']].plot()

In [22]:
from tensorflow.keras.models import load_model

In [23]:
best_ann = load_model('best_simple_ann.hdf5')

In [24]:
predictions = best_ann.predict_classes(X_test)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [25]:
from sklearn.metrics import confusion_matrix, cohen_kappa_score, accuracy_score, roc_auc_score
from sklearn.metrics import f1_score, precision_score, recall_score

In [26]:
print(confusion_matrix(y_test,predictions))

[[3384  114]
 [ 740  262]]


In [27]:
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
precision = precision_score(y_test, predictions)
accuracy = accuracy_score(y_test, predictions)
auc = roc_auc_score(y_test, predictions)
kappa = cohen_kappa_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print("F1-Score: %.2f" % (f1))
print("Recall: %.2f" % (recall))
print("Precision: %.2f" % (precision))
print("Cohen's Kappa: %.2f" % (kappa))
print("Area Under Curve: %.2f" % (auc))
print("Execution Time: ", (t1 - t0))

Accuracy: 81.02%
F1-Score: 0.38
Recall: 0.26
Precision: 0.70
Cohen's Kappa: 0.29
Area Under Curve: 0.61
Execution Time:  0:00:38.984910


In [28]:
x, y = df_train['Y'].value_counts()
print("Training Data Split: " + str(int(x/(x+y)*100)) + ":" + str(int(y/(x+y)*100)))
x, y = df_test['Y'].value_counts()
print("Testing Data Split: " + str(int(x/(x+y)*100)) + ":" + str(int(y/(x+y)*100)))

Training Data Split: 52:47
Testing Data Split: 77:22


In [29]:
from sklearn.model_selection import RepeatedStratifiedKFold, cross_validate, cross_val_score

In [32]:
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_validate(ann, X_train, y_train, scoring=['accuracy','roc_auc'], cv=cv, n_jobs=-1)

In [48]:
print('Mean ROC AUC: %.2f' % np.mean(scores['test_roc_auc']))
print('Mean Accuracy: %.2f%%' % (np.mean(scores['test_accuracy'])*100))

Mean ROC AUC: 0.80
Mean Accuracy: 73.37%


RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)