## Import Libraries

In [None]:
import pandas as pd

# Sequential will be used to create the three layers of neural networks:
# layers of neural networks:
# (1) Input Layer, (2) Hidden Layers and (3) Output Layer
from keras.models import Sequential

# Use '*' to import all the three types of layers
from keras.layers import *
from sklearn.model_selection import train_test_split

In [30]:
features = pd.read_csv('./test_features.csv')
labels = pd.read_csv('./test_labels.csv')

In [31]:
dimensions = len(features.columns)

# Neural network for Deep Learning Model

In [32]:
# Instantiate the Sequential class
modelDL = Sequential()

# Add the Input Layer
# Note:
# Dense: is the neural network architecture type
# input_dim: is the number of features
# activation: is the activation function ReLU (Rectified Linear Unit)
modelDL.add( Dense(50, input_dim=dimensions, activation='relu') )

# Add the Hidden Layers
modelDL.add( Dense(100, activation='relu') )
modelDL.add( Dense(50, activation='relu') )

# Add the Output Layer
modelDL.add( Dense(1, activation='linear') )

# Compile the Deep Learning model
modelDL.compile(loss='mean_squared_error', optimizer='adam')

In [33]:
# Note:
# features is the array of explanatory (random) variables
# label is the array of response (random) variable
# epochs is the number of times to feed the entire data to the model
# shuffle will randomize the features
# verbose will control the logging of TensorFlow
modelDL.fit( features, labels, epochs=3, shuffle=True, verbose=20)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f81eccd2250>

In [34]:
features_test = pd.read_csv('./test_features.csv')
labels_test = pd.read_csv('./test_labels.csv')

predictions = modelDL.predict(features_test)

In [35]:
print(predictions.max())
print(predictions.min())

4.5693865
-0.052263863


In [36]:
certainty_param = 0.2

likelyFraud = []
actuallyFraud = []
test = predictions[:5]
for index, prediction in enumerate(predictions):
  if prediction > certainty_param:
    likelyFraud.append(prediction)
    if (labels_test.loc[index] == 1).bool():
      actuallyFraud.append(prediction)

print( 'Total Fraud Cases: {}'.format(len(predictions)))
print( 'Likely Fraud: {}'.format( len( likelyFraud) ) )
print( 'Actual Fraud: {}'.format( len( actuallyFraud) ))

detection = len( actuallyFraud) / len( predictions)*100
accuracy = len( actuallyFraud) / len( likelyFraud)*100
print ('This model is able to predict {} % of fraud with an accuracy of {}%'.format(detection, accuracy))


Total Fraud Cases: 118929
Likely Fraud: 1692
Actual Fraud: 1134
This model is able to predict 0.9535100774411625 % of fraud with an accuracy of 67.02127659574468%
