In [None]:
# Please wait for the installation to be done before proceeding.
! pip install --quiet --requirement requirements.txt

In [None]:
import tensorflow as tf
from tensorflow import keras

import os
import tempfile

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTE

In [3]:
# Default figure size (in inches) for every plot that does not set its own.
mpl.rcParams["figure.figsize"] = (12, 10)
# List of colours in the currently configured property cycle.
colors = mpl.rcParams["axes.prop_cycle"].by_key()["color"]

In [None]:
# Read the credit-card transactions CSV straight from its public URL.
# (The former `file = tf.keras.utils` alias was never used and has been dropped.)
raw_df = pd.read_csv(
    'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv'
)
# Show the first rows as a quick sanity check of the loaded frame.
raw_df.head()

In [None]:
# Summary statistics for a representative subset of the columns.
raw_df.loc[
    :,
    [
        'Time',
        'V1', 'V2', 'V3', 'V4', 'V5',
        'V26', 'V27', 'V28',
        'Amount',
        'Class',
    ],
].describe()

In [None]:
# Rename Class -> isFraud on a NEW frame. Renaming in place through an alias
# would also rename the column in raw_df, so earlier cells that select
# 'Class' would fail with a KeyError when re-run.
data = raw_df.rename(columns={"Class": "isFraud"})

# Fraction of transactions labelled as fraud (a value in [0, 1], not a percentage).
fraud_per = data[data.isFraud == 1].isFraud.count() / data.isFraud.count()
print(fraud_per)

In [None]:
# Number of columns that contain at least one missing value (0 means none do).
print(data.isna().any().sum())

In [None]:
# Heatmap of the pairwise correlations between all columns of `data`.
corr = data.corr()
plt.figure(figsize=(14, 10))
plt.title('Correlation Plot', size=20)
sns.heatmap(
    corr,
    annot=True,      # write the coefficient inside each cell
    fmt='.1f',       # one decimal place
    cmap="Blues",
    linewidths=.1,
    xticklabels=corr.columns,
    yticklabels=corr.columns,
)
plt.show()

In [9]:
# Target vector: the fraud label. Feature matrix: every other column.
y = data["isFraud"]
x = data.drop(columns=["isFraud"])

In [10]:
#Standardization
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [11]:
# Train-Test split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [None]:
# SMOTE: oversample the minority class of the TRAINING set only.
# A fixed random_state makes the synthetic samples (and therefore the trained
# model) reproducible, consistent with the seeded train/test split.
X_train_SMOTE, y_train_SMOTE = SMOTE(random_state=0).fit_resample(X_train, y_train)
# Bar chart of the class counts after resampling.
pd.Series(y_train_SMOTE).value_counts().plot(kind="bar")
plt.title("Balanced Dataset")
plt.show()

In [None]:
# DNN: a stack of fully connected ReLU layers with dropout in between,
# ending in a single sigmoid unit.
layers = keras.layers
model = keras.Sequential()
# First layer declares the 30 input features.
model.add(layers.Dense(units=128, activation="relu", input_dim=30))
model.add(layers.Dense(units=64, activation="relu"))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(units=32, activation="relu"))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(units=32, activation="relu"))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(units=16, activation="relu"))
model.add(layers.Dropout(0.2))
# Output layer: one sigmoid unit.
model.add(layers.Dense(units=1, activation="sigmoid"))
model.summary()

In [None]:
# Metrics
# BinaryAccuracy thresholds the sigmoid output (at 0.5 by default) before
# comparing with the labels. The plain `Accuracy` metric checks exact equality
# between labels and raw predictions, which is almost never true for float
# outputs and therefore reports a misleading near-zero accuracy.
metrics = [
    keras.metrics.BinaryAccuracy(name="Accuracy"),
    keras.metrics.Precision(name="Precision"),
    keras.metrics.Recall(name="Recall")]
# Compiling and fitting the model
model.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = metrics)
# Lower the number of epochs for a quicker run; fewer epochs generally means a
# less accurate model, and vice versa.
model.fit(X_train_SMOTE, y_train_SMOTE, batch_size = 32, epochs = 100)
print("Evaluate on test data")
# evaluate() returns the loss followed by the metrics in the order listed above.
score = model.evaluate(X_test, y_test)
print("test loss, test accuracy, test precision, test recall:", score)

In [None]:
# Our model is trained. Lets save it to disk and then convert to openvino ir format.
# Change the model_name variable when you want to preserve multiple models in different folders
model_name = "default"
model.save('tensorflow_pb_models/' + model_name)

# Model Optimizer (mo) helps to convert tensorflow protobuf (.pb) file to openvino supported formate.
# https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html
! mo --output_dir openvino_ir_model/{model_name} --saved_model_dir tensorflow_pb_models/{model_name}

### Upload the OpenVINO IR-formatted model to your S3 bucket. Follow the [guide](./upload-to-s3-and-serve-model.md).

In [None]:
# Paste the inference URL you obtained by following the guide linked above.
my_route = 'https://PASTE-YOUR-OWN-INFER-LINK'

import requests, json
# JSON request body: one input tensor of shape [1, 30] holding a single sample.
my_req = '{"inputs": [{"name":"Func/StatefulPartitionedCall/input/_0:0", "shape": [1,30], "datatype": "FP32", "data": [[-0.81527562, -0.62780094,  1.18457726, -0.56138278,  1.97545981, -1.38669424, -0.03372776, -1.08378356, -0.46514641, -1.07813139, -2.98031409,  2.29087639, -2.82230106, 0.76695155, -5.65368683, 0.04526619, -4.77118557, -5.04520325, -3.02616084,  1.14274513, 0.35082495,  1.64467922,  0.38254332,  0.03085198, 0.83964697, -0.38594229, -0.51760032,  1.39294962,  0.22815041, 0.3301235]]}]}'

# Without a timeout, requests waits forever if the endpoint never answers;
# 30 seconds bounds the wait so the cell fails instead of hanging.
response = requests.post(my_route, data=my_req, timeout=30)
response.json()

### ----------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# The cells below are optional extras.
# Predict on the test features and turn each sigmoid output into a hard 0/1
# label using a 0.5 threshold.
pred = (model.predict(np.asarray(X_test)) > 0.5).astype(int)
pred

In [None]:
# Share of test samples whose predicted label matches the true label.
from sklearn.metrics import accuracy_score
accuracy_score(y_true=y_test, y_pred=pred)

In [None]:
# SMOTE balanced only the training set, so the test set is still imbalanced
# and plain accuracy is a misleading measure there. The F1 score (harmonic
# mean of precision and recall) is more informative for the fraud class.
from sklearn.metrics import f1_score
f1_score(y_true=y_test, y_pred=pred, zero_division=1)

In [None]:
# Confusion matrix of true vs. predicted labels on the test set; the
# off-diagonal cells are the model's wrong predictions.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(y_true=y_test, y_pred=pred)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['non fraud', 'fraud'],
)
disp.plot()
plt.show()