<a href="https://colab.research.google.com/github/Stereo-Alex/Fraud_detection/blob/main/Workbook_with_NN_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Colab Set-up: 

In [None]:
# Install the Kaggle command-line client that the download cell below uses.
!pip install kaggle
from google.colab import files
# Opens Colab's upload widget; upload the Kaggle API token file here.
# The `cp` below expects the uploaded file to be named `kaggle.json`.
files.upload()
# Copy the token into ~/.kaggle/ and restrict it to owner read/write (mode 600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the `mlg-ulb/creditcardfraud` dataset archive into the current
# working directory using the Kaggle CLI.
!kaggle datasets download -d mlg-ulb/creditcardfraud

In [None]:
!unzip creditcardfraud.zip

# Preparing the data 

In [None]:
import pandas as pd
import numpy as np 

In [None]:
# Read the extracted transactions file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/content/creditcard.csv")

In [None]:
# Features are every column except the label; the label is the "Class" column.
X = df.drop(labels="Class", axis=1)
y = df.loc[:, "Class"]

## We standardize the data

In [None]:
# Standardize every feature column with scikit-learn's `preprocessing.scale`.
# NOTE(review): the scaling statistics are computed on the full data set, i.e.
# before the train/test split, so test rows influence them. Consider fitting
# the scaler on the training split only.
from sklearn import preprocessing

names = X.columns

# Rebuild a DataFrame from the scaled values, restoring both the column names
# and the original row index so the frame stays aligned with `y`.
scaled_df = pd.DataFrame(preprocessing.scale(X), columns=names, index=X.index)

# Preview the first rows only instead of rendering the whole frame.
scaled_df.head()

## We split the data 

In [None]:
from sklearn.model_selection import train_test_split

# Shuffled 70/30 hold-out split, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_df,
    y,
    stratify = y,
    shuffle = True,
    random_state = 0,
    test_size = 0.30,
)

## SMOTE (Synthetic Minority Oversampling Technique) — data augmentation
We do this because the data set is heavily imbalanced; without oversampling the minority class we can't really train a model that learns to recognise it.

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the training split only; the test split is not touched here.
sm = SMOTE(random_state = 33)

# `fit_sample` was removed from imbalanced-learn; `fit_resample` is the
# supported method name. `to_numpy()` passes the labels as a 1-D array, which
# is what the deprecated `Series.ravel()` call used to provide.
X_train_new, y_train_new = sm.fit_resample(X_train, y_train.to_numpy())

# Modeling
## Model Architecture 

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization

# Fully connected binary classifier. The first Dense layer is as wide as the
# number of input features, followed by two 64-unit hidden layers (each with
# batch normalization and 20% dropout) and a single sigmoid output unit.
model = Sequential([
    Dense(X_train_new.shape[1], activation = 'relu', input_dim = X_train_new.shape[1]),
    BatchNormalization(),
    Dense(64, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(64, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(1, activation = 'sigmoid'),
])

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit; the Adam
# optimizer is selected by name.
model.compile(loss = 'binary_crossentropy', optimizer = 'adam')

## Training 

In [None]:
# Train on the SMOTE-resampled training data for a fixed 150 epochs.
# NOTE(review): the hold-out test split is also used as validation data here.
history = model.fit(
    x = X_train_new,
    y = y_train_new,
    epochs = 150,
    batch_size = 500,
    validation_data = (X_test, y_test),
)

# Results

In [None]:
import matplotlib.pyplot as plt

# Tabulate the per-epoch values recorded during `model.fit` and draw one
# curve per recorded column.
evaluation_metrics = pd.DataFrame(data = model.history.history)
evaluation_metrics.plot(figsize=(10,5)).set_title("Loss for both Training and Validation", size = 20)


## Adding a Callback 


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has not improved for 10 consecutive
# epochs. `restore_best_weights=True` rolls the model back to the epoch with
# the lowest validation loss; without it the model keeps the weights from the
# last epoch run, i.e. `patience` epochs after the best one.
Callback = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience = 10,
                         restore_best_weights=True)

In [None]:
# Reworked model: same layer stack as `model`, but trained for up to 200
# epochs with the early-stopping callback attached.

model_2 = Sequential([
    Dense(X_train_new.shape[1], activation = 'relu', input_dim = X_train_new.shape[1]),
    BatchNormalization(),
    Dense(64, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(64, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(1, activation = 'sigmoid'),
])

model_2.compile(loss = 'binary_crossentropy', optimizer = 'adam')

# NOTE(review): the hold-out test split is the validation data that the
# early-stopping callback monitors.
history_2 = model_2.fit(
    x = X_train_new,
    y = y_train_new,
    epochs = 200,
    batch_size = 500,
    validation_data = (X_test, y_test),
    callbacks = [Callback],
)


# Results with callback


In [None]:
import matplotlib.pyplot as plt

# Tabulate the per-epoch values recorded during `model_2.fit` and draw one
# curve per recorded column.
evaluation_metrics = pd.DataFrame(data = model_2.history.history)
evaluation_metrics.plot(figsize=(10,5)).set_title("Loss for both Training and Validation", size = 20)


In [None]:
# `Sequential.predict_classes` was removed from TensorFlow/Keras (2.6+).
# For a single sigmoid output the equivalent is to threshold the predicted
# probability at 0.5 and cast to integer class labels.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
y_pred_2 = (model_2.predict(X_test) > 0.5).astype("int32")

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Confusion matrix of the model trained without early stopping, evaluated on
# the hold-out labels.
cm_nn = confusion_matrix(y_true = y_test, y_pred = y_pred)
print('No Callback Matrix')
cm_nn

In [None]:
# Confusion matrix of the model trained with the early-stopping callback,
# evaluated on the same hold-out labels.
cm_nn_2 = confusion_matrix(y_true = y_test, y_pred = y_pred_2)
print('Callback Matrix')
cm_nn_2