# Credit Card Fraud Detection


### A fully connected network for fraud detection. 
#### This demo uses the Credit Card Fraud Detection dataset, originally taken from: https://www.kaggle.com/mlg-ulb/creditcardfraud. The notebook is based on a notebook implemented by the Kaggle community (https://www.kaggle.com/omkarsabnis/credit-card-fraud-detection-using-neural-networks), with some changes applied to make the network's architecture FHE-friendly.


In [None]:
import os
##### For reproducibility: seed the Python, NumPy and TensorFlow random
##### number generators with the same fixed value.
seed_value= 1
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes - confirm.
os.environ['PYTHONHASHSEED']=str(seed_value)
import random
random.seed(seed_value)  # Python's built-in `random` module
import numpy as np
np.random.seed(seed_value)  # NumPy global RNG (np.random.permutation is used further down)
import tensorflow as tf
tf.random.set_seed(seed_value)  # TensorFlow global seed
from tensorflow.keras import backend as K
##### End of the seeding section.
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential
import h5py
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import metrics
#####
# import utils
# The parent directory is appended to sys.path before `utils` is imported.
# `path_to_utils` is also passed to utils.get_data_sets_dir() when the
# dataset is read.
import sys
path_to_utils='..'
sys.path.append(path_to_utils)
import utils
# import activations
# NOTE(review): `activations` is a project-local module; presumably it is
# resolved through the same sys.path entry - confirm.
from activations import SquareActivation


# Output directory for the files this notebook writes (test data, model
# architecture and weights).
PATH = os.path.join('..', 'data', 'net_fraud')
# exist_ok=True creates the directory when it is missing and is a no-op when
# it already exists, so no separate (race-prone) existence check is needed.
os.makedirs(PATH, exist_ok=True)

# Training hyper-parameters.
epochs = 3
batch_size = 32
optimizer = Adam  # optimizer class; instantiated with the learning rate when the model is compiled
lr = 0.01  # learning rate handed to the optimizer
print("misc. init complete")

### Read Dataset

In [None]:
# Load the raw transactions from the project's data-sets directory.
csv_path = os.path.join(utils.get_data_sets_dir(path_to_utils), 'net_fraud', 'creditcard.csv')
df = pd.read_csv(csv_path)

print(f'Reading {df.shape[0]} samples')

# Features are the columns at positions 1..29; the label is the 'Class' column.
feature_columns = list(df.columns[1:30])
X = df[feature_columns].values
Y = df['Class'].values
print(f'number of features: {X.shape[1]}')

# Scale the feature matrix with scikit-learn's `normalize` (default arguments).
X = preprocessing.normalize(X)

# Hold out 33% as the test set (stratified on the label), then split a further
# 33% of the remainder off as the validation set.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, stratify=Y, random_state=0)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.33, random_state=0)


### Replicate the smallest class for balancing 

In [None]:
def replicate_smallest_class(x, y, class_id, n_copies=5):
    """Oversample one class by appending copies of its samples, then shuffle.

    Args:
        x: NumPy array of samples, indexed along its first axis.
        y: NumPy array of labels aligned with ``x`` along the first axis.
        class_id: Label value of the class whose samples are replicated.
        n_copies: Number of extra copies of that class's samples to append.
            Defaults to 5.

    Returns:
        A ``(x, y)`` tuple of new arrays holding the original samples plus
        ``n_copies`` copies of every sample labelled ``class_id``, jointly
        shuffled with ``np.random.permutation``. The inputs are not modified.

    Raises:
        ValueError: If ``n_copies`` is negative.
    """
    if n_copies < 0:
        raise ValueError(f"n_copies must be non-negative, got {n_copies}")

    class_mask = y == class_id
    x_class = x[class_mask]
    y_class = y[class_mask]

    # One concatenation instead of re-allocating the arrays once per copy.
    x = np.concatenate([x] + [x_class] * n_copies)
    y = np.concatenate([y] + [y_class] * n_copies)

    # The same permutation is applied to both arrays so that samples and
    # labels stay aligned.
    permut = np.random.permutation(x.shape[0])
    return x[permut], y[permut]

# Oversample the class labelled 1 in the training split only.
x_train, y_train = replicate_smallest_class(x_train, y_train, class_id=1)

# Drop the trailing samples so the training set is a whole number of batches.
nb_train_samples = x_train.shape[0] - (x_train.shape[0] % batch_size)
x_train, y_train = x_train[:nb_train_samples], y_train[:nb_train_samples]


print("After replicating items from the smaller class:")
print(f'x_train: {x_train.shape}')
print(f'x_val: {x_val.shape}')
print(f'x_test: {x_test.shape}')

### Reshape Labels

In [None]:
# Turn every label array into a 2-D array with one row per sample.
y_train, y_val, y_test = (
    labels.reshape(labels.shape[0], -1) for labels in (y_train, y_val, y_test)
)

print("Training data ready")

### Save dataset 

In [None]:
def save_data_set(x, y, data_type, s=''):
    """Write samples and labels to two HDF5 files under ``PATH``.

    The samples go to ``x_<data_type><s>.h5`` in a dataset named
    ``x_<data_type>``; the labels go to ``y_<data_type><s>.h5`` in a dataset
    named ``y_<data_type>``. Existing files are overwritten (mode ``'w'``).

    Args:
        x: Array of samples; its ``shape`` is printed before saving.
        y: Array of labels.
        data_type: Name of the split (e.g. ``'test'``), used in the file and
            dataset names.
        s: Optional suffix appended to the file names only.
    """
    print("Saving x_{} of shape {}".format(data_type, x.shape))
    # The context managers close each file even if writing the dataset fails.
    with h5py.File(os.path.join(PATH, f'x_{data_type}{s}.h5'), 'w') as xf:
        xf.create_dataset('x_{}'.format(data_type), data=x)

    with h5py.File(os.path.join(PATH, f'y_{data_type}{s}.h5'), 'w') as yf:
        yf.create_dataset(f'y_{data_type}', data=y)
    
# Write the test split (samples and labels) to HDF5 files under PATH.
save_data_set(x_test, y_test, data_type='test')

### Fraud Detection Network

In [None]:
# Small fully connected network: Dense layers of 20, 5 and 1 units, each
# followed by a SquareActivation layer.
# NOTE(review): SquareActivation is a project-local layer; presumably it
# squares its input as an FHE-friendly non-linearity - confirm.
model = Sequential()

model.add(Dense(20, input_shape=(x_train.shape[1],)))
model.add(SquareActivation())
model.add(Dense(5))
model.add(SquareActivation())
model.add(Dense(1))
model.add(SquareActivation())

# `learning_rate` is the supported keyword argument; the older `lr` alias is
# deprecated and is rejected by newer Keras releases.
model.compile(loss='binary_crossentropy',
              optimizer=optimizer(learning_rate=lr),
              metrics=['accuracy'])

model.summary()

### Train

In [None]:
# Fit on the training split; the validation split is passed as validation_data.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    validation_data=(x_val, y_val),
    shuffle=True,
)
model.fit(x_train, y_train, **fit_options)

# Evaluate on the test split; score[0] is reported as the loss and score[1]
# as the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)

print(f'Test loss: {score[0]:.3f}')
print(f'Test accuracy: {score[1] * 100:.3f}%')

### Confusion Matrix - TEST

In [None]:
# Restrict the evaluation to the first 4096 test samples.
# NOTE(review): this rebinds the module-level `batch_size` that the training
# cells also read; re-running those cells afterwards will pick up 4096.
batch_size = 4096
x_test = x_test[0:batch_size,:]
y_test = y_test[0:batch_size,:]

# Continuous model outputs, and the hard 0/1 decisions at a 0.5 threshold.
y_pred_vals = model.predict(x_test)
y_pred = (y_pred_vals > 0.5).astype("int32")

# The ROC curve / AUC is computed from the continuous scores: feeding it the
# already-thresholded labels would collapse the curve to a single operating
# point. The confusion matrix and classification report use the hard labels.
fpr, tpr, thresholds = metrics.roc_curve(y_test.ravel(), y_pred_vals.ravel())
cm = metrics.confusion_matrix(y_test, y_pred)
print(f"AUC Score: {metrics.auc(fpr, tpr):.3f}")
print("Classification report:")
print(metrics.classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(cm)

### Serialize model and weights

In [None]:
# The architecture goes to a JSON file and the weights to an HDF5 file, both
# under PATH.
architecture_path = os.path.join(PATH, 'model.json')
weights_path = os.path.join(PATH, 'model.h5')

model_json = model.to_json()
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)

# serialize weights to HDF5
model.save_weights(weights_path)
print("Saved model to ",PATH)