In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers, optimizers

import numpy as np
import pandas as pd
import os
import random

In [2]:
# Seed every random number generator the notebook relies on (Python's
# `random`, NumPy's global RNG and TensorFlow) with one shared value.
SEED = 2022

# NOTE(review): CPython reads PYTHONHASHSEED when the interpreter starts, so
# assigning it here does not change string hashing in the already-running
# kernel; only child processes inherit it. Export it before launching Jupyter
# if hash determinism matters.
os.environ['PYTHONHASHSEED'] = '0'

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
# Read the transactions table from a CSV that sits next to the notebook.
csv_path = './creditcard.csv'
data = pd.read_csv(csv_path)

In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
# Left as the cell's last expression so the notebook renders the table.
data.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,3.918649e-15,5.682686e-16,-8.761736e-15,2.811118e-15,-1.552103e-15,2.04013e-15,-1.698953e-15,-1.893285e-16,-3.14764e-15,...,1.47312e-16,8.042109e-16,5.282512e-16,4.456271e-15,1.426896e-15,1.70164e-15,-3.662252e-16,-1.217809e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [5]:
from sklearn.preprocessing import RobustScaler

# One scaler instance is reused for both columns; each fit_transform call
# refits it, so after this cell it holds the statistics of 'Time'.
rob_scaler = RobustScaler()

# The scaler is fed a single-column 2-D array, hence the reshape(-1, 1).
amount_column = data['Amount'].values.reshape(-1, 1)
data['amount'] = rob_scaler.fit_transform(amount_column)

time_column = data['Time'].values.reshape(-1, 1)
data['time'] = rob_scaler.fit_transform(time_column)

# Replace the raw columns with their scaled lower-case counterparts.
# NOTE(review): the drop happens in place, so running this cell a second time
# on the same kernel raises KeyError because 'Amount' and 'Time' are gone.
data.drop(columns=['Amount', 'Time'], inplace=True)

In [6]:
# Shuffle the whole table. No random_state is passed, so pandas draws from
# NumPy's global RNG and the row order depends on the np.random.seed call
# made earlier in the notebook.
data1 = data.sample(frac=1)

# Random undersampling: keep every fraud row (Class == 1) and an equally
# sized slice of the shuffled legitimate rows (Class == 0). The slice length
# is taken from the fraud count rather than a literal 492, so the two classes
# stay balanced even if the CSV is refreshed or filtered upstream.
fraud_data = data1.loc[data1['Class'] == 1]
non_fraud_data = data1.loc[data1['Class'] == 0][:len(fraud_data)]

In [7]:
# Stack the fraud rows on top of the sampled non-fraud rows ...
class_frames = [fraud_data, non_fraud_data]
balanced_data = pd.concat(class_frames)

# ... then reshuffle with a fixed seed so the classes are interleaved.
new_data = balanced_data.sample(
    frac=1,
    random_state=42,
)

In [9]:
# Labels are the 'Class' column; features are every remaining column.
y = new_data['Class']
X = new_data.drop(columns='Class')

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 20% of the balanced rows for testing and hand the model plain
# NumPy arrays: each piece returned by train_test_split is unwrapped with
# `.values` as it is unpacked.
X_train, X_test, y_train, y_test = (
    piece.values
    for piece in train_test_split(X, y, test_size=0.2, shuffle=True)
)

In [13]:
# Two-layer classifier: a ReLU hidden layer as wide as the feature vector,
# followed by a 2-unit softmax that outputs one probability per class.
n_features = X_train.shape[1]

layer_stack = [
    layers.Dense(n_features, input_shape=(n_features,), activation='relu'),
    layers.Dense(2, activation='softmax'),
]
model = keras.Sequential(layer_stack)

In [14]:
# Integer class labels paired with a softmax output, hence the *sparse*
# categorical cross-entropy loss; accuracy is tracked during training.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [15]:
# Train for 20 epochs in mini-batches of 20, reserving 20% of the training
# rows for validation; verbose=2 prints one summary line per epoch.
model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=20,
    validation_split=0.2,
    shuffle=True,
    verbose=2,
)

Epoch 1/20
32/32 - 0s - loss: 0.7101 - accuracy: 0.5644 - val_loss: 0.6069 - val_accuracy: 0.6329 - 320ms/epoch - 10ms/step
Epoch 2/20
32/32 - 0s - loss: 0.4366 - accuracy: 0.7520 - val_loss: 0.4215 - val_accuracy: 0.7468 - 31ms/epoch - 970us/step
Epoch 3/20
32/32 - 0s - loss: 0.3090 - accuracy: 0.8808 - val_loss: 0.3363 - val_accuracy: 0.8418 - 30ms/epoch - 938us/step
Epoch 4/20
32/32 - 0s - loss: 0.2445 - accuracy: 0.9285 - val_loss: 0.2882 - val_accuracy: 0.8797 - 30ms/epoch - 938us/step
Epoch 5/20
32/32 - 0s - loss: 0.2051 - accuracy: 0.9428 - val_loss: 0.2633 - val_accuracy: 0.8861 - 30ms/epoch - 938us/step
Epoch 6/20
32/32 - 0s - loss: 0.1784 - accuracy: 0.9491 - val_loss: 0.2432 - val_accuracy: 0.8987 - 31ms/epoch - 970us/step
Epoch 7/20
32/32 - 0s - loss: 0.1599 - accuracy: 0.9555 - val_loss: 0.2305 - val_accuracy: 0.8924 - 30ms/epoch - 938us/step
Epoch 8/20
32/32 - 0s - loss: 0.1455 - accuracy: 0.9555 - val_loss: 0.2220 - val_accuracy: 0.8987 - 31ms/epoch - 970us/step
Epoch 9/

<keras.callbacks.History at 0x1c59223cdf0>

In [17]:
# Score the undersampled model on the held-out split; evaluate() returns the
# loss followed by the compiled metric (accuracy).
test_scores = model.evaluate(X_test, y_test, verbose=2)
under_test_loss, under_test_acc = test_scores

7/7 - 0s - loss: 0.1564 - accuracy: 0.9289 - 15ms/epoch - 2ms/step
