In [1]:
#Librabries
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
#Preprocessing
data = pd.read_csv('creditcard.csv')

print(f"Data shape : {data.shape}")
print(f"Fraudulent cases : {data[data['Class'] == 1].shape[0]}")
print(f"Normal cases : {data[data['Class'] == 0].shape[0]}")

Data shape : (284807, 31)
Fraudulent cases : 492
Normal cases : 284315


In [3]:
scaler = StandardScaler()
data['Amount'] = scaler.fit_transform(data['Amount'].values.reshape(-1,1))
data['Time'] = scaler.fit_transform(data['Time'].values.reshape(-1,1))

normal_data = data[data['Class'] == 0]
fraud_data = data[data['Class'] == 1]

normal_data = normal_data.drop('Class', axis=1)
fraud_data = fraud_data.drop('Class', axis=1)

x_train,x_val = train_test_split(normal_data, test_size=0.2,random_state=42)

x_train = x_train.values
x_val = x_val.values 
x_fraud = fraud_data.values

input_dim = x_train.shape[1]
print(f"Number of features (input dimension): {input_dim}")

Number of features (input dimension): 30


In [4]:
latent_dim = 8
auto = Sequential()
auto.add(Input(shape=(input_dim,)))
auto.add(Dense(64,activation='relu'))
auto.add(Dropout(0.1))
auto.add(Dense(32,activation='relu'))
auto.add(Dropout(0.1))
auto.add(Dense(16,activation='relu'))
auto.add(Dropout(0.1))
auto.add(Dense(latent_dim, activation='relu'))
auto.add(Dense(16,activation='relu'))
auto.add(Dropout(0.1))
auto.add(Dense(32,activation='relu'))
auto.add(Dropout(0.1))
auto.add(Dense(64,activation='relu'))
auto.add(Dense(30,activation='linear'))


In [5]:
auto.compile(optimizer='adam',
                    loss='mean_squared_error',
                    metrics=['mae'])

auto.summary()

In [6]:
history = auto.fit(
    x_train,x_train,
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_data=(x_val,x_val)
)

Epoch 1/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - loss: 0.7022 - mae: 0.5465 - val_loss: 0.4689 - val_mae: 0.4622
Epoch 2/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.5357 - mae: 0.4889 - val_loss: 0.4378 - val_mae: 0.4433
Epoch 3/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4961 - mae: 0.4730 - val_loss: 0.4052 - val_mae: 0.4287
Epoch 4/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4745 - mae: 0.4635 - val_loss: 0.3978 - val_mae: 0.4234
Epoch 5/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - loss: 0.4615 - mae: 0.4580 - val_loss: 0.3927 - val_mae: 0.4215
Epoch 6/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4599 - mae: 0.4558 - val_loss: 0.3765 - val_mae: 0.4146
Epoch 7/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [10]:
resconstruction_val = auto.predict(x_val)
val_loss = tf.keras.losses.mae(resconstruction_val,x_val)

resconstruction_fraud = auto.predict(x_fraud)
fraud_loss = tf.keras.losses.mae(resconstruction_fraud,x_fraud)

print(f"val_loss : {np.mean(val_loss)} , fraud_loss : {np.mean(fraud_loss)}")


[1m1777/1777[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 949us/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
val_loss : 0.38747856910099193 , fraud_loss : 1.5190453419036365


In [11]:
threshold = np.mean(val_loss) + 3*np.std(val_loss)
print(f"\nReconstruction Error Threshold (MAE) : {threshold}")

anomalies = fraud_loss > threshold  
print(f"\nTotal fraudulent transactions : {len(x_fraud)}")
print(f"Frauds detected as anomalies (loss > treshold): {np.sum(anomalies)}")
print(f"Detection Rate : {np.sum(anomalies)/ len(x_fraud) * 100 :.2f}%")


Reconstruction Error Threshold (MAE) : 0.9568192776362705

Total fraudulent transactions : 492
Frauds detected as anomalies (loss > treshold): 386
Detection Rate : 78.46%
