In [11]:
#Librabries
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [12]:
#Preprocessing
data = pd.read_csv('creditcard.csv')

print(f"Data shape : {data.shape}")
print(f"Fraudulent cases : {data[data['Class'] == 1].shape[0]}")
print(f"Normal cases : {data[data['Class'] == 0].shape[0]}")

Data shape : (284807, 31)
Fraudulent cases : 492
Normal cases : 284315


In [13]:
scaler = StandardScaler()
data['Amount'] = scaler.fit_transform(data['Amount'].values.reshape(-1,1))
data['Time'] = scaler.fit_transform(data['Time'].values.reshape(-1,1))

normal_data = data[data['Class'] == 0]
fraud_data = data[data['Class'] == 1]

normal_data = normal_data.drop('Class', axis=1)
fraud_data = fraud_data.drop('Class', axis=1)

x_train,x_val = train_test_split(normal_data, test_size=0.2,random_state=42)

x_train = x_train.values
x_val = x_val.values 
x_fraud = fraud_data.values

input_dim = x_train.shape[1]
print(f"Number of features (input dimension): {input_dim}")

Number of features (input dimension): 30


In [14]:
latent_dim = 8
input_layer = Input(shape=(input_dim,))

encoder = Dense(64,activation='relu')(input_layer)
encoder = Dropout(0.1)(encoder)
encoder = Dense(32,activation='relu')(encoder)
encoder = Dropout(0.1)(encoder)
encoder = Dense(16,activation='relu')(encoder)
encoder = Dropout(0.1)(encoder)
encoder = Dense(latent_dim, activation='relu')(encoder)

In [5]:
decoder = Dense(16,activation='relu')(encoder)
decoder = Dropout(0.1)(decoder)
decoder = Dense(32,activation='relu')(decoder)
decoder = Dropout(0.1)(decoder)
decoder = Dense(64,activation='relu')(decoder)
decoder = Dropout(0.1)(decoder)
decoder = Dense(input_dim, activation='linear')(decoder)

autoencoder = Model(inputs= input_layer,outputs=decoder)

In [6]:
autoencoder.compile(optimizer='adam',
                    loss='mean_squared_error',
                    metrics=['mae'])

autoencoder.summary()

In [15]:
history = autoencoder.fit(
    x_train,x_train,
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_data=(x_val,x_val)
)

Epoch 1/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - loss: 0.3618 - mae: 0.3994 - val_loss: 0.2491 - val_mae: 0.3366
Epoch 2/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - loss: 0.3682 - mae: 0.4015 - val_loss: 0.2607 - val_mae: 0.3422
Epoch 3/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - loss: 0.3645 - mae: 0.3999 - val_loss: 0.2522 - val_mae: 0.3366
Epoch 4/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - loss: 0.3657 - mae: 0.3997 - val_loss: 0.2509 - val_mae: 0.3354
Epoch 5/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 3ms/step - loss: 0.3629 - mae: 0.4002 - val_loss: 0.2611 - val_mae: 0.3400
Epoch 6/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 3ms/step - loss: 0.3653 - mae: 0.3998 - val_loss: 0.2511 - val_mae: 0.3369
Epoch 7/50
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [9]:
resconstruction_val = autoencoder.predict(x_val)
val_loss = tf.keras.losses.mae(resconstruction_val,x_val)

resconstruction_fraud = autoencoder.predict(x_fraud)
fraud_loss = tf.keras.losses.mae(resconstruction_fraud,x_fraud)

print(f"val_loss : {np.mean(val_loss)} , fraud_loss : {np.mean(fraud_loss)}")


[1m1777/1777[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 603us/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 791us/step
val_loss : 0.33761522004925754 , fraud_loss : 1.7308246822637106


In [10]:
threshold = np.mean(val_loss) + 3*np.std(val_loss)
print(f"\nReconstruction Error Threshold (MAE) : {threshold}")

anomalies = fraud_loss > threshold  
print(f"\nTotal fraudulent transactions : {len(x_fraud)}")
print(f"Frauds detected as anomalies (loss > treshold): {np.sum(anomalies)}")
print(f"Detection Rate : {np.sum(anomalies)/ len(x_fraud) * 100 :.2f}%")


Reconstruction Error Threshold (MAE) : 0.8754864534431288

Total fraudulent transactions : 492
Frauds detected as anomalies (loss > treshold): 400
Detection Rate : 81.30%
