In [34]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.layers import Dense, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [38]:
data = pd.read_csv('creditcard.csv')

In [39]:
x = data.drop(columns = ['Time', 'Class'])
y = data['Class']

In [40]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [41]:
xtrain, xtest, ytrain, ytest = train_test_split(x_scaled, y, test_size = 0.25, random_state = 42)

In [42]:
imputer = SimpleImputer(strategy='mean')
xtest = imputer.fit_transform(xtest)

In [43]:
input_dim = xtrain.shape[1]
input_layer = Input(shape = (input_dim,))
encoder = Dense(14, activation = 'relu')(input_layer)
encoder = Dense(7, activation = 'relu')(encoder)
decoder = Dense(14, activation = 'relu')(encoder)
decoder = Dense(input_dim, activation = 'sigmoid')(decoder)

In [44]:
autoencoder = Model(inputs = input_layer, outputs = decoder)
autoencoder.compile(optimizer = 'adam', loss = 'mse')

In [45]:
autoencoder.fit(xtrain, xtrain, epochs = 15, batch_size = 256, validation_split = 0.2)

Epoch 1/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 1.0289 - val_loss: 0.9316
Epoch 2/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.9115 - val_loss: 0.8914
Epoch 3/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8681 - val_loss: 0.8731
Epoch 4/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.8496 - val_loss: 0.8619
Epoch 5/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.8520 - val_loss: 0.8522
Epoch 6/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.8262 - val_loss: 0.8450
Epoch 7/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.8190 - val_loss: 0.8398
Epoch 8/15
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.8238 - val_loss: 0.8357
Epoch 9/15
[1m668/668[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7fc2a6672b00>

In [46]:
reconstructions = autoencoder.predict(xtest)

[1m2226/2226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step


In [50]:
mse = np.mean(np.power(xtest - reconstructions, 2), axis = 1)
threshold = np.percentile(mse, 99)
anomalies = mse > threshold
print(f"Detected anomalies : {np.sum(anomalies)}")

Detected anomalies : 713
