pip install numpy pandas matplotlib scikit-learn tensorflow


Use the Credit Card Fraud Dataset from Kaggle:
🔗 https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud
(The autoencoder is trained only on Class 0 (non-fraud) transactions and is then tested on both Class 0 and Class 1 (fraud) transactions.)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam


In [None]:
# Load the full transaction table from the CSV in the working directory.
df = pd.read_csv("creditcard.csv")

# Split rows by the 'Class' label: 0 is treated as "normal" (non-fraud)
# and 1 as fraud. Only the normal rows are used for training.
class_labels = df['Class']
normal_data = df.loc[class_labels == 0]
fraud_data = df.loc[class_labels == 1]


In [None]:
# Hold out part of the normal transactions so that the evaluation set never
# contains rows the autoencoder was trained on; scoring training rows would
# make the reported metrics optimistic. A fixed seed keeps the split
# reproducible between runs.
normal_train, normal_holdout = train_test_split(
    normal_data, test_size=0.2, random_state=42
)

# Training features: normal transactions only, with 'Time' and 'Class' dropped.
X = normal_train.drop(['Time', 'Class'], axis=1)

# Evaluation set: the held-out normal rows plus every fraud row, so both
# classes are still present at test time.
evaluation_rows = pd.concat([normal_holdout, fraud_data])
X_test = evaluation_rows.drop(['Time', 'Class'], axis=1)
y_test = evaluation_rows['Class']


In [None]:
# Standardize the features. The scaler is fitted on the training features
# only, and the same fitted scaler is then applied to the evaluation features.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Build the autoencoder: input -> 16 -> 8 (bottleneck) -> 16 -> input.
# The network is trained to reproduce its own input.
input_dim = X_scaled.shape[1]
input_layer = Input(shape=(input_dim,))

# Encoder: two ReLU layers that narrow down to the 8-unit bottleneck.
encoded = input_layer
for units in (16, 8):
    encoded = Dense(units, activation='relu')(encoded)

# Decoder: widen back out; the last layer is linear so that it can emit
# values of either sign, matching the standardized inputs.
decoded = Dense(16, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)

# Reconstruction quality is measured with mean squared error.
autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
)


In [None]:
# Train the autoencoder on the normal data. The input and the target are the
# same array, so the model learns to reconstruct what it is given.
autoencoder.fit(
    X_scaled,
    X_scaled,
    epochs=10,
    batch_size=256,
    shuffle=True,
)

# Reconstruct the evaluation data and compute one error value per row:
# the mean of the squared differences across that row's features.
reconstructed = autoencoder.predict(X_test_scaled)
residual = X_test_scaled - reconstructed
mse = np.power(residual, 2).mean(axis=1)


In [None]:
# Set the threshold at the 95th percentile of the reconstruction error on the
# normal training data. Taking the percentile of the evaluation errors instead
# would tune the threshold on the data being scored and would always flag
# exactly 5% of it, however many anomalies it actually contains.
train_reconstructed = autoencoder.predict(X_scaled)
train_mse = np.mean(np.power(X_scaled - train_reconstructed, 2), axis=1)
threshold = np.percentile(train_mse, 95)

# Predict anomalies: 1 where the reconstruction error exceeds the threshold,
# 0 otherwise.
predictions = (mse > threshold).astype(int)

#   Evaluate
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Compare the thresholded predictions with the true labels.
matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)
print("Confusion Matrix:\n", matrix)
print("\nClassification Report:\n", report)

#  Plot Errors
# Overlay the reconstruction-error histograms of the two classes and mark
# the decision threshold with a dashed vertical line.
for class_value, class_name in ((0, 'Normal'), (1, 'Fraud')):
    class_errors = mse[y_test == class_value]
    plt.hist(class_errors, bins=50, alpha=0.6, label=class_name)
plt.axvline(threshold, color='r', linestyle='--', label='Threshold')
plt.legend()
plt.title("Reconstruction Error Distribution")
plt.show()