# In [None]:
# Step 1 — Import Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

# -------------------------------------------------------------
# Step 2 — Load and Explore Dataset
# -------------------------------------------------------------
# Read the credit card transactions and print a quick overview
# (overall shape plus the first few rows).
data = pd.read_csv("creditcard.csv")
print("Dataset shape:", data.shape)
print(data.head())

# The 'Class' column is the label: 0 -> normal, 1 -> fraudulent.
# Every remaining column is used as an input feature.
y = data["Class"]
X = data.drop(columns="Class")

# -------------------------------------------------------------
# Step 3 — Preprocess Data (Split, then Standardize)
# -------------------------------------------------------------
# Split first (80% training / 20% testing) so the scaler never sees test rows.
# stratify=y keeps the class ratio identical in both splits, so the number of
# fraud cases in the test set does not depend on the luck of the random draw.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only and reuse its mean/variance for
# the test split. Fitting on the full dataset would leak test-set statistics
# into preprocessing and make the evaluation optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole dataset scaled with the training statistics; kept so that any code
# outside this section that reads X_scaled keeps working.
X_scaled = scaler.transform(X)

# Use only NORMAL transactions for training the Autoencoder.
# X_train is a NumPy array while y_train is a pandas Series, so convert the
# boolean mask to a plain array to make the positional selection explicit.
X_train_norm = X_train[(y_train == 0).to_numpy()]
print("Normal transactions for training:", X_train_norm.shape[0])

# -------------------------------------------------------------
# Step 4 — Build Autoencoder Model
# -------------------------------------------------------------
# Symmetric dense autoencoder: input_dim -> 14 -> 7 -> 14 -> input_dim.
input_dim = X_train_norm.shape[1]   # number of features
encoding_dim = 14                   # width of the outer hidden layers

# Encoder: compress the input down to the 7-unit bottleneck
input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation='relu')(input_layer)
encoder = Dense(7, activation='relu')(encoder)    # Bottleneck (latent) layer

# Decoder: reconstruct the original input from the bottleneck
decoder = Dense(encoding_dim, activation='relu')(encoder)
# The targets are standardized features (zero mean, unit variance), so they
# contain negative values and values above 1. A sigmoid output is bounded to
# (0, 1) and could never reproduce them; a linear output is unbounded.
decoder = Dense(input_dim, activation='linear')(decoder)

# Combine encoder + decoder into a single trainable model
autoencoder = Model(inputs=input_layer, outputs=decoder)

# -------------------------------------------------------------
# Step 5 — Compile Autoencoder
# -------------------------------------------------------------
# Train by minimising the reconstruction Mean Squared Error with Adam,
# and additionally report the Mean Absolute Error during training.
autoencoder.compile(
    loss="mse",
    metrics=["mae"],
    optimizer=Adam(learning_rate=0.001),
)

# -------------------------------------------------------------
# Step 6 — Train Autoencoder (on only normal transactions)
# -------------------------------------------------------------
# Input and target are the same array: the network learns to reproduce
# normal transactions. 20% of the rows are held out for validation, and the
# returned History object is used later to plot the loss curves.
history = autoencoder.fit(
    x=X_train_norm,
    y=X_train_norm,
    batch_size=64,
    epochs=2,
    shuffle=True,
    validation_split=0.2,
    verbose=1,
)

# -------------------------------------------------------------
# Step 7 — Evaluate Reconstruction Error
# -------------------------------------------------------------
# Reconstruct the test set and compute the per-sample reconstruction error
# (mean of the squared differences across features).
reconstructions = autoencoder.predict(X_test)
mse = np.mean(np.square(X_test - reconstructions), axis=1)

# Derive the anomaly threshold from the NORMAL training data, not from the
# test set. A percentile of the test errors would tune the decision rule on
# the evaluation data and would flag exactly 5% of the test samples no matter
# what the model learned. Using the 95th percentile of the errors on normal
# training transactions instead targets a ~5% false-alarm rate on normal data.
train_reconstructions = autoencoder.predict(X_train_norm)
train_mse = np.mean(np.square(X_train_norm - train_reconstructions), axis=1)
threshold = np.percentile(train_mse, 95)
print("Reconstruction Error Threshold:", threshold)

# -------------------------------------------------------------
# Step 8 — Detect Anomalies
# -------------------------------------------------------------
# A sample is predicted as an anomaly (1) when its reconstruction error
# exceeds the threshold, otherwise it is predicted as normal (0).
exceeds_threshold = mse > threshold
y_pred = exceeds_threshold.astype(int)

# Thresholded predictions versus the true labels
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)
report = classification_report(y_test, y_pred, digits=4)
print("\nClassification Report:\n", report)

# ROC AUC is computed from the raw error scores, so it does not depend on
# the chosen threshold.
auc_score = roc_auc_score(y_test, mse)
print("ROC AUC Score:", auc_score)

# -------------------------------------------------------------
# Step 9 — Plot Training Loss
# -------------------------------------------------------------
# Draw the per-epoch training and validation loss recorded during fit().
for history_key, curve_label in (
    ("loss", "Train Loss"),
    ("val_loss", "Validation Loss"),
):
    plt.plot(history.history[history_key], label=curve_label)

plt.title("Autoencoder Training Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# -------------------------------------------------------------
# Step 10 — Count Normal and Fraudulent Transactions
# -------------------------------------------------------------
# Use the vectorized Series.sum() on the boolean mask; the builtin sum()
# would iterate over every row in Python to produce the same count.
print("\nNormal Transactions:", (y == 0).sum())
print("Fraudulent Transactions:", (y == 1).sum())


# | Question                               | Answer                                                                                            |
# | -------------------------------------- | ------------------------------------------------------------------------------------------------- |
# | What is an Autoencoder?                | A neural network that learns to reconstruct its input — used for unsupervised learning.           |
# | What is Anomaly Detection?             | Detecting data points that are significantly different from normal patterns.                      |
# | Why use Autoencoders for this?         | Because they learn only normal patterns — anomalies produce high reconstruction errors.           |
# | Why use only normal data for training? | To teach the autoencoder how normal data looks; anomalies should then appear unusual.             |
# | What is the Bottleneck layer?          | The smallest hidden layer that holds the compressed latent representation of the input.           |
# | What is the threshold used for?        | To classify points with reconstruction error above it as anomalies (frauds).                      |
# | Why percentile 95?                     | A heuristic cut-off: errors above the 95th percentile are treated as outliers/anomalies.          |
# | What is the optimizer?                 | Adam — an efficient gradient-based optimizer.                                                     |
# | What is the loss function used?        | Mean Squared Error (MSE) — measures reconstruction difference.                                    |
# | What is ROC-AUC Score?                 | Metric to evaluate the model's ability to distinguish between normal and fraudulent transactions. |
