# In [None]:
# Autoencoders for Anomaly Detection - Complete Code

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import regularizers

# Pipeline overview:
#   1. Load the data and separate labels from features.
#   2. Split into train/test BEFORE fitting any preprocessing.
#   3. Fit the scaler on the normal training rows only and apply it everywhere.
#   4. Train an autoencoder on normal rows, holding out part of them.
#   5. Derive the anomaly threshold from the held-out normal rows
#      (never from test labels), then evaluate on the untouched test set.

# Load dataset (Credit Card Fraud Detection as example)
url = "https://www.dropbox.com/s/sf3jwv3z1ebq1tl/creditcard.csv?dl=1"
df = pd.read_csv(url)

# Drop 'Time' column and extract labels
df = df.drop(['Time'], axis=1)
labels = df['Class'].values
features = df.drop(['Class'], axis=1)

# Split the raw features first so that no test-set statistics can leak into
# the scaler. `stratify` keeps the class ratio identical in both splits, which
# matters when the positive class is rare (typical for fraud data).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

# Use only normal (non-fraud) data to train the autoencoder
X_train_normal_raw = X_train_raw[y_train == 0]

# Standardize features: fit on the normal training rows only, then reuse the
# fitted statistics for every other subset.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_normal_raw)
X_test = scaler.transform(X_test_raw)
# Kept for backward compatibility with code that expects the full scaled matrix.
features_scaled = scaler.transform(features)

# Hold out part of the normal training data. It serves both as the Keras
# validation set and as the label-free sample used to pick the threshold.
X_fit, X_val = train_test_split(X_train, test_size=0.2, random_state=42)

# Define autoencoder architecture
input_dim = X_train.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(14, activation='relu', activity_regularizer=regularizers.l1(1e-5))(input_layer)
encoded = Dense(7, activation='relu')(encoded)
decoded = Dense(14, activation='relu')(encoded)
# Linear output: standardized inputs are zero-centred and unbounded, so a
# sigmoid (range 0..1) could never reconstruct negative or large values.
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Train autoencoder (input == target: the network learns to reproduce normal data)
history = autoencoder.fit(
    X_fit, X_fit,
    epochs=20,
    batch_size=32,
    shuffle=True,
    validation_data=(X_val, X_val),
    verbose=1
)

# Set threshold for anomaly detection from held-out NORMAL training data:
# the 95th percentile of its per-sample reconstruction error. Using the test
# labels here would leak ground truth into the decision rule.
val_reconstructions = autoencoder.predict(X_val)
val_mse = np.mean(np.power(X_val - val_reconstructions, 2), axis=1)
threshold = np.percentile(val_mse, 95)
print(f"Reconstruction error threshold: {threshold}")

# Predict on test data; per-sample mean squared reconstruction error
reconstructions = autoencoder.predict(X_test)
mse = np.mean(np.power(X_test - reconstructions, 2), axis=1)

# Detect anomalies (cast to int so predictions share the label type of y_test)
y_pred = (mse > threshold).astype(int)

# Evaluation
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Plot training history
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title("Autoencoder Training Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()