# 03. Autoencoder Anomaly Detection

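This notebook trains an autoencoder on the normal-class (label 0) training rows and evaluates it as an anomaly detector, using the reconstruction error on the held-out test set as the anomaly score. The model and evaluation helpers are imported from `../src`.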

In [None]:
import pandas as pd
import numpy as np
import sys
import torch
from pathlib import Path

# Add src to path
sys.path.append('../src')
from preprocessing import load_data, get_selected_features, clean_data, create_target, fit_transform_data
from models import train_autoencoder, compute_reconstruction_error
from evaluation import evaluate_anomaly_detector, plot_evaluation_curves
from sklearn.model_selection import train_test_split

## Load and Preprocess Data

In [None]:
# Raw CSV -> selected features -> cleaned frame -> (X, y) -> transformed X.
data_path = '../data/diabetic_data.csv'
df = load_data(data_path)
features = get_selected_features()
df_clean = clean_data(df, features)
X, y = create_target(df_clean)

# NOTE(review): the transform is obtained from all rows before the split
# below; if fit_transform_data fits statistics on its input, test rows
# influence the fitted preprocessor -- confirm this is intended.
X_processed, preprocessor = fit_transform_data(X)

# Hold out 20% of the rows for testing. Stratifying on the target keeps the
# class proportions the same in both splits; the fixed random_state makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed.values,
    y.values,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## Train Autoencoder

In [None]:
# Seed PyTorch's global RNG so repeated runs of this cell start from the same
# random state, in line with the fixed random_state used for the data split.
# NOTE(review): assumes train_autoencoder draws from torch's global RNG and
# does not reseed internally -- confirm in src/models.
torch.manual_seed(42)

# Keep only the training rows labelled 0 (treated here as the normal class);
# the autoencoder is fit on these rows alone.
X_train_normal = X_train[y_train == 0]

# Width of the input layer = number of columns in the training matrix.
input_dim = X_train.shape[1]

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model, losses = train_autoencoder(
    X_train_normal,
    input_dim,
    bottleneck_dim=32,
    epochs=20,
    device=device,
)
print("Autoencoder trained.")

## Evaluate

In [None]:
# Per-row reconstruction error on the held-out test set; this array is what
# gets passed to the evaluation helpers as the detector's score.
reconstruction_errors = compute_reconstruction_error(
    model,
    X_test,
    device=device,
)

# Evaluate the scores against the true test labels.
metrics = evaluate_anomaly_detector(
    y_test,
    reconstruction_errors,
    model_name="Autoencoder",
)

# The plotting helper takes a {model name: scores} mapping.
plot_evaluation_curves(y_test, {"Autoencoder": reconstruction_errors})