# Autoencoder (AE) Implementation with Nature-Inspired Optimization
### MSC/DSA/134

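This notebook implements an Autoencoder (AE) for fraud detection.

**Optimization goal:** find the best architecture (encoder layers, decoder layers, latent size, units per layer, dropout) that maximizes the F1 score (anomaly-detection performance).

**TODO:** the final-evaluation cell labels AUPRC as the "PRIMARY METRIC (Optimization Objective)"; confirm whether the optimizer maximizes F1 or AUPRC and align this description accordingly.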

In [1]:
# import libraries and dependencies

from globals.pandas_functions import *
import globals.hyperparameter_optimizer as hyp_optimizer
import globals.torch_gpu_processing as torch_gpu_processing
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import globals.ae_runner as ae_runner

In [2]:
# import datasets
# Every split is read from the same directory: three feature files (x_*_scaled.csv)
# followed by the three matching label files (y_*.csv).
data_base_path = "data/processed/null_value_option_1_with_validation_set/scaled_only"

# Feature frames, read in train / validation / test order.
X_train, X_validation, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        f"{data_base_path}/unified_transaction_data_option2_x_train_scaled.csv",
        f"{data_base_path}/unified_transaction_data_option2_x_validation_scaled.csv",
        f"{data_base_path}/unified_transaction_data_option2_x_test_scaled.csv",
    )
)

# Label frames, read in the same order as the features.
y_train, y_validation, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        f"{data_base_path}/unified_transaction_data_option2_y_train.csv",
        f"{data_base_path}/unified_transaction_data_option2_y_validation.csv",
        f"{data_base_path}/unified_transaction_data_option2_y_test.csv",
    )
)

# Report the (rows, columns) shape of each feature frame as a quick sanity check.
for split_label, split_frame in (
    ("X_train:", X_train),
    ("X_validation:", X_validation),
    ("X_test:", X_test),
):
    print(split_label, split_frame.shape)

X_train: (354305, 26)
X_validation: (118102, 26)
X_test: (118102, 26)


In [4]:
# Smoke-test the GPU backend before any training is attempted. In the recorded run
# the helper printed the selected DirectML device and a small test-operation result,
# and the call evaluated to True (shown as the cell output).
torch_gpu_processing.test_direct_ml_processing()

DirectML device: privateuseone:0
Test operation successful: [2. 4.]


True

In [14]:
# get a sample for optimization (Non-Fraud Only!)
# The subset size and RNG seed are fixed here; the actual sampling (and, per the
# note above, the non-fraud filtering) happens inside ae_runner.get_sampling_data,
# whose implementation is not visible in this notebook.
sample_size, seed = 60_000, 42

X_train_sample, y_train_sample = ae_runner.get_sampling_data(
    X_train, y_train, sample_size=sample_size, seed=seed
)

# Confirm how many rows the optimizer will actually train on.
print(f"Optimization Sample Size (Non-Fraud Only): {len(X_train_sample)}")

Optimization Sample Size (Non-Fraud Only): 60000


In [3]:
# set meta data
# Settings
# Which nature-inspired optimizer drives the hyperparameter search.
param_optimizer_algorithm = "PSO"  # (FA, PSO, GWO)

# Search budget: number of candidates per generation and number of generations.
population, iterations = 15, 5

# Training knobs shared by the cells below.
epochs_for_evaluation, batch_size, early_stopping = 10, 1024, 4

In [16]:
# Run the hyperparameter search on the training sample, with the validation split
# passed alongside it for candidate scoring (scoring itself happens inside
# ae_runner.run_optimization, which is not visible here).
#
# batch_size and epochs are taken from the settings cell instead of repeating the
# literals 1024 and 10, so editing the settings actually changes the search.
# NOTE(review): epochs_for_evaluation is assumed to be the intended source for
# `epochs` (both are 10 today) - confirm.
best_hp = ae_runner.run_optimization(
    X_train_sample,
    y_train_sample,
    X_validation,
    y_validation,
    algorithm=param_optimizer_algorithm,
    population=population,
    iterations=iterations,
    batch_size=batch_size,
    epochs=epochs_for_evaluation
)

Starting AE Optimization using PSO...
Settings: Pop=15, Iter=5, Batch=1024, Epochs=10
Starting AE Optimization using PSO...
Training samples (non-fraud): 60000
Validation samples (non-fraud for training): 113971
Validation samples (full for AUPRC): 118102
Optimizer using DEVICE: privateuseone:0
Intelligently downsampling validation set to preserve all fraud cases...
  Original: 118102 samples (4131 fraud, 113971 normal)
  Downsampled: 10000 samples (4131 fraud, 5869 normal)
  Fraud retention: 100%, Normal retention: 5.1%
.......... [AUPRC: 0.4997 (baseline: 0.4131) | F1: 0.5971 | ROC: 0.6065].......... [AUPRC: 0.5031 (baseline: 0.4131) | F1: 0.5933 | ROC: 0.6074].......... [AUPRC: 0.4941 (baseline: 0.4131) | F1: 0.5982 | ROC: 0.5921].......... [AUPRC: 0.5057 (baseline: 0.4131) | F1: 0.5979 | ROC: 0.6165].......... [AUPRC: 0.5036 (baseline: 0.4131) | F1: 0.5959 | ROC: 0.6051].......... [AUPRC: 0.5364 (baseline: 0.4131) | F1: 0.6154 | ROC: 0.6481].......... [AUPRC: 0.5022 (baseline: 0.41

In [None]:
# best hyperparams found
# Header and hyperparameter dict go out through one print call, one item per line.
print("Best Hyperparameters Found:", best_hp, sep="\n")

In [5]:
# temp set hyperparams
# Hard-coded architecture used in place of a fresh optimizer run. Running this cell
# replaces whatever `best_hp` currently holds.
best_hp = {
    'n_encoder_layers': 2,
    'n_decoder_layers': 4,
    'latent_size': 8,
    # One entry per encoder layer.
    'encoder_units': [160, 480],
    'encoder_activations': ['leaky_relu', 'selu'],
    # One entry per decoder layer.
    'decoder_units': [400, 160, 256, 384],
    'decoder_activations': ['selu', 'silu', 'leaky_relu', 'selu'],
    'dropout_rate': 0.03793393746722867,
    'batch_norm': False,
}

In [7]:
# show best hyperparameters
# Header and hyperparameter dict go out through one print call, one item per line.
print("Hyperparameters To Train:", best_hp, sep="\n")


Hyperparameters To Train:
{'n_encoder_layers': 2, 'n_decoder_layers': 4, 'latent_size': 8, 'encoder_units': [160, 480], 'encoder_activations': ['leaky_relu', 'selu'], 'decoder_units': [400, 160, 256, 384], 'decoder_activations': ['selu', 'silu', 'leaky_relu', 'selu'], 'dropout_rate': 0.03793393746722867, 'batch_norm': False}


In [8]:
# final model training with best hyperparameters
#
# Trains the autoencoder described by `best_hp` on the full training split, then
# prints the metrics dict returned by torch_gpu_processing.train_final_ae_model.


def _format_metric(value, spec):
    """Return ``value`` formatted with format-spec ``spec``, or "n/a" if it is None.

    Keeps the report from raising TypeError on a missing metric after the long
    training run has already finished.
    """
    return "n/a" if value is None else format(value, spec)


print("Using validation set for early stopping, test set for final evaluation only.")
model, metrics = torch_gpu_processing.train_final_ae_model(
    best_hp,
    X_train.to_numpy(),
    y_train.to_numpy(),
    X_validation.to_numpy(),  # Validation set for early stopping
    y_validation.to_numpy(),
    X_test.to_numpy(),  # Test set for final evaluation only
    y_test.to_numpy(),
    batch_size=batch_size,
    max_epochs=100  # Increased for better convergence
)

print("\nFinal Test Set Metrics:")
print("=" * 60)

# Threshold-independent metrics: each section is printed only when the metric exists.
auprc = metrics.get('optimal_auprc')
if auprc is not None:
    # Mean of the flattened labels is used as the random-classifier AUPRC baseline.
    # NOTE(review): assumes y_test holds only a 0/1 fraud label column - confirm.
    fraud_rate = y_test.to_numpy().flatten().mean()
    print("PRIMARY METRIC (Optimization Objective):")
    print(f"  Test AUPRC:       {auprc:.4f}")
    print(f"  Baseline (random): {fraud_rate:.4f}")
    print(f"  Improvement:       {auprc/fraud_rate:.2f}x over random")
    print()

roc_auc = metrics.get('optimal_roc_auc')
if roc_auc is not None:
    print(f"  Test ROC-AUC:     {roc_auc:.4f}")
    print()

# Threshold-dependent metrics: always printed; a missing value shows as "n/a".
print("Threshold-Dependent Metrics:")
print(f"  Optimal Threshold: {_format_metric(metrics.get('optimal_threshold'), '.6f')}")
print(f"  Optimal F1:        {_format_metric(metrics.get('optimal_f1'), '.4f')}")
print(f"  Optimal Precision: {_format_metric(metrics.get('optimal_precision'), '.4f')}")
print(f"  Optimal Recall:    {_format_metric(metrics.get('optimal_recall'), '.4f')}")
print("=" * 60)

Using validation set for early stopping, test set for final evaluation only.
FINAL AE TRAINING (Max Epochs: 100)
Training on device: privateuseone:0

Filtering Training Data:
  Original size: 354305
  Fraud samples removed: 12394
  Final training size (non-fraud only): 341911

Validation Data Split:
  For early stopping (non-fraud only): 113971
  For AUPRC evaluation (full set): 118102 (4131 fraud samples)

Training Configuration:
  Noise std: 0.1
  Batch size: 1024
  Early stopping patience: 10
  Optimizer: Adam (lr=0.001, weight_decay=1e-5)

Starting training...
Epoch 1/100: Train Loss=0.115442, Val Loss (clean)=0.038415
Epoch 6/100: Train Loss=0.022598, Val Loss (clean)=0.007708
Epoch 11/100: Train Loss=0.024031, Val Loss (clean)=0.023110
Epoch 16/100: Train Loss=0.018298, Val Loss (clean)=0.011482
Epoch 21/100: Train Loss=0.026660, Val Loss (clean)=0.035354
Epoch 26/100: Train Loss=0.040382, Val Loss (clean)=0.012545
Epoch 31/100: Train Loss=0.017554, Val Loss (clean)=0.008592
Epoc