# Autoencoder (AE) Implementation with Nature-Inspired Optimization
### MSC/DSA/134

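This notebook implements an Autoencoder (AE) for fraud detection.

**Optimization goal:** find the best architecture (number of encoder layers, number of decoder layers, latent size, units per layer, dropout rate) that maximizes the F1 score, i.e. anomaly-detection performance. The search is run by a nature-inspired optimizer (FA, PSO, or GWO) that minimizes the equivalent objective 1 − F1.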

In [1]:
# import libraries and dependencies

from globals.pandas_functions import *
import globals.hyperparameter_optimizer as hyp_optimizer
import globals.torch_gpu_processing as torch_gpu_processing
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the pre-split feature matrices (scaled) and their label files.
data_base_path = "data/processed/null_value_option_1_with_validation_set/scaled_only"

# Features, read in train / validation / test order.
X_train, X_validation, X_test = map(pd.read_csv, (
    f"{data_base_path}/unified_transaction_data_option2_x_train_scaled.csv",
    f"{data_base_path}/unified_transaction_data_option2_x_validation_scaled.csv",
    f"{data_base_path}/unified_transaction_data_option2_x_test_scaled.csv",
))

# Labels, read in the same split order as the features.
y_train, y_validation, y_test = map(pd.read_csv, (
    f"{data_base_path}/unified_transaction_data_option2_y_train.csv",
    f"{data_base_path}/unified_transaction_data_option2_y_validation.csv",
    f"{data_base_path}/unified_transaction_data_option2_y_test.csv",
))

# Report the feature-matrix shapes as a quick sanity check on the splits.
print("X_train:", X_train.shape)
print("X_validation:", X_validation.shape)
print("X_test:", X_test.shape)

X_train: (354305, 26)
X_validation: (118102, 26)
X_test: (118102, 26)


In [4]:
# Smoke-test the DirectML backend before any training is attempted.
# Per the recorded output, this reports the torch version and the device it resolved.
torch_gpu_processing.test_direct_ml_processing()

torch: 2.4.1+cpu
DirectML device: privateuseone:0
Tensor device: privateuseone:0


In [4]:
# get a sample for optimization (Non-Fraud Only!)
sample_size = 50000  # maximum number of non-fraud rows handed to the hyperparameter search
seed = 42  # random seed reused for the sampling call and for the optimizer objective

def get_clean_sample(X, y, sample_size, random_state=42):
    """Return a random subset of the rows labelled 0 (non-fraud).

    Args:
        X: Feature array, indexable by a boolean row mask.
        y: Label array with a ``ravel()`` method; rows equal to 0 are kept.
        sample_size: Number of clean rows to draw.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_sample, y_sample)``. When fewer than or exactly ``sample_size``
        clean rows exist, all of them are returned without shuffling.
    """
    # Keep only class 0 rows.
    is_non_fraud = y.ravel() == 0
    X_clean, y_clean = X[is_non_fraud], y[is_non_fraud]

    # Only subsample when there are more clean rows than requested.
    if len(X_clean) > sample_size:
        X_sample, _, y_sample, _ = train_test_split(
            X_clean,
            y_clean,
            train_size=sample_size,
            random_state=random_state,
        )
        return X_sample, y_sample

    return X_clean, y_clean

# Build the optimization sample from the training split, passed as NumPy arrays
# with the labels flattened to 1-D.
X_train_sample, y_train_sample = get_clean_sample(
    X_train.to_numpy(), y_train.to_numpy().ravel(), sample_size, seed
)

print(f"Optimization Sample Size (Non-Fraud Only): {len(X_train_sample)}")

Optimization Sample Size (Non-Fraud Only): 50000


In [None]:
# set meta data
# Settings for the hyperparameter search and the training calls below.
param_optimizer_algorithm = "GWO" # (FA, PSO, GWO)
population = 15  # passed to the optimizer as pop_size
iterations = 10  # passed to the optimizer as epoch
epochs_for_evaluation = 40  # Increased from 20 to ensure models converge during optimization
batch_size = 2048  # used by both the optimization objective and the final training call
early_stopping = 5  # passed to the objective as early_stopping_patience

In [6]:
# ==========================================
# HYPERPARAMETER OPTIMIZATION (AE)
# ==========================================

from mealpy.swarm_based import FA, GWO, PSO
from mealpy.utils.space import IntegerVar, FloatVar, BoolVar, CategoricalVar

# 1. Define Objective
objective_function = torch_gpu_processing.set_ae_optimizer_objective(
    X_train_sample,
    y_train_sample,
    X_validation.to_numpy(), # Pass full validation, helper handles downsampling
    y_validation.to_numpy(),
    max_epochs=epochs_for_evaluation,
    batch_size=batch_size,
    seed=seed,
    early_stopping_patience=early_stopping
)

# 2. Define Bounds (AE Specific)
# Each config entry becomes one decision variable; their order defines the layout of the
# solution vector, so an unrecognised entry must fail loudly instead of being dropped.
bounds_cfg = hyp_optimizer.get_ae_hyperparameter_bounds_config(min_layers=1, max_layers=6)
optimizer_bounds = []
for cfg in bounds_cfg:
    if cfg['type'] == 'int':
        optimizer_bounds.append(IntegerVar(lb=cfg['lb'], ub=cfg['ub'], name=cfg.get('name')))
    elif cfg['type'] == 'float':
        optimizer_bounds.append(FloatVar(lb=cfg['lb'], ub=cfg['ub'], name=cfg.get('name')))
    elif cfg['type'] == 'bool':
        optimizer_bounds.append(BoolVar(name=cfg.get('name')))
    elif cfg['type'] == 'categorical':
        optimizer_bounds.append(CategoricalVar(valid_sets=cfg['choices'], name=cfg.get('name')))
    else:
        raise ValueError(
            f"Unsupported bound type {cfg['type']!r} for variable {cfg.get('name')!r}"
        )

problem = dict(obj_func=objective_function, bounds=optimizer_bounds, minmax="min", log_to=None)

# 3. Run Optimizer
# Resolve the optimizer from the configured name so the algorithm that runs is the one
# that is announced (previously FA was always used, whatever the setting said).
optimizer_classes = {
    "FA": FA.OriginalFA,
    "GWO": GWO.OriginalGWO,
    "PSO": PSO.OriginalPSO,
}
algorithm_key = str(param_optimizer_algorithm).upper()
if algorithm_key not in optimizer_classes:
    raise ValueError(
        f"Unknown optimizer {param_optimizer_algorithm!r}; expected one of {sorted(optimizer_classes)}"
    )

print(f"Starting AE Optimization using {param_optimizer_algorithm}...")
optimizer = optimizer_classes[algorithm_key](epoch=iterations, pop_size=population)

# Seed the metaheuristic as well, so the search itself is repeatable.
best_agent = optimizer.solve(problem, seed=seed)
best_vec = best_agent.solution
best_obj = best_agent.target.fitness
best_hp = hyp_optimizer.optimizer_vectors_to_ae_hyperparams(best_vec)

print("\n=== BEST SOLUTION ===")
print(f"Best Objective (1-F1): {best_obj:.6f} => F1: {1-best_obj:.6f}")
print(f"Best Hyperparameters: {best_hp}")

Optimizer detected DirectML (privateuseone:0).
  ! DirectML is unstable for intensive optimization loops (causes hangs).
  ! Falling back to CPU for STABILITY. (Expected runtime: ~20-40 mins)
Smart Downsampling Validation Set (Limit: 20000)...
  - Original Size: 118102 (Fraud: 4131)
  - New Size: 20000 (Fraud: 4131 KEPT 100%)
Starting AE Optimization using GWO...
[1:0.4s][2:0.4s][3:0.3s][4:0.3s][5:0.4s][6:0.3s][7:0.4s][8:0.3s][9:0.4s][10:0.5s][11:0.4s][12:0.4s][13:0.5s][14:0.4s][15:0.5s][16:0.6s][17:0.6s]]
[1:0.7s][2:0.6s][3:0.4s][4:0.5s][5:0.4s][6:0.5s][7:0.4s][8:0.4s][9:0.5s][10:0.5s][11:0.5s][12:0.6s][13:0.4s][14:0.4s][15:0.6s][16:0.4s][17:0.5s][18:0.4s]]
[1:0.6s][2:0.6s][3:0.7s][4:0.6s][5:0.7s][6:0.5s][7:0.7s][8:0.6s][9:0.5s][10:0.5s][11:0.6s][12:0.5s][13:0.6s][14:0.5s][15:0.6s][16:0.6s][17:0.5s][18:0.5s][19:0.6s][20:0.5s]]
[1:0.5s][2:0.6s][3:0.4s][4:0.6s][5:0.4s][6:0.6s]]
[1:0.5s][2:0.6s][3:0.4s][4:0.5s][5:0.6s][6:0.6s][7:0.4s][8:0.4s][9:0.5s][10:0.5s][11:0.4s][12:0.5s][13:0.4s][1

In [5]:
# temp set hyperparams
# Hard-coded architecture so the final training cell can run without repeating the search.
# Running this cell replaces whatever `best_hp` currently holds.
best_hp = {
    'n_encoder_layers': 2,
    'n_decoder_layers': 1,
    'latent_size': 4,
    'encoder_units': [24, 16],
    'encoder_activations': ['relu', 'relu'],
    'decoder_units': [24],
    'decoder_activations': ['relu'],
    'dropout_rate': 0.03629487284200539,
    'batch_norm': True,
}

In [None]:
# ==========================================
# FINAL MODEL TRAINING
# ==========================================

print("Using validation set for early stopping, test set for final evaluation only.")
model, metrics = torch_gpu_processing.train_final_ae_model(
    best_hp,
    X_train.to_numpy(),
    y_train.to_numpy(),
    X_validation.to_numpy(),  # Validation set for early stopping
    y_validation.to_numpy(),
    X_test.to_numpy(),  # Test set for final evaluation only
    y_test.to_numpy(),
    batch_size=batch_size,
    max_epochs=100  # Increased for better convergence
)


def _format_metric(key, spec):
    """Format ``metrics[key]`` with ``spec``, or return 'N/A' if it is missing or None.

    A numeric format spec cannot be applied to the 'N/A' placeholder itself (that raises
    ValueError), so the fallback has to be chosen before formatting.
    """
    value = metrics.get(key)
    return "N/A" if value is None else format(value, spec)


print("\nFinal Test Set Metrics:")
print("=" * 60)
print("Primary Metrics (Optimal Threshold - matches optimization):")
print(f"  Optimal F1:       {_format_metric('optimal_f1', '.4f')}")
print(f"  Optimal Precision: {_format_metric('optimal_precision', '.4f')}")
print(f"  Optimal Recall:    {_format_metric('optimal_recall', '.4f')}")
print(f"  Optimal Threshold: {_format_metric('optimal_threshold', '.6f')}")
# The ranking metrics are optional: print them only when they are present.
if metrics.get('optimal_roc_auc') is not None:
    print(f"  Optimal ROC AUC:  {_format_metric('optimal_roc_auc', '.4f')}")
if metrics.get('optimal_auprc') is not None:
    print(f"  Optimal AUPRC:    {_format_metric('optimal_auprc', '.4f')}")

FINAL AE TRAINING
Training AE for max 50 epochs on privateuseone:0...
Epoch 1/50: Train Loss=0.603213, Val Loss=0.310031
Epoch 2/50: Train Loss=0.299299, Val Loss=0.235183
Epoch 3/50: Train Loss=0.242401, Val Loss=0.187456
Epoch 4/50: Train Loss=0.203171, Val Loss=0.148685
Epoch 5/50: Train Loss=0.174729, Val Loss=0.128722
Epoch 6/50: Train Loss=0.156863, Val Loss=0.113966
Epoch 7/50: Train Loss=0.147055, Val Loss=0.106973
Epoch 8/50: Train Loss=0.138195, Val Loss=0.102900
Epoch 9/50: Train Loss=0.130207, Val Loss=0.095208
Epoch 10/50: Train Loss=0.127401, Val Loss=0.091993
Epoch 11/50: Train Loss=0.123052, Val Loss=0.090673
Epoch 12/50: Train Loss=0.115440, Val Loss=0.089947
Epoch 13/50: Train Loss=0.113776, Val Loss=0.084833
Epoch 14/50: Train Loss=0.109284, Val Loss=0.084854
Epoch 15/50: Train Loss=0.109183, Val Loss=0.081300
Epoch 16/50: Train Loss=0.104706, Val Loss=0.077954
Epoch 17/50: Train Loss=0.105524, Val Loss=0.077246
Epoch 18/50: Train Loss=0.101928, Val Loss=0.077996
Epo