# Convolutional Autoencoder Hyperparameter Tuning

This notebook performs hyperparameter optimization for a Convolutional Autoencoder using Optuna.

**Task**: Binary anomaly detection (normal vs fault)

**Architecture**: 1D CNN encoder-decoder with optional Transformer layer for reconstruction-based anomaly detection.

**Data Handling**:
- Windows are created within simulation runs only (no cross-run windows)
- Subsampling is done by simulation runs, not individual rows

**Outputs** (when `QUICK_MODE` is enabled, `_quick` is appended to each file name before the extension):
- Best hyperparameters: `outputs/hyperparams/conv_autoencoder_best.json`
- Optuna study: `outputs/optuna_studies/conv_autoencoder_study.pkl`

In [1]:
import os
import sys
import time
import json
import pickle
from pathlib import Path

# Wall-clock start of the notebook; also printed in the banner below.
start_time = time.time()
print("="*60)
print("Convolutional Autoencoder Hyperparameter Tuning")
print("="*60)
print(f"Started at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")

# QUICK_MODE comes from the environment: 'true', '1' or 'yes' (any case) enable it.
QUICK_MODE = os.getenv('QUICK_MODE', 'False').lower() in ('true', '1', 'yes')

# Tuning budget for the selected mode.
if not QUICK_MODE:
    RUN_FRACTION, MIN_RUNS_PER_CLASS = 0.50, 5
    N_TRIALS, MAX_EPOCHS, PATIENCE = 50, 50, 5
    print("ðŸ”¬ TUNING MODE (50% runs, 50 trials, max 50 epochs)")
else:
    RUN_FRACTION, MIN_RUNS_PER_CLASS = 0.01, 5
    N_TRIALS, MAX_EPOCHS, PATIENCE = 5, 10, 3
    print("ðŸš€ QUICK MODE (1% runs, min 5/class, 5 trials, max 10 epochs)")

RANDOM_SEED = 42

# Input data and result locations, relative to the notebook's working directory.
DATA_DIR = Path('../data')
OUTPUT_DIR = Path('../outputs')
HYPERPARAM_DIR, STUDY_DIR = OUTPUT_DIR / 'hyperparams', OUTPUT_DIR / 'optuna_studies'
PROGRESS_FILE = OUTPUT_DIR / 'conv_autoencoder_progress.log'

# parents=True also creates OUTPUT_DIR itself when it does not exist yet.
STUDY_DIR.mkdir(parents=True, exist_ok=True)
HYPERPARAM_DIR.mkdir(parents=True, exist_ok=True)

print(f"Run fraction: {RUN_FRACTION*100}%")
print(f"Trials: {N_TRIALS}, Max epochs: {MAX_EPOCHS}, Patience: {PATIENCE}")
print("="*60)

def log_progress(message):
    """Print ``message`` and append it as one line to ``PROGRESS_FILE``.

    The message is printed with ``flush=True`` and the log file is opened,
    written and closed on every call, so each entry reaches disk as soon as
    the call returns.

    The file is written as UTF-8 explicitly: the messages logged by this
    notebook contain non-ASCII symbols, and the platform default encoding is
    not guaranteed to be able to represent them.

    Args:
        message: Text to report; a trailing newline is added in the file.
    """
    print(message, flush=True)
    # Closing the file at the end of the ``with`` block flushes it.
    with open(PROGRESS_FILE, 'a', encoding='utf-8') as f:
        f.write(f"{message}\n")

# Start with an empty progress log: creates the file, or truncates one left by a previous run.
PROGRESS_FILE.write_text("")

Convolutional Autoencoder Hyperparameter Tuning
Started at: 2026-01-03 23:27:23
ðŸš€ QUICK MODE (1% runs, min 5/class, 5 trials, max 10 epochs)
Run fraction: 1.0%
Trials: 5, Max epochs: 10, Patience: 3


0

In [2]:
log_progress("\n[Step 1/6] Loading libraries...")
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import optuna
from optuna.pruners import MedianPruner
import warnings
# NOTE: this silences every Python warning for the rest of the session,
# including deprecation warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Seed torch and the global NumPy random state with the shared RANDOM_SEED.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Use the GPU when torch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
log_progress(f"âœ“ Using device: {device}")


[Step 1/6] Loading libraries...


âœ“ Using device: cuda


In [None]:
log_progress("\n[Step 2/6] Loading data...")

# Read the binary-task train and validation CSV files from DATA_DIR.
train = pd.read_csv(DATA_DIR / 'binary_train.csv')
val = pd.read_csv(DATA_DIR / 'binary_val.csv')

# Report the (rows, columns) shape of both frames before any sub-sampling.
log_progress(f"âœ“ Full data - Train: {train.shape}, Val: {val.shape}")

def sample_by_runs(df, fraction, seed, min_runs=5):
    """Sample complete simulation runs, preserving temporal structure.

    A run is identified by its ``(faultNumber, simulationRun)`` pair. For
    every ``faultNumber`` a subset of its runs is drawn at random and *all*
    rows of the selected runs are kept, so no run is ever truncated.

    Args:
        df: Frame with at least the columns ``faultNumber``,
            ``simulationRun`` and ``sample``.
        fraction: Fraction of the runs of each ``faultNumber`` to keep.
        seed: ``random_state`` used for the draw of every ``faultNumber``.
        min_runs: Lower bound on the number of runs kept per ``faultNumber``
            (capped at the number of runs that exist).

    Returns:
        A new frame holding the rows of the selected runs, sorted by
        ``faultNumber``, ``simulationRun`` and ``sample`` with a fresh
        0..n-1 index. An empty input gives an empty frame with the same
        columns.
    """
    run_keys = ['faultNumber', 'simulationRun']
    if df.empty:
        return df.iloc[0:0].reset_index(drop=True)

    # One row per distinct run, ordered by the run keys.
    runs = df.groupby(run_keys).size().reset_index()[run_keys]

    # Explicit loop rather than ``groupby(...).apply``: ``apply`` only keeps
    # the grouping column in the groups it hands out under deprecated pandas
    # behaviour, and the merge below needs ``faultNumber`` to be present.
    sampled_parts = []
    for _, class_runs in runs.groupby('faultNumber'):
        n_total = len(class_runs)
        n_sample = min(n_total, max(min_runs, int(n_total * fraction)))
        sampled_parts.append(class_runs.sample(n=n_sample, random_state=seed))

    sampled_runs = pd.concat(sampled_parts, ignore_index=True)

    # Inner merge keeps exactly the rows that belong to a selected run.
    df_sampled = df.merge(sampled_runs, on=run_keys)
    return df_sampled.sort_values(run_keys + ['sample']).reset_index(drop=True)

# Sample training data - we only use normal data for training
train_sampled = sample_by_runs(train, RUN_FRACTION, RANDOM_SEED, MIN_RUNS_PER_CLASS)
train_normal = train_sampled[train_sampled['faultNumber'] == 0]
if train_normal.empty:
    # Fail here with a clear message instead of inside scaler.fit further down.
    raise ValueError("No normal runs (faultNumber == 0) in the sampled training data")

# For validation, we need BOTH normal and fault samples to evaluate anomaly detection
# Sample from FULL validation set to ensure we get both classes
val_sampled = sample_by_runs(val, RUN_FRACTION, RANDOM_SEED, MIN_RUNS_PER_CLASS)

n_train_runs = train_normal.groupby(['faultNumber', 'simulationRun']).ngroups
n_val_runs = val_sampled.groupby(['faultNumber', 'simulationRun']).ngroups

# Create binary label for evaluation (0=normal, 1=fault)
val_sampled['label'] = (val_sampled['faultNumber'] != 0).astype(int)

# Check class distribution in validation set
n_normal = (val_sampled['label'] == 0).sum()
n_fault = (val_sampled['label'] == 1).sum()

log_progress(f"âœ“ Train (normal only): {train_normal.shape} ({n_train_runs} runs)")
log_progress(f"âœ“ Val (all): {val_sampled.shape} ({n_val_runs} runs)")
log_progress(f"  - Normal samples: {n_normal}, Fault samples: {n_fault}")
log_progress(f"  - Fault rate: {val_sampled['label'].mean():.2%}")

# Model inputs: every column whose name contains 'xmeas' or 'xmv'.
features = [col for col in train.columns if 'xmeas' in col or 'xmv' in col]
num_features = len(features)
if num_features == 0:
    raise ValueError("No 'xmeas'/'xmv' feature columns found in the training data")

# Fit on a plain ndarray: the dataset classes call scaler.transform() on
# `.values` arrays, and a scaler fitted on a DataFrame warns about missing
# feature names on every such call. Statistics come from normal data only.
scaler = StandardScaler()
scaler.fit(train_normal[features].values)

log_progress(f"âœ“ Features: {num_features}")

In [4]:
# Progress marker for step 3 of 6 (printed and appended to the progress log).
log_progress("\n[Step 3/6] Defining Convolutional Autoencoder model...")

class SimulationRunDataset(Dataset):
    """Dataset for autoencoder - creates windows WITHIN simulation runs only.

    Rows are grouped by ``(faultNumber, simulationRun)``, ordered by
    ``sample`` and scaled with ``scaler.transform``. Every run then
    contributes all of its overlapping windows of ``sequence_length``
    consecutive rows (stride 1); a window never spans two runs. Runs shorter
    than ``sequence_length`` contribute nothing.

    Attributes:
        seq_len: The window length.
        windows: ``float32`` array of shape
            ``(n_windows, sequence_length, n_features)``; it keeps this 3-D
            shape even when no window could be built.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    def __init__(self, df, features, scaler, sequence_length=10):
        if sequence_length < 1:
            raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")
        self.seq_len = sequence_length

        # Scale run by run and keep only runs long enough for one window.
        scaled_runs = []
        for _, group in df.groupby(['faultNumber', 'simulationRun']):
            group = group.sort_values('sample')
            X = np.asarray(scaler.transform(group[features].values))
            if len(X) >= sequence_length:
                scaled_runs.append(X)

        n_features = scaled_runs[0].shape[1] if scaled_runs else len(features)
        n_windows = sum(len(X) - sequence_length + 1 for X in scaled_runs)

        # Fill one preallocated float32 array instead of collecting one small
        # array per window in a Python list.
        self.windows = np.empty((n_windows, sequence_length, n_features), dtype=np.float32)
        cursor = 0
        for X in scaled_runs:
            # View of shape (k, n_features, sequence_length); transposed to
            # (k, sequence_length, n_features) so entry [i] equals X[i:i+L].
            view = np.lib.stride_tricks.sliding_window_view(X, sequence_length, axis=0)
            self.windows[cursor:cursor + len(view)] = view.transpose(0, 2, 1)
            cursor += len(view)

    def __len__(self):
        """Number of windows."""
        return len(self.windows)

    def __getitem__(self, idx):
        """Return window ``idx`` as a ``(sequence_length, n_features)`` float32 tensor."""
        return torch.from_numpy(self.windows[idx])

class LabeledSimulationRunDataset(Dataset):
    """Dataset with labels for evaluation.

    Windows are built exactly as for the unlabeled dataset: rows are grouped
    by ``(faultNumber, simulationRun)``, ordered by ``sample``, scaled with
    ``scaler.transform`` and cut into overlapping windows of
    ``sequence_length`` rows that never span two runs. Each window carries
    the ``label`` value of its LAST row. Runs shorter than
    ``sequence_length`` contribute nothing.

    Attributes:
        seq_len: The window length.
        windows: ``float32`` array of shape
            ``(n_windows, sequence_length, n_features)``; it keeps this 3-D
            shape even when no window could be built.
        labels: ``int64`` array of shape ``(n_windows,)``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    def __init__(self, df, features, scaler, sequence_length=10):
        if sequence_length < 1:
            raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")
        self.seq_len = sequence_length

        scaled_runs, run_labels = [], []
        for _, group in df.groupby(['faultNumber', 'simulationRun']):
            group = group.sort_values('sample')
            X = np.asarray(scaler.transform(group[features].values))
            # label column created in load-data cell: 0=normal, 1=fault
            y = group['label'].values
            if len(X) >= sequence_length:
                scaled_runs.append(X)
                # The window starting at row i ends at row i + L - 1, so the
                # per-window labels are simply y[L-1:].
                run_labels.append(y[sequence_length - 1:])

        n_features = scaled_runs[0].shape[1] if scaled_runs else len(features)
        n_windows = sum(len(X) - sequence_length + 1 for X in scaled_runs)

        # Fill one preallocated float32 array instead of collecting one small
        # array per window in a Python list.
        self.windows = np.empty((n_windows, sequence_length, n_features), dtype=np.float32)
        cursor = 0
        for X in scaled_runs:
            # View of shape (k, n_features, sequence_length); transposed to
            # (k, sequence_length, n_features) so entry [i] equals X[i:i+L].
            view = np.lib.stride_tricks.sliding_window_view(X, sequence_length, axis=0)
            self.windows[cursor:cursor + len(view)] = view.transpose(0, 2, 1)
            cursor += len(view)

        if run_labels:
            self.labels = np.concatenate(run_labels).astype(np.int64)
        else:
            self.labels = np.empty(0, dtype=np.int64)

    def __len__(self):
        """Number of windows."""
        return len(self.windows)

    def __getitem__(self, idx):
        """Return ``(window, label)`` for index ``idx`` as tensors."""
        return torch.from_numpy(self.windows[idx]), torch.tensor(self.labels[idx])

class ConvAutoencoder(nn.Module):
    """Convolutional Autoencoder with optional Transformer layer.

    The encoder and decoder are stacks of two ``Conv1d`` layers that convolve
    along the time axis with "same"-style padding (``kernel_size // 2``), so
    the sequence length is preserved for odd kernel sizes. When
    ``use_transformer`` is true, a single Transformer encoder layer is
    applied to the encoded sequence between encoder and decoder.

    Args:
        input_dim: Number of features per time step.
        seq_len: Window length. Accepted for interface compatibility; the
            layers do not depend on it.
        conv_filters: Channels of the first encoder / first decoder conv.
        kernel_size: Convolution kernel size; must be odd.
        latent_filters: Channels of the encoded representation (also the
            Transformer ``d_model``).
        nhead: Number of attention heads (Transformer only).
        ff_dim: Transformer feed-forward width (Transformer only).
        dropout: Transformer dropout (Transformer only).
        use_transformer: Whether to insert the Transformer layer.

    Raises:
        ValueError: If ``kernel_size`` is even; with ``padding =
            kernel_size // 2`` every conv would lengthen the sequence by one
            step and the output would no longer match the input shape.

    Note:
        The Transformer layer template is no longer stored as
        ``self.transformer_layer``. ``nn.TransformerEncoder`` works on its
        own copies of the layer, so the stored template only added a second,
        never-used set of parameters to ``parameters()`` and ``state_dict()``.
    """
    def __init__(self, input_dim, seq_len, conv_filters=64, kernel_size=3,
                 latent_filters=128, nhead=4, ff_dim=32, dropout=0.05,
                 use_transformer=True):
        super().__init__()
        if kernel_size % 2 == 0:
            raise ValueError(f"kernel_size must be odd to preserve sequence length, got {kernel_size}")
        self.use_transformer = use_transformer
        padding = kernel_size // 2

        self.encoder = nn.Sequential(
            nn.Conv1d(input_dim, conv_filters, kernel_size, padding=padding),
            nn.ReLU(),
            nn.Conv1d(conv_filters, latent_filters, kernel_size, padding=padding),
            nn.ReLU()
        )

        if use_transformer:
            # Kept local on purpose, see the class docstring.
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=latent_filters, nhead=nhead,
                dim_feedforward=ff_dim, dropout=dropout, batch_first=True
            )
            self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=1)

        self.decoder = nn.Sequential(
            nn.Conv1d(latent_filters, conv_filters, kernel_size, padding=padding),
            nn.ReLU(),
            nn.Conv1d(conv_filters, input_dim, kernel_size, padding=padding)
        )

    def forward(self, x):
        """Reconstruct ``x`` of shape ``(batch, seq_len, input_dim)``; the output has the same shape."""
        # Conv1d expects (batch, channels, time).
        x = x.permute(0, 2, 1)
        x = self.encoder(x)

        if self.use_transformer:
            # The batch_first Transformer expects (batch, time, channels).
            x = x.permute(0, 2, 1)
            x = self.transformer(x)
            x = x.permute(0, 2, 1)

        x = self.decoder(x)
        # Back to (batch, time, features).
        x = x.permute(0, 2, 1)
        return x

# Confirm that the dataset and model classes of this cell are now defined.
log_progress("âœ“ Convolutional Autoencoder model defined")


[Step 3/6] Defining Convolutional Autoencoder model...


âœ“ Convolutional Autoencoder model defined


In [None]:
# Progress marker for step 4 of 6 (printed and appended to the progress log).
log_progress("\n[Step 4/6] Setting up optimization...")

def _batch_reconstruction_errors(model, X_batch):
    """Return the mean squared reconstruction error of every window in ``X_batch``.

    The squared error is averaged over dims 1 and 2 of the batch, giving one
    value per window. Gradients are not tracked; the result is a NumPy array
    on the CPU.
    """
    with torch.no_grad():
        reconstruction = model(X_batch)
        errors = ((reconstruction - X_batch) ** 2).mean(dim=(1, 2))
    return errors.cpu().numpy()


def objective(trial):
    """Optuna objective: train one autoencoder and return its weighted F1 on validation.

    Steps:
      1. Sample the hyperparameters from ``trial``.
      2. Train on the normal-only training windows for at most ``MAX_EPOCHS``
         epochs, stopping early after ``PATIENCE`` epochs without a new best
         training loss, then restore the best weights.
      3. Set the anomaly threshold to the ``threshold_percentile`` percentile
         of the per-window reconstruction errors on the training windows.
      4. Flag validation windows whose error exceeds the threshold and score
         the predictions against the window labels.

    No intermediate values are reported to ``trial``, so a pruner configured
    on the study has nothing to act on.

    Args:
        trial: The ``optuna`` trial supplying the hyperparameters.

    Returns:
        Weighted F1 score of the thresholded predictions on the validation windows.

    Raises:
        ValueError: If no training or validation window can be built for the
            sampled ``sequence_length``.
    """
    # Sequence length range [20, 40] to include manuscript's default of 30
    sequence_length = trial.suggest_int('sequence_length', 20, 40)
    conv_filters = trial.suggest_categorical('conv_filters', [32, 64, 128])
    kernel_size = trial.suggest_categorical('kernel_size', [3, 5, 7])
    latent_filters = trial.suggest_categorical('latent_filters', [64, 128, 256])
    use_transformer = trial.suggest_categorical('use_transformer', [True, False])

    if use_transformer:
        nhead = trial.suggest_categorical('nhead', [2, 4])
        # Round down to a multiple of nhead (the value is used as d_model).
        latent_filters = (latent_filters // nhead) * nhead
        ff_dim = trial.suggest_categorical('ff_dim', [32, 64, 128])
    else:
        # Placeholders: still passed to the constructor, unused without the Transformer.
        nhead = 4
        ff_dim = 32
        latent_filters = (latent_filters // 4) * 4

    dropout = trial.suggest_float('dropout', 0.0, 0.3)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    threshold_percentile = trial.suggest_float('threshold_percentile', 90.0, 99.0)

    train_dataset = SimulationRunDataset(train_normal, features, scaler, sequence_length)
    val_dataset = LabeledSimulationRunDataset(val_sampled, features, scaler, sequence_length)
    if len(train_dataset) == 0 or len(val_dataset) == 0:
        # Checked before the loaders are built so the failure names its cause.
        raise ValueError(
            f"No windows of length {sequence_length} could be built "
            f"(train: {len(train_dataset)}, val: {len(val_dataset)})"
        )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    model = ConvAutoencoder(
        num_features, sequence_length, conv_filters, kernel_size,
        latent_filters, nhead, ff_dim, dropout, use_transformer
    ).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Early stopping variables
    best_train_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(MAX_EPOCHS):
        # Training phase
        model.train()
        train_loss = 0.0
        for X_batch in train_loader:
            X_batch = X_batch.to(device)
            optimizer.zero_grad()
            reconstruction = model(X_batch)
            loss = criterion(reconstruction, X_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # Early stopping based on training loss (autoencoders train on normal data only)
        if train_loss < best_train_loss:
            best_train_loss = train_loss
            patience_counter = 0
            # Snapshot on the CPU so the copy does not occupy device memory.
            best_model_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    # Restore best model
    if best_model_state is not None:
        model.load_state_dict({k: v.to(device) for k, v in best_model_state.items()})

    model.eval()

    # Threshold: percentile of the reconstruction errors on the training windows.
    train_errors = []
    for X_batch in train_loader:
        train_errors.extend(_batch_reconstruction_errors(model, X_batch.to(device)))
    threshold = np.percentile(train_errors, threshold_percentile)

    # A validation window is predicted as fault (1) when its error exceeds the threshold.
    all_preds, all_labels = [], []
    for X_batch, y_batch in val_loader:
        errors = _batch_reconstruction_errors(model, X_batch.to(device))
        all_preds.extend((errors > threshold).astype(int))
        all_labels.extend(y_batch.numpy())

    return f1_score(all_labels, all_preds, average='weighted')

# Confirm that objective() is defined; no trial has been run at this point.
log_progress("âœ“ Objective function defined")

In [6]:
log_progress(f"\n{'='*60}")
log_progress(f"[Step 5/6] Starting optimization")
log_progress(f"{'='*60}\n")

optuna_start = time.time()
# Keep Optuna's own per-trial messages out of the output; progress is logged below.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Seed the sampler so the sequence of suggested hyperparameters is tied to
# RANDOM_SEED, the seed that is recorded alongside the results.
# The pruner only acts on trials that report intermediate values.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED),
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=5),
    study_name='conv_autoencoder_binary'
)

log_progress(f"Running {N_TRIALS} trials...")

# One trial per optimize() call so progress can be logged after each trial.
for trial_num in range(N_TRIALS):
    study.optimize(objective, n_trials=1, show_progress_bar=False)
    trial = study.trials[-1]
    log_progress(f"Trial {trial_num + 1}/{N_TRIALS}: F1={trial.value:.4f} (best={study.best_value:.4f})")

optuna_time = time.time() - optuna_start

log_progress(f"\n{'='*60}")
log_progress("âœ“ Optimization complete!")
log_progress(f"Total time: {optuna_time:.2f}s")




[Step 5/6] Starting optimization





Running 5 trials...


Trial 1/5: F1=0.0016 (best=0.0016)


Trial 2/5: F1=0.8914 (best=0.8914)


Trial 3/5: F1=0.7624 (best=0.8914)


Trial 4/5: F1=0.0049 (best=0.8914)


Trial 5/5: F1=0.2177 (best=0.8914)





âœ“ Optimization complete!


Total time: 19.19s


In [7]:
end_time = time.time()
# Total runtime is measured from the notebook's first cell, not from the start of the study.
total_runtime = end_time - start_time

log_progress("\n[Step 6/6] Saving results...")

# Summary written to JSON: best trial plus the settings needed to interpret it.
results = {
    'model': 'Conv-Autoencoder',
    'task': 'binary',
    'best_params': study.best_params,
    'best_f1_weighted': float(study.best_value),
    'num_trials': N_TRIALS,
    'run_fraction': RUN_FRACTION,
    'quick_mode': QUICK_MODE,
    'max_epochs': MAX_EPOCHS,
    'early_stopping_patience': PATIENCE,
    'optimization_time_seconds': optuna_time,
    'random_seed': RANDOM_SEED,
    'timing': {
        'start_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)),
        'end_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)),
        'total_runtime_seconds': float(total_runtime),
        'total_runtime_formatted': f"{int(total_runtime // 60)}m {int(total_runtime % 60)}s"
    }
}

# Quick-mode results get their own file names so they never overwrite a full tuning run.
mode_suffix = "_quick" if QUICK_MODE else ""
json_path = HYPERPARAM_DIR / f'conv_autoencoder_best{mode_suffix}.json'
study_path = STUDY_DIR / f'conv_autoencoder_study{mode_suffix}.pkl'

with open(json_path, 'w') as f:
    json.dump(results, f, indent=2)
log_progress(f"âœ“ Saved to {json_path}")

# The whole study object is pickled; its path is logged like the JSON path above.
with open(study_path, 'wb') as f:
    pickle.dump(study, f)
log_progress(f"âœ“ Saved to {study_path}")

log_progress(f"\n{'='*60}")
log_progress("âœ“ Convolutional Autoencoder Hyperparameter Tuning Complete!")
log_progress(f"Runtime: {results['timing']['total_runtime_formatted']}")
log_progress(f"Best F1: {study.best_value:.4f}")
log_progress(f"{'='*60}")


[Step 6/6] Saving results...


âœ“ Saved to ../outputs/hyperparams/conv_autoencoder_best_quick.json





âœ“ Convolutional Autoencoder Hyperparameter Tuning Complete!


Runtime: 0m 22s


Best F1: 0.8914


