# LSTM-FCN Hyperparameter Tuning

This notebook performs hyperparameter optimization for LSTM-FCN (LSTM + Fully Convolutional Network) using Optuna.

**Task**: Multiclass fault classification (18 classes)

**Architecture**: Combines LSTM for temporal features with 1D CNN for local patterns.

**Data Handling**:
- Windows are created within simulation runs only (no cross-run windows)
- Subsampling is done by simulation runs, not individual rows

**Outputs**:
- Best hyperparameters: `outputs/hyperparams/lstm_fcn_best.json`
- Optuna study: `outputs/optuna_studies/lstm_fcn_study.pkl`

## Configuration

In [1]:
import os
import sys
import time
import json
import pickle
from pathlib import Path

start_time = time.time()
print("="*60)
print("LSTM-FCN Hyperparameter Tuning")
print("="*60)
print(f"Started at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")

QUICK_MODE = os.getenv('QUICK_MODE', 'False').lower() in ('true', '1', 'yes')

if QUICK_MODE:
    RUN_FRACTION = 0.01
    MIN_RUNS_PER_CLASS = 5
    N_TRIALS = 5
    MAX_EPOCHS = 10
    PATIENCE = 3
    print("ðŸš€ QUICK MODE (1% runs, min 5/class, 5 trials, max 10 epochs)")
else:
    RUN_FRACTION = 0.50
    MIN_RUNS_PER_CLASS = 5
    N_TRIALS = 50
    MAX_EPOCHS = 50
    PATIENCE = 5
    print("ðŸ”¬ TUNING MODE (50% runs, 50 trials, max 50 epochs)")

DATA_DIR = Path('../data')
OUTPUT_DIR = Path('../outputs')
HYPERPARAM_DIR = OUTPUT_DIR / 'hyperparams'
STUDY_DIR = OUTPUT_DIR / 'optuna_studies'
PROGRESS_FILE = OUTPUT_DIR / 'lstm_fcn_progress.log'

HYPERPARAM_DIR.mkdir(parents=True, exist_ok=True)
STUDY_DIR.mkdir(parents=True, exist_ok=True)

RANDOM_SEED = 42

print(f"Run fraction: {RUN_FRACTION*100}%")
print(f"Trials: {N_TRIALS}, Max epochs: {MAX_EPOCHS}, Patience: {PATIENCE}")
print("="*60)

def log_progress(message):
    print(message, flush=True)
    with open(PROGRESS_FILE, 'a') as f:
        f.write(f"{message}\n")
        f.flush()

PROGRESS_FILE.write_text("")

LSTM-FCN Hyperparameter Tuning
Started at: 2026-01-04 15:27:54
ðŸ”¬ TUNING MODE (50% runs, 50 trials, max 50 epochs)
Run fraction: 50.0%
Trials: 50, Max epochs: 50, Patience: 5


0

In [2]:
log_progress("\n[Step 1/6] Loading libraries...")
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import f1_score
import optuna
from optuna.pruners import MedianPruner
import warnings
warnings.filterwarnings('ignore')

torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
log_progress(f"âœ“ Using device: {device}")


[Step 1/6] Loading libraries...


âœ“ Using device: cuda


## Data Loading and Preprocessing

In [3]:
log_progress("\n[Step 2/6] Loading data...")

train = pd.read_csv(DATA_DIR / 'multiclass_train.csv')
val = pd.read_csv(DATA_DIR / 'multiclass_val.csv')

log_progress(f"âœ“ Full data - Train: {train.shape}, Val: {val.shape}")

def sample_by_runs(df, fraction, seed, min_runs=5):
    """Sample complete simulation runs, preserving temporal structure."""
    runs = df.groupby(['faultNumber', 'simulationRun']).size().reset_index()[['faultNumber', 'simulationRun']]
    
    def sample_class(x):
        n_total = len(x)
        n_sample = max(min_runs, int(n_total * fraction))
        n_sample = min(n_sample, n_total)
        return x.sample(n=n_sample, random_state=seed)
    
    sampled_runs = runs.groupby('faultNumber', group_keys=False).apply(sample_class)
    df_sampled = df.merge(sampled_runs, on=['faultNumber', 'simulationRun'])
    return df_sampled.sort_values(['faultNumber', 'simulationRun', 'sample']).reset_index(drop=True)

train_sampled = sample_by_runs(train, RUN_FRACTION, RANDOM_SEED, MIN_RUNS_PER_CLASS)
val_sampled = sample_by_runs(val, RUN_FRACTION, RANDOM_SEED, MIN_RUNS_PER_CLASS)

n_train_runs = train_sampled.groupby(['faultNumber', 'simulationRun']).ngroups
n_val_runs = val_sampled.groupby(['faultNumber', 'simulationRun']).ngroups

log_progress(f"âœ“ Sampled - Train: {train_sampled.shape} ({n_train_runs} runs), Val: {val_sampled.shape} ({n_val_runs} runs)")

features = [col for col in train.columns if 'xmeas' in col or 'xmv' in col]
num_features = len(features)

scaler = StandardScaler()
scaler.fit(train_sampled[features])

label_encoder = LabelEncoder()
label_encoder.fit(train_sampled['faultNumber'])
num_classes = len(label_encoder.classes_)

log_progress(f"âœ“ Features: {num_features}, Classes: {num_classes}")


[Step 2/6] Loading data...


âœ“ Full data - Train: (864000, 57), Val: (432000, 57)


âœ“ Sampled - Train: (432000, 57) (900 runs), Val: (216000, 57) (450 runs)


âœ“ Features: 52, Classes: 18


In [4]:
log_progress("\n[Step 3/6] Defining LSTM-FCN model...")

class SimulationRunDataset(Dataset):
    """Dataset that creates windows WITHIN simulation runs only."""
    def __init__(self, df, features, label_col, scaler, label_encoder, sequence_length=10):
        self.seq_len = sequence_length
        self.windows = []
        self.labels = []
        
        for (fault, run), group in df.groupby(['faultNumber', 'simulationRun']):
            group = group.sort_values('sample')
            X = scaler.transform(group[features].values)
            y = label_encoder.transform(group['faultNumber'].values)
            
            for i in range(len(X) - sequence_length + 1):
                self.windows.append(X[i:i+sequence_length])
                self.labels.append(y[i+sequence_length-1])
        
        self.windows = np.array(self.windows, dtype=np.float32)
        self.labels = np.array(self.labels, dtype=np.int64)
    
    def __len__(self):
        return len(self.windows)
    
    def __getitem__(self, idx):
        return torch.from_numpy(self.windows[idx]), torch.tensor(self.labels[idx])

class LSTMFCN(nn.Module):
    """LSTM-FCN: Combines LSTM with 1D CNN for time series classification.
    
    Architecture matches v1/LSTM_FCN_final.ipynb:
    - Hierarchical conv filters (32â†’24â†’16) 
    - Dilation rates (1â†’2â†’4) for multi-scale feature extraction
    - SpatialDropout after first conv layer
    """
    def __init__(self, input_size, lstm_hidden, lstm_layers, dropout, num_classes):
        super().__init__()
        
        # LSTM branch (matches v1: num_cells=24)
        self.lstm = nn.LSTM(input_size, lstm_hidden, lstm_layers,
                           batch_first=True, dropout=dropout if lstm_layers > 1 else 0)
        
        # FCN branch with hierarchical filters and dilation (matching v1)
        # v1 uses: Conv1d(32, k=7) -> Conv1d(24, k=5, dilation=2) -> Conv1d(16, k=3, dilation=4)
        self.conv1 = nn.Conv1d(input_size, 32, kernel_size=7, padding='same', dilation=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.spatial_dropout = nn.Dropout2d(0.3)  # SpatialDropout1D equivalent
        
        self.conv2 = nn.Conv1d(32, 24, kernel_size=5, padding='same', dilation=2)
        self.bn2 = nn.BatchNorm1d(24)
        
        self.conv3 = nn.Conv1d(24, 16, kernel_size=3, padding='same', dilation=4)
        self.bn3 = nn.BatchNorm1d(16)
        
        self.gap = nn.AdaptiveAvgPool1d(1)
        
        # Classifier head (matches v1: Dense(max(32, num_classes*2)) -> Dense(num_classes))
        combined_dim = lstm_hidden + 16  # LSTM output + final conv filters
        hidden_dim = max(32, num_classes * 2)
        self.fc1 = nn.Linear(combined_dim, hidden_dim)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_dim, num_classes)
        
    def forward(self, x):
        # LSTM branch: (batch, seq, features)
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]
        
        # FCN branch: (batch, features, seq)
        x_cnn = x.permute(0, 2, 1)
        
        # Conv1 with spatial dropout
        x_cnn = torch.relu(self.bn1(self.conv1(x_cnn)))
        x_cnn = self.spatial_dropout(x_cnn.unsqueeze(-1)).squeeze(-1)
        
        # Conv2 and Conv3 with dilation
        x_cnn = torch.relu(self.bn2(self.conv2(x_cnn)))
        x_cnn = torch.relu(self.bn3(self.conv3(x_cnn)))
        
        x_cnn = self.gap(x_cnn).squeeze(-1)
        
        # Combine and classify
        combined = torch.cat([lstm_out, x_cnn], dim=1)
        out = torch.relu(self.fc1(combined))
        out = self.dropout(out)
        return self.fc2(out)

log_progress("âœ“ LSTM-FCN model defined")


[Step 3/6] Defining LSTM-FCN model...


âœ“ LSTM-FCN model defined


In [5]:
log_progress("\n[Step 4/6] Setting up optimization...")

def objective(trial):
    """
    Optuna objective for LSTM-FCN.
    
    Hyperparameters:
    - sequence_length: Input sequence length (20-40 to include manuscript's 30)
    - lstm_hidden: LSTM hidden units
    - lstm_layers: Number of LSTM layers
    - dropout: Dropout rate
    - learning_rate: Optimizer LR
    - batch_size: Training batch size
    
    Note: FCN architecture is fixed to match v1 (hierarchical filters with dilation)
    """
    # Sequence length range [20, 40] to include manuscript's default of 30
    sequence_length = trial.suggest_int('sequence_length', 20, 40)
    lstm_hidden = trial.suggest_categorical('lstm_hidden', [24, 32, 64, 128])  # v1 uses 24
    lstm_layers = trial.suggest_int('lstm_layers', 1, 2)
    dropout = trial.suggest_float('dropout', 0.3, 0.5)  # v1 uses 0.5
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    train_dataset = SimulationRunDataset(
        train_sampled, features, 'faultNumber', scaler, label_encoder, sequence_length
    )
    val_dataset = SimulationRunDataset(
        val_sampled, features, 'faultNumber', scaler, label_encoder, sequence_length
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    model = LSTMFCN(num_features, lstm_hidden, lstm_layers, dropout, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    # Early stopping variables
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None
    
    for epoch in range(MAX_EPOCHS):
        # Training phase
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        
        # Validation phase for early stopping
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item()
        val_loss /= len(val_loader)
        
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_model_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break
    
    # Restore best model
    if best_model_state is not None:
        model.load_state_dict({k: v.to(device) for k, v in best_model_state.items()})
    
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            outputs = model(X_batch)
            preds = outputs.argmax(dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(y_batch.numpy())
    
    return f1_score(all_labels, all_preds, average='weighted')

log_progress("âœ“ Objective function defined")


[Step 4/6] Setting up optimization...


âœ“ Objective function defined


In [6]:
log_progress(f"\n{'='*60}")
log_progress(f"[Step 5/6] Starting optimization")
log_progress(f"{'='*60}\n")

optuna_start = time.time()
optuna.logging.set_verbosity(optuna.logging.WARNING)

study = optuna.create_study(
    direction='maximize',
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=5),
    study_name='lstm_fcn_multiclass'
)

log_progress(f"Running {N_TRIALS} trials...")

for trial_num in range(N_TRIALS):
    study.optimize(objective, n_trials=1, show_progress_bar=False)
    trial = study.trials[-1]
    log_progress(f"Trial {trial_num + 1}/{N_TRIALS}: F1={trial.value:.4f} (best={study.best_value:.4f})")

optuna_time = time.time() - optuna_start

log_progress(f"\n{'='*60}")
log_progress("âœ“ Optimization complete!")
log_progress(f"Total time: {optuna_time:.2f}s")




[Step 5/6] Starting optimization





Running 50 trials...


Trial 1/50: F1=0.9875 (best=0.9875)


Trial 2/50: F1=0.9856 (best=0.9875)


Trial 3/50: F1=0.9731 (best=0.9875)


Trial 4/50: F1=0.9828 (best=0.9875)


Trial 5/50: F1=0.9861 (best=0.9875)


Trial 6/50: F1=0.9835 (best=0.9875)


Trial 7/50: F1=0.9844 (best=0.9875)


Trial 8/50: F1=0.9816 (best=0.9875)


Trial 9/50: F1=0.9780 (best=0.9875)


Trial 10/50: F1=0.9833 (best=0.9875)


Trial 11/50: F1=0.9862 (best=0.9875)


Trial 12/50: F1=0.9835 (best=0.9875)


Trial 13/50: F1=0.9875 (best=0.9875)


Trial 14/50: F1=0.9865 (best=0.9875)


Trial 15/50: F1=0.9840 (best=0.9875)


Trial 16/50: F1=0.9863 (best=0.9875)


Trial 17/50: F1=0.9848 (best=0.9875)


Trial 18/50: F1=0.9822 (best=0.9875)


Trial 19/50: F1=0.9837 (best=0.9875)


Trial 20/50: F1=0.9834 (best=0.9875)


Trial 21/50: F1=0.9803 (best=0.9875)


Trial 22/50: F1=0.9862 (best=0.9875)


Trial 23/50: F1=0.9839 (best=0.9875)


Trial 24/50: F1=0.9860 (best=0.9875)


Trial 25/50: F1=0.9876 (best=0.9876)


Trial 26/50: F1=0.9862 (best=0.9876)


Trial 27/50: F1=0.9847 (best=0.9876)


Trial 28/50: F1=0.9831 (best=0.9876)


Trial 29/50: F1=0.9823 (best=0.9876)


Trial 30/50: F1=0.9879 (best=0.9879)


Trial 31/50: F1=0.9879 (best=0.9879)


Trial 32/50: F1=0.9894 (best=0.9894)


Trial 33/50: F1=0.9896 (best=0.9896)


Trial 34/50: F1=0.9877 (best=0.9896)


Trial 35/50: F1=0.9884 (best=0.9896)


Trial 36/50: F1=0.9844 (best=0.9896)


Trial 37/50: F1=0.9868 (best=0.9896)


Trial 38/50: F1=0.9852 (best=0.9896)


Trial 39/50: F1=0.9864 (best=0.9896)


Trial 40/50: F1=0.9851 (best=0.9896)


Trial 41/50: F1=0.9852 (best=0.9896)


Trial 42/50: F1=0.9882 (best=0.9896)


Trial 43/50: F1=0.9873 (best=0.9896)


Trial 44/50: F1=0.9883 (best=0.9896)


Trial 45/50: F1=0.9883 (best=0.9896)


Trial 46/50: F1=0.9867 (best=0.9896)


Trial 47/50: F1=0.9874 (best=0.9896)


Trial 48/50: F1=0.9850 (best=0.9896)


Trial 49/50: F1=0.9827 (best=0.9896)


Trial 50/50: F1=0.9872 (best=0.9896)





âœ“ Optimization complete!


Total time: 39642.17s


In [7]:
end_time = time.time()
total_runtime = end_time - start_time

log_progress("\n[Step 6/6] Saving results...")

results = {
    'model': 'LSTM-FCN',
    'task': 'multiclass',
    'best_params': study.best_params,
    'best_f1_weighted': float(study.best_value),
    'num_trials': N_TRIALS,
    'run_fraction': RUN_FRACTION,
    'quick_mode': QUICK_MODE,
    'max_epochs': MAX_EPOCHS,
    'early_stopping_patience': PATIENCE,
    'optimization_time_seconds': optuna_time,
    'random_seed': RANDOM_SEED,
    'timing': {
        'start_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)),
        'end_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)),
        'total_runtime_seconds': float(total_runtime),
        'total_runtime_formatted': f"{int(total_runtime // 60)}m {int(total_runtime % 60)}s"
    }
}

mode_suffix = "_quick" if QUICK_MODE else ""
json_path = HYPERPARAM_DIR / f'lstm_fcn_best{mode_suffix}.json'
study_path = STUDY_DIR / f'lstm_fcn_study{mode_suffix}.pkl'

with open(json_path, 'w') as f:
    json.dump(results, f, indent=2)
log_progress(f"âœ“ Saved to {json_path}")

with open(study_path, 'wb') as f:
    pickle.dump(study, f)

log_progress(f"\n{'='*60}")
log_progress("âœ“ LSTM-FCN Hyperparameter Tuning Complete!")
log_progress(f"Runtime: {results['timing']['total_runtime_formatted']}")
log_progress(f"Best F1: {study.best_value:.4f}")
log_progress(f"{'='*60}")


[Step 6/6] Saving results...


âœ“ Saved to ../outputs/hyperparams/lstm_fcn_best.json





âœ“ LSTM-FCN Hyperparameter Tuning Complete!


Runtime: 660m 52s


Best F1: 0.9896


