# Notebook 7: Neural Network Models for Anomaly Detection

**Purpose**: Train and evaluate deep learning models using PyTorch.

**Models**:
1. Deep MLP
2. Deep Autoencoder
3. Variational Autoencoder (VAE)
4. LSTM Autoencoder
5. Deep One-Class Neural Network

**Outputs**:
- `neural_metrics.csv` → `results/`
- Trained model weights (`*.pth`) → `models/`
- Training curves and ROC curves → `figures/`

---

In [None]:
# Imports
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import json
import time
from pathlib import Path
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, confusion_matrix
)
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for the NumPy and PyTorch random number generators
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Device: use CUDA when it is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# Paths (the project root is the parent of the current working directory)
BASE_DIR = Path('.').resolve().parent
FEATURES_DIR = BASE_DIR / 'data' / 'features'
RESULTS_DIR = BASE_DIR / 'results'
MODELS_DIR = BASE_DIR / 'models'
FIGURES_DIR = BASE_DIR / 'figures'

# Later cells save metrics, model weights and figures into these directories
# without creating them, so make sure they exist before anything is written.
for _output_dir in (RESULTS_DIR, MODELS_DIR, FIGURES_DIR):
    _output_dir.mkdir(parents=True, exist_ok=True)

# Name of the label column in the feature CSV files
TARGET_COLUMN = 'Class'

print(f"Random Seed: {RANDOM_SEED}")

In [None]:
# Utility function to safely normalize arrays
def safe_normalize(arr):
    """Min-max scale ``arr`` into the [0, 1] range.

    Args:
        arr: Array-like of numeric values; it is converted to a float ndarray.

    Returns:
        A float ndarray with the same shape as ``arr``. An empty input yields
        an empty array, and an input whose value range is below 1e-10
        (effectively constant) maps every element to 0.5.
    """
    values = np.asarray(arr, dtype=float)
    # min()/max() raise ValueError on zero-size arrays, so return early.
    if values.size == 0:
        return values.copy()
    low, high = values.min(), values.max()
    span = high - low
    # A (near-)constant array carries no ranking information; avoid dividing by ~0.
    if span < 1e-10:
        return np.full_like(values, 0.5)
    return (values - low) / span

## 1. Load Data

In [None]:
# Load the PCA train/test CSVs and split each into features and labels
def _features_and_labels(frame):
    """Return ``(features, labels)`` of ``frame`` as float32 NumPy arrays."""
    features = frame.drop(columns=[TARGET_COLUMN]).values.astype(np.float32)
    labels = frame[TARGET_COLUMN].values.astype(np.float32)
    return features, labels


train_df = pd.read_csv(FEATURES_DIR / 'pca_train.csv')
test_df = pd.read_csv(FEATURES_DIR / 'pca_test.csv')

X_train, y_train = _features_and_labels(train_df)
X_test, y_test = _features_and_labels(test_df)

# Number of feature columns; every model below is built with this input width.
input_dim = X_train.shape[1]

print(f"Training: {X_train.shape}, Test: {X_test.shape}")
print(f"Input dimension: {input_dim}")

In [None]:
# Create DataLoaders (one batch size shared by all of them)
BATCH_SIZE = 32
_shuffled = dict(batch_size=BATCH_SIZE, shuffle=True)
_ordered = dict(batch_size=BATCH_SIZE, shuffle=False)

# Labelled training split: reshuffled every epoch.
train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
train_loader = DataLoader(train_dataset, **_shuffled)

# Labelled test split: kept in its original order.
test_dataset = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
test_loader = DataLoader(test_dataset, **_ordered)

# Normal-only data (label 0) for autoencoder training; features only, no labels.
X_train_normal = X_train[y_train == 0]
normal_dataset = TensorDataset(torch.tensor(X_train_normal))
normal_loader = DataLoader(normal_dataset, **_shuffled)

## 2. Define Neural Network Models

In [None]:
# 1. Deep MLP Classifier
class DeepMLP(nn.Module):
    """Feed-forward binary classifier that ends in a sigmoid.

    Every hidden layer is ``Linear -> ReLU -> Dropout``; the head is a single
    ``Linear`` unit followed by ``Sigmoid``.

    Args:
        input_dim: Number of input features.
        hidden_dims: Hidden layer widths, in order (any iterable of ints).
        dropout: Dropout probability applied after each hidden activation.
    """

    # The default is a tuple rather than a list so no mutable object is shared
    # between calls; lists passed by callers still work.
    def __init__(self, input_dim, hidden_dims=(128, 64, 32, 16), dropout=0.3):
        super().__init__()
        widths = [input_dim, *hidden_dims]
        layers = []
        for in_features, out_features in zip(widths, widths[1:]):
            layers += [
                nn.Linear(in_features, out_features),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        layers += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Map ``x`` of shape (..., input_dim) to sigmoid outputs of shape (..., 1)."""
        return self.network(x)

In [None]:
# 2. Deep Autoencoder
class DeepAutoencoder(nn.Module):
    """Symmetric fully connected autoencoder.

    The encoder applies ``Linear -> ReLU`` for each width in ``encoder_dims``
    (the last width is the bottleneck). The decoder mirrors those widths in
    reverse and ends with a plain ``Linear`` back to ``input_dim``.

    Args:
        input_dim: Number of input features (also the output width).
        encoder_dims: Encoder layer widths, in order. Any sequence of ints is
            accepted, including tuples.
    """

    def __init__(self, input_dim, encoder_dims=(64, 32, 16, 8)):
        super().__init__()
        # Copy into a list so tuples and other sequences can be used; the
        # original list concatenation failed for non-list inputs.
        widths = list(encoder_dims)

        encoder_layers = []
        prev_dim = input_dim
        for dim in widths:
            encoder_layers += [nn.Linear(prev_dim, dim), nn.ReLU()]
            prev_dim = dim
        self.encoder = nn.Sequential(*encoder_layers)

        # Mirror the encoder, skipping the bottleneck width itself because it
        # is the decoder's input rather than one of its layers.
        decoder_layers = []
        for dim in widths[::-1][1:]:
            decoder_layers += [nn.Linear(prev_dim, dim), nn.ReLU()]
            prev_dim = dim
        # No activation on the output so reconstructions are unbounded.
        decoder_layers.append(nn.Linear(prev_dim, input_dim))
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` (same trailing width as the input)."""
        return self.decoder(self.encoder(x))

In [None]:
# 3. Variational Autoencoder (VAE)
class VAE(nn.Module):
    """Fully connected variational autoencoder.

    The encoder (``Linear -> ReLU`` per hidden width) feeds two linear heads:
    ``fc_mu`` for the latent mean and ``fc_var`` for the latent
    log-variance. The attribute is named ``fc_var`` but its output is used as
    a log-variance by ``reparameterize``. The decoder mirrors the hidden
    widths and ends with a plain ``Linear`` back to ``input_dim``.

    Args:
        input_dim: Number of input features (also the output width).
        hidden_dims: Encoder hidden widths, in order (any sequence of ints).
        latent_dim: Size of the latent vector.
    """

    def __init__(self, input_dim, hidden_dims=(32, 16), latent_dim=4):
        super().__init__()
        widths = list(hidden_dims)

        encoder_layers = []
        prev_dim = input_dim
        for dim in widths:
            encoder_layers += [nn.Linear(prev_dim, dim), nn.ReLU()]
            prev_dim = dim
        self.encoder = nn.Sequential(*encoder_layers)
        # Both heads read the encoder's final width.
        self.fc_mu = nn.Linear(prev_dim, latent_dim)
        self.fc_var = nn.Linear(prev_dim, latent_dim)

        decoder_layers = []
        prev_dim = latent_dim
        for dim in reversed(widths):
            decoder_layers += [nn.Linear(prev_dim, dim), nn.ReLU()]
            prev_dim = dim
        decoder_layers.append(nn.Linear(prev_dim, input_dim))
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Return ``(mu, log_var)`` of the approximate posterior for ``x``."""
        h = self.encoder(x)
        return self.fc_mu(h), self.fc_var(h)

    def reparameterize(self, mu, log_var):
        """Draw a latent sample in training mode; return ``mu`` in eval mode.

        Sampling at evaluation time makes reconstruction-error scores change
        between identical calls, so after ``model.eval()`` the posterior mean
        is used and scoring is deterministic.
        """
        if not self.training:
            return mu
        std = torch.exp(0.5 * log_var)
        return mu + torch.randn_like(std) * std

    def forward(self, x):
        """Return ``(reconstruction, mu, log_var)`` for ``x``."""
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        return self.decoder(z), mu, log_var

In [None]:
# 4. LSTM Autoencoder
class LSTMAutoencoder(nn.Module):
    """LSTM autoencoder that reads a feature vector as a one-channel sequence.

    Each feature becomes one time step with a single channel. The encoder
    LSTM's final hidden state is projected to ``latent_dim``; the decoder
    projects it back to ``hidden_dim``, repeats it ``input_dim`` times, runs a
    second LSTM over that sequence and maps every step to one output value.

    Args:
        input_dim: Number of features, used as the decoded sequence length.
        hidden_dim: Hidden size of both LSTMs.
        latent_dim: Size of the bottleneck vector.
    """

    def __init__(self, input_dim, hidden_dim=32, latent_dim=16):
        super().__init__()
        self.input_dim = input_dim
        self.encoder_lstm = nn.LSTM(input_size=1, hidden_size=hidden_dim, batch_first=True)
        self.encoder_fc = nn.Linear(hidden_dim, latent_dim)
        self.decoder_fc = nn.Linear(latent_dim, hidden_dim)
        self.decoder_lstm = nn.LSTM(input_size=hidden_dim, hidden_size=hidden_dim, batch_first=True)
        self.output_fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Reconstruct ``x``; a (batch, input_dim) input gives a (batch, input_dim) output."""
        # Add a trailing channel axis so every feature is one time step.
        sequence = torch.unsqueeze(x, -1)
        _, (final_hidden, _) = self.encoder_lstm(sequence)
        latent = self.encoder_fc(final_hidden[-1])

        # Feed the same decoded vector to the decoder LSTM at every time step.
        decoder_seed = self.decoder_fc(latent)
        decoder_input = decoder_seed.unsqueeze(1).repeat(1, self.input_dim, 1)
        decoded, _ = self.decoder_lstm(decoder_input)

        per_step = self.output_fc(decoded)
        return per_step.squeeze(-1)

In [None]:
# 5. Deep One-Class Neural Network
class DeepOCNN(nn.Module):
    """Embedding network scored by squared distance to a fixed center.

    Hidden layers are ``Linear -> ReLU -> BatchNorm1d``; the final ``Linear``
    maps to ``output_dim``. ``center`` is created as zeros with
    ``requires_grad=False``, so it receives no gradient and is meant to be
    set by the caller.

    Args:
        input_dim: Number of input features.
        hidden_dims: Hidden layer widths, in order (any iterable of ints).
        output_dim: Width of the embedding and of ``center``.
    """

    # The default is a tuple rather than a list so no mutable object is shared
    # between calls; lists passed by callers still work.
    def __init__(self, input_dim, hidden_dims=(64, 32), output_dim=16):
        super().__init__()
        layers = []
        prev_dim = input_dim
        for dim in hidden_dims:
            layers += [nn.Linear(prev_dim, dim), nn.ReLU(), nn.BatchNorm1d(dim)]
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.network = nn.Sequential(*layers)
        # Kept as a Parameter (not a buffer) so existing code that assigns
        # ``model.center.data`` and existing checkpoints keep working.
        self.center = nn.Parameter(torch.zeros(output_dim), requires_grad=False)

    def forward(self, x):
        """Return the embedding of ``x``."""
        return self.network(x)

    def compute_score(self, x):
        """Return the squared Euclidean distance of each embedding to ``center``."""
        return torch.sum((self.forward(x) - self.center) ** 2, dim=1)

## 3. Training Functions

In [None]:
def train_classifier(model, train_loader, epochs=100, lr=0.001):
    """Train a binary classifier with binary cross-entropy and Adam.

    The model is moved to the notebook-level ``device`` and trained with
    ``nn.BCELoss``, so it is expected to output probabilities.

    Args:
        model: Module whose output holds one value per sample.
        train_loader: Iterable yielding ``(features, labels)`` batches.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        List with the mean per-batch loss of every epoch.
    """
    model.to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    history = []
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            # Flatten to 1-D instead of squeeze(): squeeze() turns a
            # single-sample batch into a 0-d tensor that no longer matches
            # the (1,)-shaped target, which BCELoss rejects.
            probabilities = model(X_batch).reshape(-1)
            loss = criterion(probabilities, y_batch.reshape(-1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        history.append(epoch_loss / len(train_loader))
        # Report progress every 20 epochs.
        if (epoch + 1) % 20 == 0:
            print(f"  Epoch {epoch+1}/{epochs}, Loss: {history[-1]:.4f}")
    return history

def train_autoencoder(model, normal_loader, epochs=100, lr=0.001):
    """Fit a reconstruction model by minimising mean squared error with Adam.

    The model is moved to the notebook-level ``device``. Only the first
    element of every batch is used, as both the input and the target.

    Args:
        model: Module mapping a batch to its reconstruction.
        normal_loader: Iterable of batches whose first element is the input tensor.
        epochs: Number of passes over ``normal_loader``.
        lr: Adam learning rate.

    Returns:
        List with the mean per-batch loss of every epoch.
    """
    model.to(device)
    mse = nn.MSELoss()
    adam = optim.Adam(model.parameters(), lr=lr)
    history = []
    for epoch_number in range(1, epochs + 1):
        model.train()
        batch_losses = []
        for batch in normal_loader:
            inputs = batch[0].to(device)
            adam.zero_grad()
            batch_loss = mse(model(inputs), inputs)
            batch_loss.backward()
            adam.step()
            batch_losses.append(batch_loss.item())
        history.append(sum(batch_losses) / len(normal_loader))
        # Report progress every 20 epochs.
        if epoch_number % 20 == 0:
            print(f"  Epoch {epoch_number}/{epochs}, Loss: {history[-1]:.6f}")
    return history

def train_vae(model, normal_loader, epochs=100, lr=0.001, kl_weight=0.1):
    """Train a VAE on summed reconstruction error plus a weighted KL term.

    The model is moved to the notebook-level ``device``. Only the first
    element of every batch is used.

    Args:
        model: Module returning ``(reconstruction, mu, log_var)`` for a batch.
        normal_loader: DataLoader whose batches hold the input tensor first.
        epochs: Number of passes over ``normal_loader``.
        lr: Adam learning rate.
        kl_weight: Multiplier applied to the KL term.

    Returns:
        List with one value per epoch: the summed loss divided by the number
        of samples in ``normal_loader.dataset``.
    """
    model.to(device)
    adam = optim.Adam(model.parameters(), lr=lr)
    history = []
    for epoch_number in range(1, epochs + 1):
        model.train()
        batch_totals = []
        for batch in normal_loader:
            inputs = batch[0].to(device)
            adam.zero_grad()
            reconstruction, mu, log_var = model(inputs)
            # Both terms are sums over the batch, not means.
            recon_term = nn.functional.mse_loss(reconstruction, inputs, reduction='sum')
            kl_term = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
            total = recon_term + kl_weight * kl_term
            total.backward()
            adam.step()
            batch_totals.append(total.item())
        history.append(sum(batch_totals) / len(normal_loader.dataset))
        # Report progress every 20 epochs.
        if epoch_number % 20 == 0:
            print(f"  Epoch {epoch_number}/{epochs}, Loss: {history[-1]:.4f}")
    return history

In [None]:
def compute_metrics(y_true, y_pred, y_prob=None):
    """Compute binary classification metrics for one model.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_pred: Predicted labels (0 or 1).
        y_prob: Optional continuous scores used for ROC-AUC.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall``, ``f1_score``,
        ``fpr``, ``tpr`` and ``roc_auc``. ``roc_auc`` is 0.5 when ``y_prob``
        is missing, contains NaN, or ``roc_auc_score`` rejects the input.
    """
    # labels=[0, 1] keeps the matrix 2x2 even when one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1_score': f1_score(y_true, y_pred, zero_division=0),
        'fpr': fp / (fp + tn) if (fp + tn) > 0 else 0,
        'tpr': tp / (tp + fn) if (tp + fn) > 0 else 0,
    }
    if y_prob is not None and not np.any(np.isnan(y_prob)):
        try:
            metrics['roc_auc'] = roc_auc_score(y_true, y_prob)
        except ValueError:
            # Catch only the rejection of invalid input (e.g. a single class
            # in y_true) instead of a bare except, which would also hide
            # KeyboardInterrupt and genuine programming errors.
            metrics['roc_auc'] = 0.5
    else:
        metrics['roc_auc'] = 0.5
    return metrics

## 4. Train and Evaluate Models

In [None]:
# One metrics dict per trained model, appended by the training cells below
all_metrics = []
# Model name -> list of per-epoch training losses
all_histories = {}
# Model name -> per-sample test scores used for the ROC curves
all_probabilities = {}

In [None]:
# 1. Deep MLP (supervised classifier trained on the labelled training loader)
print("Training Deep MLP...")
model = DeepMLP(input_dim)

start_time = time.time()
history = train_classifier(model, train_loader, epochs=100)
train_time = time.time() - start_time

# Score the whole test set in one forward pass and threshold at 0.5.
model.eval()
with torch.no_grad():
    test_inputs = torch.tensor(X_test).to(device)
    y_prob = model(test_inputs).cpu().numpy().flatten()
y_pred = (y_prob > 0.5).astype(int)
# Everything elapsed since training finished counts as inference time.
inference_time = time.time() - start_time - train_time

metrics = compute_metrics(y_test, y_pred, y_prob)
metrics['model'] = 'Deep_MLP'
metrics['train_time'] = train_time
metrics['inference_time'] = inference_time

all_metrics.append(metrics)
all_histories['Deep_MLP'] = history
all_probabilities['Deep_MLP'] = y_prob
torch.save(model.state_dict(), MODELS_DIR / 'deep_mlp.pth')
print(f"  F1: {metrics['f1_score']:.4f}, ROC-AUC: {metrics['roc_auc']:.4f}")

In [None]:
# 2. Deep Autoencoder (trained on normal samples only, scored by reconstruction error)
print("\nTraining Deep Autoencoder...")
model = DeepAutoencoder(input_dim)
start_time = time.time()
history = train_autoencoder(model, normal_loader, epochs=100)
train_time = time.time() - start_time

model.eval()
with torch.no_grad():
    # Measure the real test-set scoring time instead of reporting a fixed
    # placeholder value in the metrics table.
    inference_start = time.time()
    reconstructed = model(torch.tensor(X_test).to(device)).cpu().numpy()
    reconstruction_error = np.mean((X_test - reconstructed) ** 2, axis=1)
    inference_time = time.time() - inference_start

    # Threshold: 95th percentile of the error on the normal training samples,
    # so about 5% of those samples would themselves be flagged.
    train_reconstructed = model(torch.tensor(X_train_normal).to(device)).cpu().numpy()
    train_error = np.mean((X_train_normal - train_reconstructed) ** 2, axis=1)
    threshold = np.percentile(train_error, 95)
    y_pred = (reconstruction_error > threshold).astype(int)
    # Min-max scaled error serves as the continuous score for ROC-AUC.
    y_prob = safe_normalize(reconstruction_error)

metrics = compute_metrics(y_test, y_pred, y_prob)
metrics.update({'model': 'Deep_Autoencoder', 'train_time': train_time, 'inference_time': inference_time})
all_metrics.append(metrics)
all_histories['Deep_Autoencoder'] = history
all_probabilities['Deep_Autoencoder'] = y_prob
torch.save(model.state_dict(), MODELS_DIR / 'deep_autoencoder.pth')
print(f"  F1: {metrics['f1_score']:.4f}, ROC-AUC: {metrics['roc_auc']:.4f}")

In [None]:
# 3. VAE (trained on normal samples only, scored by reconstruction error)
print("\nTraining Variational Autoencoder...")
model = VAE(input_dim)
start_time = time.time()
history = train_vae(model, normal_loader, epochs=100)
train_time = time.time() - start_time

model.eval()
with torch.no_grad():
    # Measure the real test-set scoring time instead of reporting a fixed
    # placeholder value in the metrics table.
    inference_start = time.time()
    reconstructed, _, _ = model(torch.tensor(X_test).to(device))
    reconstruction_error = np.mean((X_test - reconstructed.cpu().numpy()) ** 2, axis=1)
    inference_time = time.time() - inference_start

    # Threshold: 95th percentile of the error on the normal training samples,
    # so about 5% of those samples would themselves be flagged.
    train_reconstructed, _, _ = model(torch.tensor(X_train_normal).to(device))
    train_error = np.mean((X_train_normal - train_reconstructed.cpu().numpy()) ** 2, axis=1)
    threshold = np.percentile(train_error, 95)
    y_pred = (reconstruction_error > threshold).astype(int)
    # Min-max scaled error serves as the continuous score for ROC-AUC.
    y_prob = safe_normalize(reconstruction_error)

metrics = compute_metrics(y_test, y_pred, y_prob)
metrics.update({'model': 'VAE', 'train_time': train_time, 'inference_time': inference_time})
all_metrics.append(metrics)
all_histories['VAE'] = history
all_probabilities['VAE'] = y_prob
torch.save(model.state_dict(), MODELS_DIR / 'vae.pth')
print(f"  F1: {metrics['f1_score']:.4f}, ROC-AUC: {metrics['roc_auc']:.4f}")

In [None]:
# 4. LSTM Autoencoder (trained on normal samples only, for 50 epochs)
print("\nTraining LSTM Autoencoder...")
model = LSTMAutoencoder(input_dim)
start_time = time.time()
history = train_autoencoder(model, normal_loader, epochs=50)
train_time = time.time() - start_time

model.eval()
with torch.no_grad():
    # Measure the real test-set scoring time instead of reporting a fixed
    # placeholder value in the metrics table.
    inference_start = time.time()
    reconstructed = model(torch.tensor(X_test).to(device)).cpu().numpy()
    reconstruction_error = np.mean((X_test - reconstructed) ** 2, axis=1)
    inference_time = time.time() - inference_start

    # Threshold: 95th percentile of the error on the normal training samples,
    # so about 5% of those samples would themselves be flagged.
    train_reconstructed = model(torch.tensor(X_train_normal).to(device)).cpu().numpy()
    train_error = np.mean((X_train_normal - train_reconstructed) ** 2, axis=1)
    threshold = np.percentile(train_error, 95)
    y_pred = (reconstruction_error > threshold).astype(int)
    # Min-max scaled error serves as the continuous score for ROC-AUC.
    y_prob = safe_normalize(reconstruction_error)

metrics = compute_metrics(y_test, y_pred, y_prob)
metrics.update({'model': 'LSTM_Autoencoder', 'train_time': train_time, 'inference_time': inference_time})
all_metrics.append(metrics)
all_histories['LSTM_Autoencoder'] = history
all_probabilities['LSTM_Autoencoder'] = y_prob
torch.save(model.state_dict(), MODELS_DIR / 'lstm_autoencoder.pth')
print(f"  F1: {metrics['f1_score']:.4f}, ROC-AUC: {metrics['roc_auc']:.4f}")

In [None]:
# 5. Deep One-Class Neural Network (trained to pull normal samples towards a fixed center)
print("\nTraining Deep One-Class Neural Network...")
model = DeepOCNN(input_dim)
model.to(device)

# Fix the center to the mean embedding of the first 100 normal training
# samples, computed with the untrained network.
# NOTE(review): the model has not been switched to eval mode here, so the
# BatchNorm layers presumably use batch statistics for this pass; confirm
# that this is intended.
with torch.no_grad():
    model.center.data = model(torch.tensor(X_train_normal[:100]).to(device)).mean(dim=0)

optimizer = optim.Adam(model.parameters(), lr=0.001)
start_time = time.time()
history = []
for epoch in range(100):
    model.train()
    epoch_loss = 0
    for batch in normal_loader:
        X_batch = batch[0].to(device)
        optimizer.zero_grad()
        # Loss: mean squared distance of the batch embeddings to the center.
        loss = torch.mean(model.compute_score(X_batch))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    history.append(epoch_loss / len(normal_loader))
    if (epoch + 1) % 20 == 0:
        print(f"  Epoch {epoch+1}/100, Loss: {history[-1]:.4f}")
train_time = time.time() - start_time

model.eval()
with torch.no_grad():
    # Measure the real test-set scoring time instead of reporting a fixed
    # placeholder value in the metrics table.
    inference_start = time.time()
    scores = model.compute_score(torch.tensor(X_test).to(device)).cpu().numpy()
    inference_time = time.time() - inference_start

    # Threshold: 95th percentile of the scores on the normal training samples,
    # so about 5% of those samples would themselves be flagged.
    train_scores = model.compute_score(torch.tensor(X_train_normal).to(device)).cpu().numpy()
    threshold = np.percentile(train_scores, 95)
    y_pred = (scores > threshold).astype(int)
    # Min-max scaled distance serves as the continuous score for ROC-AUC.
    y_prob = safe_normalize(scores)

metrics = compute_metrics(y_test, y_pred, y_prob)
metrics.update({'model': 'Deep_OCNN', 'train_time': train_time, 'inference_time': inference_time})
all_metrics.append(metrics)
all_histories['Deep_OCNN'] = history
all_probabilities['Deep_OCNN'] = y_prob
torch.save(model.state_dict(), MODELS_DIR / 'deep_ocnn.pth')
print(f"  F1: {metrics['f1_score']:.4f}, ROC-AUC: {metrics['roc_auc']:.4f}")

## 5. Save Results

In [None]:
# Preferred column order for the saved table; any other columns go last.
col_order = [
    'model',
    'accuracy', 'precision', 'recall', 'f1_score', 'roc_auc',
    'fpr', 'tpr',
    'train_time', 'inference_time',
]

metrics_df = pd.DataFrame(all_metrics)
extra_cols = []
for column in metrics_df.columns:
    if column not in col_order:
        extra_cols.append(column)
metrics_df = metrics_df.loc[:, col_order + extra_cols]

# Write the table without the DataFrame index.
metrics_path = RESULTS_DIR / 'neural_metrics.csv'
metrics_df.to_csv(metrics_path, index=False)

print(f"✅ Saved neural metrics to: {metrics_path}")
print("\nNeural Network Results:")
print(metrics_df.to_string(index=False))

## 6. Visualizations

In [None]:
# Training curves: one panel per trained model, three panels per row
n_cols = 3
# Ceiling division so every history gets a panel; keep at least one row.
n_rows = max(1, -(-len(all_histories) // n_cols))
# squeeze=False always returns a 2-D array of axes, whatever the grid size.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, (name, history) in zip(axes, all_histories.items()):
    ax.plot(history)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'{name} Training Loss')
    ax.grid(True, alpha=0.3)

# Hide every unused panel, not only the last one.
for unused_ax in axes[len(all_histories):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'neural_training_curves.png', dpi=150)
plt.show()

In [None]:
# ROC curves for every model whose scores are usable (no NaN values)
plt.figure(figsize=(10, 8))

for model_name, y_prob in all_probabilities.items():
    # Models with missing or NaN scores are left out without a message.
    if y_prob is None or np.any(np.isnan(y_prob)):
        continue
    try:
        fpr_vals, tpr_vals, _ = roc_curve(y_test, y_prob)
        # Reuse the AUC already stored in the metrics table for the legend.
        model_rows = metrics_df[metrics_df['model'] == model_name]
        auc_val = model_rows['roc_auc'].values[0]
        plt.plot(fpr_vals, tpr_vals, label=f'{model_name} (AUC={auc_val:.3f})')
    except Exception as e:
        print(f"Skipping {model_name}: {e}")

# Diagonal reference line for a random classifier.
plt.plot([0, 1], [0, 1], 'k--', label='Random')

plt.title('ROC Curves - Neural Network Models')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True, alpha=0.3)
plt.legend(loc='lower right')
plt.tight_layout()
plt.savefig(FIGURES_DIR / 'roc_curves_neural.png', dpi=150)
plt.show()

In [None]:
# Final summary: number of models and the best one by each headline metric
banner = "=" * 50
print("\n" + banner)
print("NEURAL NETWORK MODELS SUMMARY")
print(banner)
print(f"Total models trained: {len(all_metrics)}")

f1_column = metrics_df['f1_score']
auc_column = metrics_df['roc_auc']
best_f1_model = metrics_df.loc[f1_column.idxmax(), 'model']
best_auc_model = metrics_df.loc[auc_column.idxmax(), 'model']
best_f1 = f1_column.max()
best_auc = auc_column.max()

print(f"\nBest by F1 Score: {best_f1_model} ({best_f1:.4f})")
print(f"Best by ROC-AUC: {best_auc_model} ({best_auc:.4f})")
print("\n✅ Notebook 7 Complete!")