
# PROJECT OVERVIEW

## Title: Real-Time DDoS Detection in Industrial IIoT Using Deep Reinforcement Learning (DRL) Algorithms

This notebook presents a complete, end-to-end pipeline for detecting Distributed Denial of Service (DDoS) attacks in Industrial Internet of Things (IIoT) networks using Deep Reinforcement Learning (DRL). The models are tested on the KDDCup99, CIC-DDoS2019, and Edge-IIoT datasets. The PPO-based model achieved high accuracy on all three datasets, with near-zero false positives and false negatives, inference latency below 0.23 ms per sample, ONNX export that removes the PyTorch dependency in production, full reproducibility, and unified feature engineering across datasets. Comprehensive visualizations (accuracy, latency, confusion matrices, and convergence) are produced for all datasets. Five DRL techniques (PPO, DQN, Double DQN, Dueling DQN, and DDPG) were tested, and PPO outperformed all of the others.

### Datasets: KDDCup99, CIC-DDoS2019, Edge-IIoT


In [None]:
# CELL 2: LIBRARIES & REPRODUCIBILITY
import warnings, os, random, gc, time, psutil
from pathlib import Path
from collections import deque
from datetime import datetime

warnings.filterwarnings('ignore')
os.environ['PYTHONHASHSEED'] = '42'
random.seed(42)

import numpy as np
np.random.seed(42)

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

import os

torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

from sklearn.metrics import accuracy_score

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# sklearn imports – all needed ones consolidated
from sklearn.preprocessing import StandardScaler, LabelEncoder, RobustScaler  # <-- Added RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import roc_auc_score

import joblib
import onnxruntime as ort

# Global plot styling applied to every figure drawn after this cell.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (10, 6)

# The original string literal was never closed, which made the whole cell a SyntaxError.
print("All libraries imported")

In [None]:
# CELL 3: MULTI-DATASET LOADING

print("CELL 3: Loading all 3 datasets...")

# Root of the local kagglehub cache. The original Windows location remains the
# default; set DRL_DDOS_DATA_DIR to run the notebook on another machine.
base_path = Path(os.environ.get("DRL_DDOS_DATA_DIR", r"F:\jupyter\kagglehub"))

# Dataset roots, joined one component at a time so the same layout resolves on
# both Windows and POSIX (a raw "a\b\c" string is a single component on POSIX).
paths = {
    'kddcup99': base_path.joinpath("datasets", "ericzs", "kddcup99", "versions"),
    'cic_ddos': base_path.joinpath("datasets", "dhoogla", "cicddos2019", "versions", "3"),
    'edge_iot': base_path.joinpath(
        "edgeiiotset-cyber-security-dataset-of-iot-iiot", "versions", "5",
        "Edge-IIoTset dataset", "Selected dataset for ML and DL",
    ),
}

# Filled by the loading loop: dataset name -> tensors, scaler and feature names.
datasets = {}

def map_to_binary(label):
    """Collapse a raw dataset label into a binary target.

    Args:
        label: Raw label value (string, number, or missing).

    Returns:
        0 (normal traffic) for missing values, for numeric labels equal to
        zero, and for text labels containing "normal" or "benign";
        1 (attack) for everything else.
    """
    if pd.isna(label):
        return 0
    text = str(label).strip()
    # Numeric labels: only an exact zero means "normal". A substring test for
    # '0' would wrongly map attack codes such as 10 or 1.0 to normal.
    try:
        return 0 if float(text) == 0 else 1
    except ValueError:
        pass
    s = text.lower().replace('_', '').replace(' ', '').replace('-', '')
    return 0 if any(k in s for k in ('normal', 'benign')) else 1

# Load, label, (optionally) rebalance, split and scale every configured dataset.
for name, root in paths.items():
    print(f"\nLoading {name.upper()} from: {root}")

    if not root.exists():
        raise FileNotFoundError(f"Path not found: {root}")

    files = list(root.rglob("*.csv")) + list(root.rglob("*.parquet"))
    if not files:
        raise FileNotFoundError(f"No CSV/PARQUET files in {root}")

    print(f"   Found {len(files)} file(s)")
    dfs = []
    for f in files:
        # Best effort per file: an unreadable file is reported and skipped.
        try:
            if f.suffix == '.parquet':
                df_part = pd.read_parquet(f)
            else:
                df_part = pd.read_csv(f, low_memory=False)
            print(f"   → Loaded {f.name}: {len(df_part):,} rows")
            dfs.append(df_part)
            del df_part
            gc.collect()
        except Exception as e:
            print(f"   ! Failed {f.name}: {e}")

    # pd.concat([]) raises an opaque ValueError; say what actually went wrong.
    if not dfs:
        raise ValueError(f"None of the {len(files)} file(s) under {root} could be read")

    df = pd.concat(dfs, ignore_index=True)
    del dfs
    print(f"   Total rows: {len(df):,}")

    # Auto-detect label column (falls back to the last column)
    label_col = next((c for c in ['Label', 'label', 'Attack_type', 'Attack', 'class'] if c in df.columns), df.columns[-1])
    print(f"   Using label column: '{label_col}'")

    df['target'] = df[label_col].apply(map_to_binary)
    print(f"   Attack ratio (raw): {df['target'].mean():.4%}")

    # Keep the ground truth out of the features. 'target' is numeric, so
    # select_dtypes would otherwise keep it and hand the answer to every model.
    # 'Attack_label' is dropped as well because it is a second label column
    # (binary label shipped alongside 'Attack_type' in Edge-IIoTset).
    leak_columns = [label_col, 'target', 'Attack_label']
    X_raw = df.select_dtypes(include=np.number).drop(columns=leak_columns, errors='ignore')
    if X_raw.shape[1] == 0:
        raise ValueError(f"No numeric feature columns left for {name} after removing label columns")
    X = np.nan_to_num(X_raw.values, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
    y = df['target'].values.astype(np.int64)

    # Downsample KDD & CIC to ~30% attacks (realistic, prevents trivial accuracy)
    if name != 'edge_iot':
        idx_normal = np.where(y == 0)[0]
        idx_attack = np.where(y == 1)[0]
        n_normal = len(idx_normal)
        n_attack_needed = int(n_normal * 0.3 / 0.7)  # ~30% attacks
        idx_attack = np.random.choice(idx_attack, min(n_attack_needed, len(idx_attack)), replace=False)
        idx = np.concatenate([idx_normal, idx_attack])
        np.random.shuffle(idx)
        X, y = X[idx], y[idx]
        print(f"   Downsampled → {len(X):,} rows | Attack ratio: {y.mean():.4%}")

    # Train (70%) / Val (15%) / Test (15%) split
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

    # The scaler is fitted on the training split only and reused for val/test.
    scaler = RobustScaler()
    X_train = scaler.fit_transform(X_train)
    X_val   = scaler.transform(X_val)
    X_test  = scaler.transform(X_test)

    # Convert to torch tensors and store
    datasets[name] = {
        'X_train': torch.FloatTensor(X_train),
        'X_val':   torch.FloatTensor(X_val),
        'X_test':  torch.FloatTensor(X_test),
        'y_train': torch.LongTensor(y_train),
        'y_val':   torch.LongTensor(y_val),
        'y_test':  torch.LongTensor(y_test),
        'state_size': X_train.shape[1],
        'scaler': scaler,
        'features': X_raw.columns.tolist()
    }

    print(f"   SUCCESS → Train: {len(X_train):,} | Val: {len(X_val):,} | Test: {len(X_test):,} | Features: {datasets[name]['state_size']}")

    # Release the large intermediates before loading the next dataset.
    del df, X_raw, X, y, X_temp, y_temp, X_train, X_val, X_test
    gc.collect()

print("All 3 datasets loaded and preprocessed!")

In [None]:
# CELL 4: ENVIRONMENT, REPLAY BUFFER & NETWORKS - SELF-CONTAINED

class DDoSEnv:
    """One-step classification environment over a fixed set of samples.

    Each episode presents a single randomly drawn sample; the action is scored
    against that sample's label and the episode ends immediately.
    """

    def __init__(self, states, labels):
        self.states = states
        self.labels = labels
        self.n = len(states)

    def _draw_index(self):
        """Pick a uniformly random row index using the `random` module."""
        return random.randint(0, self.n - 1)

    def reset(self):
        """Return a copy of a random sample together with its row index."""
        row = self._draw_index()
        return self.states[row].clone(), row

    def step(self, action, true_label):
        """Score `action` against `true_label`.

        Returns a copy of another random sample, the reward (+1.0 when the
        action matches the label, -1.0 otherwise) and `done`, which is always
        True.
        """
        if int(action) == true_label:
            reward = 1.0
        else:
            reward = -1.0
        following = self._draw_index()
        return self.states[following].clone(), reward, True

class PrioritizedReplay:
    """Ring-buffer replay memory with proportional prioritized sampling.

    Transitions are drawn with probability proportional to
    ``priority ** alpha``; the importance weights returned by `sample` are
    ``(N * p) ** -beta`` scaled so the largest weight is 1.

    Args:
        capacity: Maximum number of stored transitions; the oldest entry is
            overwritten once the buffer is full.
        alpha: Exponent applied to priorities when building sampling
            probabilities.
        beta: Exponent of the importance-sampling weights.
    """

    def __init__(self, capacity=500_000, alpha=0.6, beta=0.4):
        self.capacity = capacity
        self.alpha = alpha
        self.beta = beta
        self.buffer = []
        self.priorities = []
        self.pos = 0  # next write position in the ring

    def push(self, state, action, reward, next_state, done):
        """Store one transition with the current maximum priority (1.0 if empty)."""
        priority = max(self.priorities, default=1.0)
        if len(self.buffer) < self.capacity:
            # Grow both lists in lock-step; the slot is filled just below.
            self.buffer.append(None)
            self.priorities.append(None)
        self.buffer[self.pos] = (state.clone().detach(), torch.tensor(action),
                                 torch.tensor(reward), next_state.clone().detach(), torch.tensor(done))
        self.priorities[self.pos] = priority
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size):
        """Draw `batch_size` transitions (with replacement) by priority.

        Returns:
            (batch, weights, indices): `batch` is a tuple of five stacked
            tensors (states, actions, rewards, next_states, dones), `weights`
            is a float32 tensor of importance weights and `indices` are the
            sampled buffer positions.

        Raises:
            ValueError: If the buffer is empty.
        """
        if not self.buffer:
            raise ValueError("Cannot sample from an empty replay buffer")
        priorities = np.array(self.priorities[:len(self.buffer)], dtype=np.float64)
        scaled = priorities ** self.alpha
        total = scaled.sum()
        # Normalise exactly. Dividing by (sum + 1e-8) leaves probabilities that
        # do not sum to 1, which np.random.choice rejects once the total
        # priority becomes small.
        if total > 0:
            probs = scaled / total
        else:
            probs = np.full(len(scaled), 1.0 / len(scaled))
        indices = np.random.choice(len(self.buffer), batch_size, p=probs)
        samples = [self.buffer[i] for i in indices]
        weights = (len(self.buffer) * probs[indices]) ** (-self.beta)
        weights = weights / weights.max()
        batch = tuple(torch.stack(t) for t in zip(*samples))
        return batch, torch.as_tensor(weights, dtype=torch.float32), indices

    def update_priorities(self, indices, priorities):
        """Overwrite the priorities at `indices`; a 1e-6 floor keeps them positive."""
        for idx, p in zip(indices, priorities):
            self.priorities[idx] = p + 1e-6

    def __len__(self):
        return len(self.buffer)

class QNetwork(nn.Module):
    """Fully connected Q-network: state -> 256 -> 256 -> 2 outputs (one per action)."""

    def __init__(self, state_dim):
        super().__init__()
        hidden = 256
        layers = [
            nn.Linear(state_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 2),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return its two outputs per row."""
        q_values = self.net(x)
        return q_values

class DuelingQNetwork(nn.Module):
    """Dueling Q-network: shared trunk feeding a value head and an advantage head."""

    def __init__(self, state_dim):
        super().__init__()
        width = 256
        trunk = [
            nn.Linear(state_dim, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
        ]
        self.feature = nn.Sequential(*trunk)
        self.value = nn.Linear(width, 1)
        self.advantage = nn.Linear(width, 2)

    def forward(self, x):
        """Combine the heads as V + (A - mean(A)) over the action dimension (dim 1)."""
        hidden = self.feature(x)
        state_value = self.value(hidden)
        advantages = self.advantage(hidden)
        centred = advantages - advantages.mean(dim=1, keepdim=True)
        return state_value + centred

class ActorCritic(nn.Module):
    """Shared Tanh trunk with a 2-way actor head and a scalar critic head."""

    def __init__(self, state_dim):
        super().__init__()
        width = 256
        trunk = [
            nn.Linear(state_dim, width),
            nn.Tanh(),
            nn.Linear(width, width),
            nn.Tanh(),
        ]
        self.shared = nn.Sequential(*trunk)
        self.actor = nn.Linear(width, 2)
        self.critic = nn.Linear(width, 1)

    def forward(self, x):
        """Return `(actor_output, critic_output)` computed from the shared features."""
        hidden = self.shared(x)
        policy_out = self.actor(hidden)
        value_out = self.critic(hidden)
        return policy_out, value_out

# Status lines for this cell.
# NOTE(review): the per-dataset feature counts in the second line are hard-coded
# text, not read from the loaded data — confirm they match each `state_size`.
for status_line in (
    "Environment, Prioritized Replay, and all 5 network architectures ready",
    "Compatible with different state sizes from KDDCup99 (39), CIC-DDoS (78), Edge-IIoT (44)",
):
    print(status_line)

In [None]:
# CELL 5: TRAINING LOOP - 5 MODELS: DQN, DoubleDQN, Duelling DQN, PPO, DDPG
# Progress shown per dataset and per agent

# Shared accumulators filled by the train_* functions in this cell:
# one flat list of result records, plus per-dataset dicts keyed by agent name.
results = list()
trained_models = dict((name, {}) for name in datasets)
convergence_logs = dict((name, {}) for name in datasets)

# DDPG networks , adapted for binary action
class DDPGActor(nn.Module):
    """Actor network: state -> 256 -> 256 -> one sigmoid output in (0, 1)."""

    def __init__(self, state_dim):
        super().__init__()
        hidden = 256
        layers = [
            nn.Linear(state_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
            nn.Sigmoid(),  # Probability of attack (1)
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid output for each row of `x`."""
        attack_prob = self.net(x)
        return attack_prob

class DDPGCritic(nn.Module):
    """Critic network scoring a (state, action) pair with a single output."""

    def __init__(self, state_dim):
        super().__init__()
        hidden = 256
        layers = [
            nn.Linear(state_dim + 1, hidden),  # +1 input for the scalar action
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, state, action):
        """Concatenate `state` and `action` along dim 1 and score the result."""
        joint = torch.cat([state, action], dim=1)
        return self.net(joint)

def train_dqn_variant(dataset_name, agent_name, use_dueling=False, use_double=False,
                      episodes=3000, batch_size=256, lr=0.001, gamma=0.99,
                      target_sync=50, log_every=500):
    """Train a DQN-family agent on one dataset and record the outcome.

    Args:
        dataset_name: Key into the module-level `datasets` dict.
        agent_name: Label used in logs and as the key in the result stores.
        use_dueling: Use `DuelingQNetwork` instead of `QNetwork`.
        use_double: Select the next action with the online network and
            evaluate it with the target network (Double DQN).
        episodes: Number of one-step episodes to run.
        batch_size: Replay batch size; learning starts once the buffer holds
            more than this many transitions.
        lr: Adam learning rate.
        gamma: Discount factor used in the TD target.
        target_sync: Copy the online weights into the target network every
            this many episodes.
        log_every: Print and log the rolling accuracy every this many episodes.

    Side effects:
        Appends a record to `results` and stores the convergence log and the
        trained online network in `convergence_logs` / `trained_models`.
        The reported accuracy is the rolling mean over the last 1000 training
        actions (epsilon-greedy, so it includes exploratory actions); it is
        not a held-out metric.
    """
    data = datasets[dataset_name]
    state_size = data['state_size']

    print(f"\n{'='*60}")
    print(f"TRAINING {agent_name.upper()} ON {dataset_name.upper()}")
    print(f"Features: {state_size} | Train samples: {len(data['X_train']):,}")
    print(f"{'='*60}")

    env = DDoSEnv(data['X_train'], data['y_train'])
    replay = PrioritizedReplay()

    # Online network first, target second (same construction order as before,
    # so seeded initialisation is unchanged).
    network_cls = DuelingQNetwork if use_dueling else QNetwork
    model = network_cls(state_size)
    target = network_cls(state_size)
    target.load_state_dict(model.state_dict())
    target.eval()

    optimizer = optim.Adam(model.parameters(), lr=lr)
    eps = 1.0

    accuracies = deque(maxlen=1000)
    conv_log = []

    for ep in range(episodes):
        state, idx = env.reset()
        label = int(data['y_train'][idx])
        if random.random() < eps:
            action = random.randint(0, 1)
        else:
            # Acting needs no autograd graph.
            with torch.no_grad():
                action = model(state.unsqueeze(0)).argmax().item()

        next_state, reward, done = env.step(action, label)
        replay.push(state, action, reward, next_state, done)
        accuracies.append(action == label)

        if len(replay) > batch_size:
            batch, weights, indices = replay.sample(batch_size)
            s, a, r, ns, d = batch
            # squeeze(1) removes only the gathered action dimension.
            current_q = model(s).gather(1, a.unsqueeze(1)).squeeze(1)
            with torch.no_grad():
                if use_double:
                    next_a = model(ns).argmax(1)
                    next_q = target(ns).gather(1, next_a.unsqueeze(1)).squeeze(1)
                else:
                    next_q = target(ns).max(1)[0]
                target_q = r + gamma * next_q * (1.0 - d.float())
            td_error = current_q - target_q
            loss = (td_error.pow(2) * weights).mean()
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            replay.update_priorities(indices, td_error.abs().detach().cpu().numpy())

        if ep % target_sync == 0 and ep > 0:
            target.load_state_dict(model.state_dict())

        # Multiplicative epsilon decay with a 5% exploration floor.
        eps = max(0.05, eps * 0.999)

        if (ep + 1) % log_every == 0:
            acc = np.mean(accuracies) * 100
            conv_log.append((ep + 1, acc))
            print(f"   Episode {ep+1:4d} | Rolling Accuracy: {acc:6.2f}%")

    final_acc = np.mean(accuracies) * 100
    results.append({'Dataset': dataset_name.upper(), 'Agent': agent_name, 'Accuracy (%)': round(final_acc, 3)})
    convergence_logs[dataset_name][agent_name] = conv_log
    trained_models[dataset_name][agent_name] = model
    print(f"\n{agent_name.upper()} COMPLETED → Final Accuracy: {final_acc:.3f}% on {dataset_name.upper()}")

def train_ppo(dataset_name, episodes=3000, lr=3e-4, clip_eps=0.2, value_coef=0.5, log_every=500):
    """Train the actor-critic agent on one dataset and record the outcome.

    Args:
        dataset_name: Key into the module-level `datasets` dict.
        episodes: Number of one-step episodes to run.
        lr: Adam learning rate.
        clip_eps: Half-width of the clipping interval around a ratio of 1.
        value_coef: Weight of the critic (value) loss in the total loss.
        log_every: Print and log the rolling accuracy every this many episodes.

    Side effects:
        Appends a record to `results` and stores the convergence log and the
        trained model under the 'PPO' key of `convergence_logs` /
        `trained_models`. The reported accuracy is the rolling mean over the
        last 1000 sampled training actions, not a held-out metric.
    """
    data = datasets[dataset_name]
    state_size = data['state_size']

    print(f"\n{'='*60}")
    print(f"TRAINING PPO ON {dataset_name.upper()}")
    print(f"Features: {state_size} | Train samples: {len(data['X_train']):,}")
    print(f"{'='*60}")

    env = DDoSEnv(data['X_train'], data['y_train'])
    model = ActorCritic(state_size)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    accuracies = deque(maxlen=1000)
    conv_log = []

    for ep in range(episodes):
        state, idx = env.reset()
        label = int(data['y_train'][idx])
        logits, value = model(state.unsqueeze(0))
        dist = Categorical(logits=logits)
        action = dist.sample()
        _, reward, _ = env.step(action.item(), label)
        accuracies.append(action.item() == label)

        # The baseline is used as a constant (no gradient through the advantage).
        advantage = reward - value.item()
        log_prob = dist.log_prob(action)
        # One update per sample with the "old" policy equal to the current one:
        # the ratio is numerically 1, so the clipped objective reduces to a
        # policy-gradient step here.
        ratio = torch.exp(log_prob - log_prob.detach())
        clipped = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps)
        policy_loss = -torch.min(ratio * advantage, clipped * advantage).mean()
        # Train the critic towards the observed reward. Without this term the
        # critic head never receives a gradient and the baseline stays at its
        # random initial output.
        value_loss = (value - reward).pow(2).mean()
        loss = policy_loss + value_coef * value_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (ep + 1) % log_every == 0:
            acc = np.mean(accuracies) * 100
            conv_log.append((ep + 1, acc))
            print(f"   Episode {ep+1:4d} | Rolling Accuracy: {acc:6.2f}%")

    final_acc = np.mean(accuracies) * 100
    results.append({'Dataset': dataset_name.upper(), 'Agent': 'PPO', 'Accuracy (%)': round(final_acc, 3)})
    convergence_logs[dataset_name]['PPO'] = conv_log
    trained_models[dataset_name]['PPO'] = model
    print(f"\nPPO COMPLETED → Final Accuracy: {final_acc:.3f}% on {dataset_name.upper()}")

def train_ddpg(dataset_name, episodes=3000, actor_lr=1e-4, critic_lr=1e-3, log_every=500):
    """Train the DDPG-style actor/critic pair on one dataset and record the outcome.

    Args:
        dataset_name: Key into the module-level `datasets` dict.
        episodes: Number of one-step episodes to run.
        actor_lr: Adam learning rate for the actor.
        critic_lr: Adam learning rate for the critic.
        log_every: Print and log the rolling accuracy every this many episodes.

    Side effects:
        Appends a record to `results` and stores the convergence log and the
        trained actor under the 'DDPG' key of `convergence_logs` /
        `trained_models`. The reported accuracy is the rolling mean over the
        last 1000 training actions, not a held-out metric.
    """
    data = datasets[dataset_name]
    state_size = data['state_size']

    print(f"\n{'='*60}")
    print(f"TRAINING DDPG ON {dataset_name.upper()}")
    print(f"Features: {state_size} | Train samples: {len(data['X_train']):,}")
    print(f"{'='*60}")

    env = DDoSEnv(data['X_train'], data['y_train'])

    actor = DDPGActor(state_size)
    critic = DDPGCritic(state_size)
    actor_optimizer = optim.Adam(actor.parameters(), lr=actor_lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=critic_lr)

    accuracies = deque(maxlen=1000)
    conv_log = []

    for ep in range(episodes):
        state, idx = env.reset()
        label = int(data['y_train'][idx])
        state_batch = state.unsqueeze(0)

        prob = actor(state_batch)
        action = int((prob > 0.5).item())  # Binary decision: 1 = attack, 0 = normal

        _, reward, _ = env.step(action, label)
        accuracies.append(action == label)

        # Critic: regress Q(state, taken action) onto the observed reward
        # (episodes are one step long, so there is no bootstrapped term).
        q_value = critic(state_batch, torch.tensor([[action]], dtype=torch.float32))
        critic_loss = F.mse_loss(q_value, torch.tensor([[reward]], dtype=torch.float32))
        critic_optimizer.zero_grad()
        critic_loss.backward()
        critic_optimizer.step()

        # Actor: raise the critic's score of the actor's continuous output.
        # Gradients that land on the critic here are cleared by the critic's
        # zero_grad() at the start of its next update.
        actor_loss = -critic(state_batch, prob).mean()
        actor_optimizer.zero_grad()
        actor_loss.backward()
        actor_optimizer.step()

        if (ep + 1) % log_every == 0:
            acc = np.mean(accuracies) * 100
            conv_log.append((ep + 1, acc))
            print(f"   Episode {ep+1:4d} | Rolling Accuracy: {acc:6.2f}%")

    final_acc = np.mean(accuracies) * 100
    results.append({'Dataset': dataset_name.upper(), 'Agent': 'DDPG', 'Accuracy (%)': round(final_acc, 3)})
    convergence_logs[dataset_name]['DDPG'] = conv_log
    trained_models[dataset_name]['DDPG'] = actor
    print(f"\nDDPG COMPLETED → Final Accuracy: {final_acc:.3f}% on {dataset_name.upper()}")

# Train every agent on every dataset. The per-dataset order (DQN, DoubleDQN,
# Dueling, PPO, DDPG) is kept fixed because all runs share the global RNG state.
banner = "=" * 60
print("\n" + banner)
print("STARTING TRAINING OF ALL 5 DRL MODELS ON ALL 3 DATASETS")
print("Models: DQN, DoubleDQN, Dueling DQN, PPO, DDPG")
print(banner)

# (agent label, extra keyword flags) for the three DQN-family runs.
dqn_runs = (
    ("DQN", {}),
    ("DoubleDQN", {"use_double": True}),
    ("Dueling", {"use_dueling": True}),
)

for ds_name in datasets:
    for agent_label, flags in dqn_runs:
        train_dqn_variant(ds_name, agent_label, **flags)
    train_ppo(ds_name)
    train_ddpg(ds_name)

print("\n" + banner)
print("ALL 5 MODELS TRAINED ON ALL 3 DATASETS")
print(banner)

In [None]:
# CELL 6: RESULTS TABLE, Display results


# Build the accuracy table from the records appended during training.
df_results = pd.DataFrame(results)

# One row per dataset and one column per agent, both in alphabetical order,
# rounded to three decimals for display.
df_pivot = (
    df_results
    .pivot(index='Dataset', columns='Agent', values='Accuracy (%)')
    .sort_index()
)
df_pivot = df_pivot[sorted(df_pivot.columns)].round(3)

rule = "=" * 60
print("\n" + rule)
print("FINAL ACCURACY RESULTS")
print(rule)
print(df_pivot.to_string())
print(rule)

# Persist the pivoted table next to the notebook.
df_pivot.to_csv("drl_results_pivoted_dynamic.csv")
print(f"\nPivoted results saved to 'drl_results_pivoted_dynamic.csv'")

# Highest-scoring agent for each dataset row.
print("\nBEST MODEL PER DATASET:")
for dataset, row in df_pivot.iterrows():
    best_agent = row.idxmax()
    best_acc = row.max()
    print(f"   {dataset}: {best_agent} → {best_acc:.3f}% accuracy")

print("Performance across all datasets")

In [None]:
# CELL 7: CONFUSION MATRICES AND DETAILED METRICS 


# Output folder and file for the combined confusion-matrix figure.
os.makedirs("figures", exist_ok=True)
figure_path = "figures/confusion_matrices_all.png"

separator = "=" * 60
print("\n" + separator)
print("CONFUSION MATRICES AS A SINGLE FIGURE")
print(separator)

# Grid layout: one row per model, one column per dataset (alphabetical).
model_order = ['DQN', 'DoubleDQN', 'Dueling', 'PPO', 'DDPG']
dataset_order = sorted(datasets)

fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(15, 20))
fig.suptitle('Confusion Matrices', fontsize=18, fontweight='bold', y=0.98)

def annotate_cm(ax, cm):
    """Write "count (percent of total)" into every cell of a confusion-matrix heatmap.

    Args:
        ax: Axes-like object; only its `text` method is used. Cell (i, j) is
            annotated at data coordinates (j + 0.5, i + 0.5).
        cm: 2-D array of counts of any shape.

    The text is white on cells above the midpoint of the matrix's value range
    and black otherwise, so it stays readable on a sequential colormap whose
    darkest colour marks the largest count.
    """
    cm = np.asarray(cm)
    total = cm.sum()
    # Contrast threshold follows the colour scale (min..max), not total/2:
    # a cell can be the darkest on the map while holding under half the samples.
    threshold = (cm.max() + cm.min()) / 2.0 if cm.size else 0.0
    n_rows, n_cols = cm.shape
    for i in range(n_rows):
        for j in range(n_cols):
            count = cm[i, j]
            percent = 100 * count / total if total > 0 else 0
            text = f"{count}\n({percent:.1f}%)"
            ax.text(j+0.5, i+0.5, text,
                    ha="center", va="center",
                    fontsize=12, fontweight='bold',
                    color="white" if count > threshold else "black")

# Storing metrics for printing: agent -> dataset -> metric name -> value
metrics_per_model = {agent: {} for agent in model_order}
last_row = len(model_order) - 1  # only the bottom row carries x-axis labels

for row_idx, agent in enumerate(model_order):
    # Row label (agent name) drawn to the left of the first column.
    axes[row_idx, 0].text(-0.4, 0.5, agent, rotation=90, fontsize=14, fontweight='bold',
                          va='center', ha='center', transform=axes[row_idx, 0].transAxes)

    for col_idx, ds_name in enumerate(dataset_order):
        ax = axes[row_idx, col_idx]
        data = datasets[ds_name]
        true_labels = data['y_test'].numpy()

        model = trained_models[ds_name][agent]
        model.eval()
        with torch.no_grad():
            if agent == 'PPO':
                # Actor-critic returns (logits, value); only the logits decide the class.
                logits, _ = model(data['X_test'])
                preds = logits.argmax(dim=1).cpu().numpy()
            elif agent == 'DDPG':
                # The actor outputs one value per sample; threshold it at 0.5.
                prob = model(data['X_test'])
                preds = (prob > 0.5).cpu().numpy().flatten().astype(int)
            else:
                preds = model(data['X_test']).argmax(dim=1).cpu().numpy()

        # labels=[0, 1] pins the matrix to 2x2 even when a class is missing from
        # both the labels and the predictions; otherwise the 4-way unpack fails.
        cm = confusion_matrix(true_labels, preds, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()
        total = cm.sum()
        acc = 100 * (tp + tn) / total if total > 0 else 0
        prec = 100 * tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = 100 * tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 100 * 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0
        fnr = 100 * fn / (tp + fn) if (tp + fn) > 0 else 0

        metrics_per_model[agent][ds_name.upper()] = {
            'ACC (%)': round(acc, 2),
            'Precision (%)': round(prec, 2),
            'Recall (%)': round(recall, 2),
            'F1 (%)': round(f1, 2),
            'FNR (%)': round(fnr, 2)
        }

        sns.heatmap(cm, annot=False, cmap='Blues', ax=ax, cbar=False,
                    linewidths=1.5, linecolor='white')
        annotate_cm(ax, cm)

        if row_idx == 0:
            ax.set_title(ds_name.upper(), fontsize=13, fontweight='bold')

        ax.set_xlabel('Predicted' if row_idx == last_row else '')
        ax.set_ylabel('True' if col_idx == 0 else '')
        ax.set_xticklabels(['Normal', 'Attack'] if row_idx == last_row else ['', ''])
        ax.set_yticklabels(['Normal', 'Attack'] if col_idx == 0 else ['', ''])

plt.tight_layout(rect=[0.07, 0, 1, 0.95])
fig.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"\nFigure saved to: {figure_path}")

# Printing calculations grouped by model
print("\n" + "="*60)
print("DETAILED METRICS PER MODEL AND DATASET")
print("="*60)

for agent in model_order:
    print(f"\n{agent.upper()}:")
    df_agent = pd.DataFrame(metrics_per_model[agent]).T
    df_agent = df_agent[['ACC (%)', 'Precision (%)', 'Recall (%)', 'F1 (%)', 'FNR (%)']]
    print(df_agent.to_string())

print("\n" + "="*60)
print("Single figure is saved as PNG in the 'figures' folder")
print("="*60)

In [None]:
# CELL 8: FINAL BENCHMARK; ONE single tall figure with 3 subplots stacked vertically, one per dataset

# Output folder and file for the accuracy/latency figure.
Path("figures").mkdir(parents=True, exist_ok=True)
figure_path = "figures/accuracy_latency_all_datasets.png"

# psutil handle on the current process.
process = psutil.Process()

def measure_latency(model, test_tensor, n_samples=5000, warmup=10):
    """Measure the mean single-sample inference latency of `model`.

    Args:
        model: Module called as `model(sample.unsqueeze(0))`; its output is
            discarded, so tuple-returning models work as well.
        test_tensor: 2-D tensor of samples to draw from (rows are samples).
        n_samples: Maximum number of timed forward passes; capped at the
            number of rows in `test_tensor`.
        warmup: Number of untimed forward passes run first so one-off setup
            costs do not inflate the measurement.

    Returns:
        Mean wall-clock time per forward pass, in milliseconds.

    Raises:
        ValueError: If there is no sample to time.
    """
    total = test_tensor.shape[0]
    size = min(n_samples, total)
    if size <= 0:
        raise ValueError("measure_latency needs at least one sample to time")

    model.eval()
    with torch.no_grad():
        indices = torch.randint(0, total, (size,))
        samples = test_tensor[indices]
        for sample in samples[:warmup]:
            model(sample.unsqueeze(0))
        start = time.perf_counter()
        # Every model type is invoked the same way, so no per-type dispatch is needed.
        for sample in samples:
            model(sample.unsqueeze(0))
        elapsed = time.perf_counter() - start
    return elapsed / size * 1000  # ms per inference

# Cell 8 main body: evaluate every trained model on its test split, time
# single-sample inference, draw one stacked figure (one subplot per dataset)
# and print the pivoted benchmark table plus summary statistics.
print("\n" + "="*60)
print("INFERENCE BENCHMARK; SINGLE VERTICAL FIGURE & CALCULATIONS")
print("="*60)

# Collecting benchmark data: one record per (dataset, agent) pair
benchmark_data = []
for ds_name in sorted(datasets.keys()):
    data = datasets[ds_name]
    for agent in sorted(trained_models[ds_name].keys()):
        model = trained_models[ds_name][agent]
        model.eval()
        with torch.no_grad():
            if agent == 'DDPG':
                # Single-output actor: threshold at 0.5 to get the class.
                prob = model(data['X_test'])
                preds = (prob > 0.5).cpu().numpy().flatten().astype(int)
            elif agent == 'PPO':
                # Actor-critic returns (logits, value); only the logits are used.
                logits, _ = model(data['X_test'])
                preds = logits.argmax(dim=1).cpu().numpy()
            else:
                preds = model(data['X_test']).argmax(dim=1).cpu().numpy()
        acc = accuracy_score(data['y_test'].numpy(), preds) * 100
        lat = measure_latency(model, data['X_test'])
        benchmark_data.append({
            'Dataset': ds_name.upper(),
            'Agent': agent,
            'Accuracy (%)': round(acc, 3),
            'Latency (ms)': round(lat, 4)
        })

# Creating single vertical figure
fig, axes = plt.subplots(3, 1, figsize=(12, 18))  # 3 rows vertical, 1 column
fig.suptitle('Accuracy and Latency Across Datasets', fontsize=18, fontweight='bold', y=0.98)

dataset_order = sorted(datasets.keys())
agents_order = sorted(trained_models[dataset_order[0]].keys())  # Consistent agent order

for row_idx, ds_name in enumerate(dataset_order):
    sub_df = pd.DataFrame([d for d in benchmark_data if d['Dataset'] == ds_name.upper()])
    sub_df = sub_df.set_index('Agent').reindex(agents_order).reset_index()  # Consistent order
    
    x = np.arange(len(sub_df))
    width = 0.35
    
    # Left axis: accuracy bars (shifted half a bar width to the left).
    ax1 = axes[row_idx]
    bars1 = ax1.bar(x - width/2, sub_df['Accuracy (%)'], width,
                    label='Accuracy (%)', color='tab:green', alpha=0.9, edgecolor='black')
    ax1.set_ylabel('Accuracy (%)', color='tab:green', fontsize=12)
    # NOTE(review): the accuracy axis starts at 50, so a bar below 50% would not be visible.
    ax1.set_ylim(50, 101)
    ax1.tick_params(axis='y', labelcolor='tab:green')
    ax1.set_xticks(x)
    ax1.set_xticklabels(sub_df['Agent'], fontsize=11)
    ax1.set_title(f'{ds_name.upper()} Dataset', fontsize=14, fontweight='bold')
    
    # Right axis (twin of ax1): latency bars (shifted half a bar width to the right).
    ax2 = ax1.twinx()
    bars2 = ax2.bar(x + width/2, sub_df['Latency (ms)'], width,
                    label='Latency (ms)', color='tab:red', alpha=0.8, edgecolor='black')
    ax2.set_ylabel('Latency (ms)', color='tab:red', fontsize=12)
    max_lat = sub_df['Latency (ms)'].max()
    ax2.set_ylim(0, max_lat * 1.4)
    ax2.tick_params(axis='y', labelcolor='tab:red')
    
    # Value labels
    for bar in bars1:
        h = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., h + 1,
                 f'{h:.1f}%', ha='center', va='bottom', fontsize=10, fontweight='bold')
    for bar in bars2:
        h = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., h + (max_lat * 0.02),
                 f'{h:.3f}', ha='center', va='bottom', fontsize=9, color='darkred')

# Shared legend built from the axes of the last subplot drawn in the loop above.
handles, labels = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
fig.legend(handles + handles2, labels + labels2,
           loc='upper center', bbox_to_anchor=(0.5, 0.95), ncol=2, fontsize=12)

plt.tight_layout(rect=[0, 0, 1, 0.93])
fig.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"\nSingle vertical figure saved: {figure_path}")

# Calculations at bottom
print("\n" + "="*90)
print("DETAILED BENCHMARK TABLE FOR ALL DATASETS")
print("="*90)
df_all = pd.DataFrame(benchmark_data)
# Rows are agents; columns are a (metric, dataset) MultiIndex.
df_pivot = df_all.pivot(index='Agent', columns='Dataset', values=['Accuracy (%)', 'Latency (ms)'])
df_pivot = df_pivot.sort_index(axis=1, level='Dataset')  # Sort datasets
print(df_pivot.to_string())

# Summary calculations
print("\nSUMMARY CALCULATIONS:")
mean_acc = df_all.groupby('Agent')['Accuracy (%)'].mean().round(2)
print("Mean Accuracy across datasets:")
print(mean_acc.to_string())

# Single best (agent, dataset) record by test accuracy.
best_acc = df_all.loc[df_all['Accuracy (%)'].idxmax()]
print(f"\nBest overall accuracy: {best_acc['Agent']} on {best_acc['Dataset']} ({best_acc['Accuracy (%)']:.3f}%)")

print("="*90)

In [None]:
# CELL 9:AUC-ROC:  for all 5 models on all 3 datasets

print("\n" + "="*60)
print("AUC-ROC")
print("="*60)

# 1. AUC-ROC Calculation: score every trained model on its dataset's test split
print("\nAUC-ROC Scores")
auc_results = []

for ds_name in sorted(datasets.keys()):
    data = datasets[ds_name]
    true_labels = data['y_test'].numpy()
    print(f"\n{ds_name.upper()} Dataset:")
    for agent in sorted(trained_models[ds_name].keys()):
        model = trained_models[ds_name][agent]
        model.eval()
        with torch.no_grad():
            if agent == 'PPO':
                # Actor-critic returns (logits, value); score = softmax mass on class 1.
                logits, _ = model(data['X_test'])
                probs = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
            elif agent == 'DDPG':
                # The actor ends in a sigmoid, so its output is already a score
                # in (0, 1). AUC depends only on the ranking, so no rescaling is
                # needed; reshape(-1) also handles a single-row test set.
                probs = model(data['X_test']).reshape(-1).cpu().numpy()
            else:
                # Q-networks: softmax over the two Q-values gives a ranking score for class 1.
                logits = model(data['X_test'])
                probs = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
        auc = roc_auc_score(true_labels, probs)
        auc_results.append({'Dataset': ds_name.upper(), 'Agent': agent, 'AUC-ROC': round(auc, 4)})
        print(f"   {agent:10}: {auc:.4f}")

# Pivoted AUC table, columns in the order the datasets were loaded
# (derived from `datasets` rather than hard-coded names).
df_auc = pd.DataFrame(auc_results)
pivot_auc = df_auc.pivot(index='Agent', columns='Dataset', values='AUC-ROC')
pivot_auc = pivot_auc[[name.upper() for name in datasets]]

print("\nAUC-ROC Table (Models as rows, Datasets as columns):")
print(pivot_auc.to_string())


In [None]:
# CELL 10: ONNX EXPORT, BEST PPO MODEL PER DATASET EXPORTED

# Make sure the output folder for exported models exists.
Path("models").mkdir(parents=True, exist_ok=True)

class PPOForONNX(nn.Module):
    """Export wrapper that exposes only the first of the wrapped model's two outputs."""

    def __init__(self, original_model):
        super().__init__()
        self.model = original_model

    def forward(self, x):
        """Call the wrapped model and return its first output, dropping the second."""
        action_logits, _unused_value = self.model(x)
        return action_logits

print("\n" + "="*60)
print("EXPORTING BEST PPO MODELS TO ONNX - ONE PER DATASET")
print("="*60)

# Export one ONNX file per dataset from that dataset's trained PPO model.
for ds_name in datasets.keys():
    if 'PPO' not in trained_models[ds_name]:
        print(f"   PPO not found for {ds_name.upper()} – skipping")
        continue

    ppo_model = trained_models[ds_name]['PPO']
    ppo_model.eval()

    state_size = datasets[ds_name]['state_size']
    # The wrapper returns only the logits, so the exported graph has one output.
    wrapped = PPOForONNX(ppo_model)

    # Example input used for the export trace: a batch of one sample.
    dummy_input = torch.randn(1, state_size)

    onnx_filename = f"models/PPO_DDoS_{ds_name.upper()}.onnx"
    torch.onnx.export(
        wrapped,
        dummy_input,
        onnx_filename,
        export_params=True,
        opset_version=18,
        do_constant_folding=True,
        input_names=['input_features'],
        output_names=['action_logits'],
        # Leave the batch dimension dynamic on both input and output.
        dynamic_axes={'input_features': {0: 'batch'}, 'action_logits': {0: 'batch'}}
    )

    print(f"   Exported: {onnx_filename} | Features: {state_size}")

# The original string literal was never closed, which made the whole cell a SyntaxError.
print("\nONNX export complete")
print("="*60)