# RL-Enhanced IDS — Results Visualisation
**Project**: RL-Enhanced Intrusion Detection System for Autonomous Network Defence  
**Author**: Abishik Macherla Vijayakrishna | 40594078

This notebook generates all dissertation figures from experiment results.


In [None]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch

%matplotlib inline
# Matplotlib rc overrides shared by every figure in this notebook:
# 150 dpi rendering, serif fonts and bold axes titles.
THEME_RC = {
    "figure.dpi": 150,
    "font.family": "serif",
    "axes.titleweight": "bold",
}
sns.set_theme(style="whitegrid", font_scale=1.15, rc=THEME_RC)

# Root of the project checkout. The HONOURS_PROJECT_DIR environment variable
# overrides the default so the notebook can run on another machine without
# editing this cell; when it is unset the original location is used.
PROJECT_DIR = os.environ.get("HONOURS_PROJECT_DIR", "/home/abishik/HONOURS_PROJECT")
# Directory holding the experiment result files.
RESULTS_DIR = os.path.join(PROJECT_DIR, "results")
# Directory the generated figures are written to; created on first run.
FIGURES_DIR = os.path.join(RESULTS_DIR, "figures")
os.makedirs(FIGURES_DIR, exist_ok=True)


## 1. Load Data

In [None]:
# Load scenario results.
# The encoding is given explicitly so parsing does not depend on the
# platform's default locale encoding.
with open(os.path.join(RESULTS_DIR, "all_scenarios_results.json"), encoding="utf-8") as f:
    scenarios = json.load(f)
print(f"Loaded scenarios: {list(scenarios.keys())}")

# Load DQN experiment results (same explicit encoding).
with open(os.path.join(RESULTS_DIR, "dqn_experiments.json"), encoding="utf-8") as f:
    dqn_experiments = json.load(f)
print(f"Loaded {len(dqn_experiments)} DQN experiments")

# Colour palette: one hex colour per model label.
MODEL_COLORS = {
    # Baseline classifiers
    "Random Forest": "#27ae60",
    "XGBoost": "#2980b9",
    # DQN variants that share the darker orange
    "DQN": "#e67e22",
    "DQN (Standard)": "#e67e22",
    # DQN variants that share the lighter orange
    "DQN (No DDoS)": "#f39c12",
    "DQN (No Web)": "#f39c12",
}

# Figure titles keyed by scenario id; cells that iterate over scenarios skip
# any id that is not listed here.
SCENARIO_TITLES = dict(
    scenario_1="Scenario 1: Standard Classification (CIC-IDS2017)",
    scenario_2="Scenario 2: Zero-Day DDoS Detection",
    scenario_3="Scenario 3: Zero-Day Web Attack Detection",
    scenario_4="Scenario 4: Cross-Dataset Generalisation (2017→2023)",
)


## 2. Scenario Comparison Charts

In [None]:
# One grouped bar chart per scenario: models along the x-axis, one bar per
# metric, each bar labelled with its value. Each chart is saved to FIGURES_DIR.
metrics = ["Accuracy", "Precision", "Recall", "F1"]
metric_colors = ["#1abc9c", "#3498db", "#e74c3c", "#9b59b6"]

for s_key, s_data in scenarios.items():
    # Only scenarios that have a display title are plotted.
    if s_key not in SCENARIO_TITLES:
        continue
    models = list(s_data.keys())
    x = np.arange(len(models))
    width = 0.2

    fig, ax = plt.subplots(figsize=(max(8, len(models) * 2.2), 5))
    for i, metric in enumerate(metrics):
        # A metric missing from a model's results is drawn as 0.
        vals = [s_data[m].get(metric, 0) for m in models]
        bars = ax.bar(x + i * width, vals, width, label=metric,
                      color=metric_colors[i], edgecolor="white")
        for bar, val in zip(bars, vals):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.8,
                    f"{val:.1f}", ha="center", va="bottom", fontsize=8, fontweight="bold")

    # Centre each tick under its group of bars. The offset is derived from the
    # number of metrics so it stays centred if the metric list changes.
    ax.set_xticks(x + (len(metrics) - 1) / 2 * width)
    ax.set_xticklabels(models, fontsize=10)
    ax.set_ylabel("Score (%)")
    ax.set_ylim(0, 108)  # headroom above 100 for the value labels
    ax.set_title(SCENARIO_TITLES[s_key], fontsize=14, pad=15)
    ax.legend(loc="upper right", fontsize=9)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    fig.tight_layout()
    fig.savefig(os.path.join(FIGURES_DIR, f"fig_{s_key}_comparison.png"), bbox_inches="tight")
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)


## 3. Cross-Scenario F1 Summary

In [None]:
# For each scenario, compare the best F1 of any model with the best F1 of any
# model whose name contains "DQN".
names, best_f1, best_models, dqn_f1 = [], [], [], []
for s_key in ["scenario_1", "scenario_2", "scenario_3", "scenario_4"]:
    data = scenarios.get(s_key)
    if not data:
        # Scenario missing, or present with no model results: nothing to rank.
        continue
    # A model without an "F1" entry counts as 0 everywhere, so ranking and
    # plotting use the same value.
    f1_by_model = {m: res.get("F1", 0) for m, res in data.items()}
    best_m = max(f1_by_model, key=f1_by_model.get)
    names.append(s_key.replace("scenario_", "S"))
    best_f1.append(f1_by_model[best_m])
    best_models.append(best_m)
    # 0 is plotted when the scenario has no DQN variant.
    dqn_f1.append(max((f1 for m, f1 in f1_by_model.items() if "DQN" in m), default=0))

x = np.arange(len(names))
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(x - 0.175, best_f1, 0.35, label="Best Model", color="#27ae60")
ax.bar(x + 0.175, dqn_f1, 0.35, label="Best DQN", color="#e67e22")
for xi, top_f1, top_model, top_dqn in zip(x, best_f1, best_models, dqn_f1):
    # Two-line label: score on the first line, winning model name on the second.
    ax.text(xi - 0.175, top_f1 + 1, f"{top_f1:.1f}%\n({top_model})",
            ha="center", fontsize=8, fontweight="bold")
    ax.text(xi + 0.175, top_dqn + 1, f"{top_dqn:.1f}%",
            ha="center", fontsize=8, fontweight="bold")
ax.set_xticks(x)
ax.set_xticklabels(names)
ax.set_ylabel("F1 Score (%)")
ax.set_title("Cross-Scenario F1 Performance Summary", fontsize=14, pad=15)
ax.legend()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, "fig_cross_scenario_summary.png"), bbox_inches="tight")
plt.show()


## 4. DQN Hyperparameter Experiments

In [None]:
# Collect per-experiment metrics. These lists are also read by the
# precision/recall and FP/FN cells that follow.
names, f1s, fps, fns, precs, recs = [], [], [], [], [], []
for exp_id, exp in dqn_experiments.items():
    r = exp["results"]
    names.append(exp.get("name", exp_id).replace("Exp ", "E"))
    f1s.append(r["F1"]); fps.append(r["FP"]); fns.append(r["FN"])
    precs.append(r["Precision"]); recs.append(r["Recall"])

# F1 progression
# The first N_PHASE1 experiments form Phase 1; the rest form Phase 2. Bar
# colours, the divider and both phase labels are derived from this one value
# so they stay aligned for any number of experiments.
N_PHASE1 = 5
PHASE1_COLOR, PHASE2_COLOR = "#e74c3c", "#2980b9"
n_exp = len(names)
n_phase1 = min(N_PHASE1, n_exp)
colors = [PHASE1_COLOR if i < N_PHASE1 else PHASE2_COLOR for i in range(n_exp)]
top_f1 = max(f1s)

fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(names, f1s, color=colors, edgecolor="white")
for bar, val in zip(bars, f1s):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height()+0.5,
            f"{val:.1f}%", ha="center", fontsize=9, fontweight="bold")
# Bars sit at x = 0..n_exp-1, so each phase label is centred over its own bars.
ax.text((n_phase1 - 1) / 2, top_f1+5, "Phase 1: Reward Tuning", ha="center", fontsize=11,
        color=PHASE1_COLOR, fontweight="bold")
if n_exp > N_PHASE1:
    # Divider and second label only make sense when Phase 2 bars exist.
    ax.axvline(x=N_PHASE1 - 0.5, color="gray", linestyle="--", alpha=0.5)
    ax.text((N_PHASE1 + n_exp - 1) / 2, top_f1+5, "Phase 2: Architecture", ha="center",
            fontsize=11, color=PHASE2_COLOR, fontweight="bold")
ax.set_ylabel("F1 Score (%)")
ax.set_ylim(0, top_f1+10)  # headroom for the phase labels
ax.set_title("DQN Experiments — F1 Score Comparison", fontsize=14, pad=15)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, "fig_dqn_f1_progression.png"), bbox_inches="tight")
plt.show()


## 5. Precision vs Recall Trade-off

In [None]:
# Scatter of precision against recall, one point per DQN experiment, coloured
# by F1. Reads recs / precs / f1s / names built in the experiments cell.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    recs, precs,
    c=f1s, cmap="RdYlGn",
    s=150, edgecolors="black", linewidth=0.8,
)

# Keep only the text after the last ": " in each name (whole name if absent).
short_names = [full_name.rsplit(": ", 1)[-1] for full_name in names]
for label, recall, precision in zip(short_names, recs, precs):
    ax.annotate(
        label, (recall, precision),
        textcoords="offset points", xytext=(0, 12),
        ha="center", fontsize=8, fontweight="bold",
    )

fig.colorbar(points, ax=ax, label="F1 Score (%)")
ax.set(xlabel="Recall (%)", ylabel="Precision (%)")
ax.set_title("DQN — Precision vs Recall Trade-off", fontsize=14, pad=15)
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, "fig_dqn_precision_recall.png"), bbox_inches="tight")
plt.show()


## 6. Error Analysis (FP vs FN)

In [None]:
# Side-by-side false-positive / false-negative counts per DQN experiment.
# Reads names / fps / fns built in the experiments cell.
x = np.arange(len(names))
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(x - 0.175, fps, 0.35, label="False Positives", color="#e74c3c")
ax.bar(x + 0.175, fns, 0.35, label="False Negatives", color="#3498db")

# Lift each count label by 2% of the tallest bar in its own series.
fp_lift = max(fps, default=0) * 0.02
fn_lift = max(fns, default=0) * 0.02
for pos, fp_count, fn_count in zip(x, fps, fns):
    ax.text(pos - 0.175, fp_count + fp_lift, f"{fp_count:,}", ha="center", fontsize=7)
    ax.text(pos + 0.175, fn_count + fn_lift, f"{fn_count:,}", ha="center", fontsize=7)

ax.set_xticks(x)
ax.set_xticklabels(names, fontsize=9)
ax.set_ylabel("Count")
ax.set_title("DQN — False Positives vs False Negatives", fontsize=14, pad=15)
ax.legend()
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, "fig_dqn_fp_fn_analysis.png"), bbox_inches="tight")
plt.show()


## 7. Reward Structure Impact

In [None]:
# Phase 1 only (first 5 experiments): recall / precision / F1 per experiment,
# with the reward values printed under each tick label.
p1_names, p1_recs, p1_precs, p1_f1s, p1_rewards = [], [], [], [], []
for exp_id, exp in list(dqn_experiments.items())[:5]:
    r, rw = exp["results"], exp.get("rewards", {})
    # Label with the text after the last ": " of the name; when the name is
    # missing or has no ": " the experiment id is used instead.
    exp_name = exp.get("name", "")
    p1_names.append(exp_name.split(": ")[-1] if ": " in exp_name else exp_id)
    p1_recs.append(r["Recall"]); p1_precs.append(r["Precision"]); p1_f1s.append(r["F1"])
    # Single quotes inside the f-string: reusing the outer quote character is
    # a SyntaxError on Python versions before 3.12.
    p1_rewards.append(
        f"+{rw.get('tp', 0)}/+{rw.get('tn', 0)}/{rw.get('fn', 0)}/{rw.get('fp', 0)}"
    )

x = np.arange(len(p1_names))
width = 0.25
fig, ax = plt.subplots(figsize=(10, 5.5))
ax.bar(x - width, p1_recs, width, label="Recall", color="#e74c3c")
ax.bar(x, p1_precs, width, label="Precision", color="#3498db")
ax.bar(x + width, p1_f1s, width, label="F1", color="#2ecc71")
ax.set_xticks(x)
# Two-line tick label: experiment name above, reward string below.
ax.set_xticklabels([f"{n}\n{rl}" for n, rl in zip(p1_names, p1_rewards)], fontsize=9)
ax.set_ylabel("Score (%)")
ax.set_ylim(0, 105)
ax.set_title("Impact of Reward Structure on DQN Detection Behaviour", fontsize=14, pad=15)
ax.legend(fontsize=10, loc="lower right")
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, "fig_reward_impact.png"), bbox_inches="tight")
plt.show()


## 8. Confusion Matrices

In [None]:
# One figure per scenario holding a grid (at most 3 columns) of 2x2 confusion
# matrices, one per model. Cell colours are fixed (diagonal vs off-diagonal),
# not scaled by the counts; the counts and percentages are in the annotations.
CM_CELL_NAMES = [["TN", "FP"], ["FN", "TP"]]
color_vals = np.array([[0.75, 0.25], [0.25, 0.75]])

for s_key, s_data in scenarios.items():
    if s_key not in SCENARIO_TITLES:
        continue
    models = list(s_data.keys())
    if not models:
        # No models would give a zero-column grid and a division by zero below.
        continue
    cols = min(len(models), 3)
    rows = (len(models) + cols - 1) // cols  # ceiling division

    # squeeze=False always returns a 2-D array of axes, including for 1x1.
    fig, axes = plt.subplots(rows, cols, figsize=(5*cols, 4.5*rows), squeeze=False)
    axes = axes.flatten()

    for i, model in enumerate(models):
        ax = axes[i]
        # A model without a stored matrix is drawn as all zeros.
        cm = np.array(s_data[model].get("ConfusionMatrix", [[0,0],[0,0]]))
        total = max(cm.sum(), 1)  # avoid dividing by zero for an all-zero matrix
        # Three-line annotation per cell: label, count, share of all samples.
        annot = np.array([
            [f"{CM_CELL_NAMES[row][col]}\n{cm[row][col]:,}\n({cm[row][col]/total*100:.1f}%)"
             for col in range(2)]
            for row in range(2)
        ])
        sns.heatmap(color_vals, annot=annot, fmt="", ax=ax, cmap="RdYlGn",
                    vmin=0, vmax=1, cbar=False,
                    xticklabels=["Pred: Allow", "Pred: Block"],
                    yticklabels=["True: Benign", "True: Attack"],
                    annot_kws={"fontsize": 10, "fontweight": "bold"})
        ax.set_title(model, fontsize=13, fontweight="bold", pad=10)

    # Hide the unused cells of the grid.
    for j in range(len(models), len(axes)):
        axes[j].set_visible(False)
    fig.suptitle(SCENARIO_TITLES[s_key], fontsize=14, fontweight="bold", y=1.02)
    fig.tight_layout()
    fig.savefig(os.path.join(FIGURES_DIR, f"fig_cm_{s_key}.png"), bbox_inches="tight")
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
