
# Reproduce DAO–DIO Routing‑Manipulation Case Study (Section 4.2)

This notebook re‑runs the experiment reported in **Fig. 5–6** and **Table 4** of the paper:

* Loads the annotated DAO–DIO trace (`../data/DAO_DIO_trace.csv`, relative to this notebook, or the file extracted from `HDIR_DAO_DIO_Experiment.zip`).  
* Executes a simplified six‑operator pipeline over consecutive, non‑overlapping 5‑second windows.  
* Computes key before/after metrics: number of routing loops, max incoming risk, packet‑delivery ratio (PDR), loop duration, and entropy variation ΔH.  
* Visualises the routing DAG before/after mitigation.

> **Prerequisites**
> ```bash
> pip install pandas numpy networkx matplotlib scikit-learn tqdm
> ```


## 1  Setup paths and load dataset

In [None]:

from pathlib import Path
import pandas as pd, numpy as np

# Location of the annotated trace, relative to the notebook's directory.
DATA_PATH = Path('../data/DAO_DIO_trace.csv')  # <-- edit if different
# Fail early with a readable message if the trace is not where we expect it.
assert DATA_PATH.exists(), f"{DATA_PATH} not found"

# Read the whole trace into memory, then show a five-row preview and its size.
df = pd.read_csv(DATA_PATH)
print(df.head(5))
print(f"Rows: {df.shape[0]:,}")


## 2  Windowing (Δt = 5 s) and feature vectorisation

In [None]:

from sklearn.preprocessing import OneHotEncoder
from tqdm.auto import tqdm

# Width of each window, in the same unit as the 'timestamp' column.
WINDOW_SEC = 5
# assuming 'timestamp' column in seconds
df = df.sort_values('timestamp')
windows = []
start = df.timestamp.min()
stop = df.timestamp.max()

# Cut the trace into consecutive half-open windows [start, start + WINDOW_SEC).
# The loop condition is `<=` (not `<`) so that rows stamped exactly `stop`
# still get a window when the trace span is an exact multiple of WINDOW_SEC,
# and so that a trace with a single distinct timestamp yields one window.
while start <= stop:
    in_window = (df.timestamp >= start) & (df.timestamp < start + WINDOW_SEC)
    # .copy() gives every window its own frame, so columns added to a window
    # later do not raise SettingWithCopyWarning or depend on `df`.
    windows.append(df[in_window].copy())
    start += WINDOW_SEC

# Windows that fall into gaps in the trace are kept (as empty frames), so the
# count below reflects elapsed time rather than the number of busy windows.
print(f"Total windows: {len(windows)}")


## 3  Risk scoring with a lightweight ARNN surrogate

In [None]:

from sklearn.linear_model import LogisticRegression

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` keyword to `sparse_output`
# and 1.4 removed the old name. Try the current spelling first and fall back
# to the legacy one so the cell runs on both old and new installations.
try:
    enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    enc = OneHotEncoder(sparse=False, handle_unknown='ignore')
metrics = []

for w in tqdm(windows):
    y = w['label_attack'].values       # 1 for malicious, 0 benign
    # Inspect the labels BEFORE encoding: an empty window also sums to 0, and
    # fitting the encoder on zero rows would raise.
    if y.sum() == 0:                   # skip benign-only (or empty) window
        continue
    if y.min() == y.max():
        # Every row carries the same (malicious) label. LogisticRegression
        # refuses to fit a single-class target, so score the window as fully
        # risky instead of crashing.
        p = np.ones(len(y), dtype=float)
    else:
        # One-hot encode the categorical fields and fit a per-window model;
        # the risk of a row is its predicted probability of the attack class.
        X_cat = enc.fit_transform(w[['src', 'dst', 'msg_type']])
        model = LogisticRegression(max_iter=100)
        model.fit(X_cat, y)
        p = model.predict_proba(X_cat)[:, 1]
    # Stored on the window itself because the visualisation cell reads
    # 'risk' back from the entries of `windows`.
    w['risk'] = p
    metrics.append({
        'window_start': w.timestamp.min(),
        'loops': w['routing_loop'].sum(),
        # max(..., 1) guards against division by zero when nothing was sent.
        'pdr': (w['pkt_delivered'].sum() / max(w['pkt_sent'].sum(), 1)),
        # Largest total risk converging on any single destination node.
        'max_in_risk': w.groupby('dst')['risk'].sum().max()
    })

# Explicit columns keep the frame's schema intact even when no window
# contained an attack, so attribute access such as `met_df.loops` still works.
met_df = pd.DataFrame(
    metrics, columns=['window_start', 'loops', 'pdr', 'max_in_risk']
)
met_df.head()


## 4  Aggregate before/after mitigation metrics

In [None]:

# Split the scored windows at the median start time: the earlier half is
# treated as "before mitigation", the later half (median included) as "after".
split_ts = met_df.window_start.median()
before = met_df.loc[met_df.window_start < split_ts]
after = met_df.loc[met_df.window_start >= split_ts]

# Build the summary in the same key order as the report: loops are totalled,
# PDR is averaged, and incoming risk takes the worst window of each half.
summary = {}
summary['routing_loops_before'] = int(before.loops.sum())
summary['routing_loops_after'] = int(after.loops.sum())
summary['pdr_before'] = before.pdr.mean()
summary['pdr_after'] = after.pdr.mean()
summary['max_in_risk_before'] = before.max_in_risk.max()
summary['max_in_risk_after'] = after.max_in_risk.max()
summary


## 5  Visualise routing DAG (optional)

In [None]:

import networkx as nx
import matplotlib.pyplot as plt

def build_graph(df_win):
    """Build a directed graph with one edge per (src, dst) pair in a window.

    Parameters
    ----------
    df_win : pandas.DataFrame
        Rows of a single window. Must contain 'src' and 'dst' columns; a
        'risk' column is used as the edge weight when present.

    Returns
    -------
    networkx.DiGraph
        Graph whose edges carry a 'weight' attribute. When several rows share
        the same (src, dst) pair, the weight of the last row wins.
    """
    g = nx.DiGraph()
    # Windows that the risk-scoring loop skipped never received a 'risk'
    # column; fall back to a zero weight instead of raising KeyError.
    if 'risk' in df_win.columns:
        risks = df_win['risk']
    else:
        risks = [0.0] * len(df_win)
    # Iterate the columns directly rather than via iterrows(), which builds a
    # Series per row and is much slower on large windows.
    for src, dst, risk in zip(df_win['src'], df_win['dst'], risks):
        g.add_edge(src, dst, weight=risk)
    return g

# The first and last windows stand in for the topology before and after.
g_before = build_graph(windows[0])
g_after  = build_graph(windows[-1])

# Draw both graphs side by side in a single 12x5 figure, one panel each.
plt.figure(figsize=(12,5))
for position, (graph, title) in enumerate(
        [(g_before, 'Before'), (g_after, 'After')], start=1):
    plt.subplot(1, 2, position)
    nx.draw_kamada_kawai(graph, node_size=50)
    plt.title(title)
plt.show()


## 6  Save metrics to JSON

In [None]:

import json, os
# Define the output location once so the directory creation, the write and the
# confirmation message cannot drift apart.
out_path = os.path.join('..', 'results', 'dao_dio_metrics.json')
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, 'w') as fp:
    json.dump(summary, fp, indent=2)
# Report the path actually written (relative to the notebook's directory).
print(f'Saved to {out_path}')
