
# HISOL VANET Simulation — Analysis Notebook

This notebook automates analysis of the HISOL VANET simulation logs and produces:
- Packet-level statistics (sent / received counts)
- Packet Delivery Ratio (PDR) per node and overall
- RSSI statistics and distribution
- Neighbor-count time series and summary
- Attack event times extracted from the sybil/replay/jammer logs, as the basis for attack impact analysis over pre/post windows

**Expected log files** (placed in `/mnt/data`, the directory the code cells below read from):
- `bsm_log.csv` (sent BSMs)  
- `rssi_log.csv` (received BSMs / RSSI)  
- `neighbor_log.csv` (neighbor counts over time)  
- `sybil_log.csv`, `replay_log.csv`, `jammer_log.csv` (attack event logs, optional)

Run the cells below (or run the notebook) after placing your simulation logs in `/mnt/data`.


In [None]:

import os, pandas as pd, numpy as np
# Folder the code cells read the simulation logs from.
DATA_DIR = "/mnt/data"

# Every log this notebook knows how to consume.
expected = [
    "bsm_log.csv",
    "rssi_log.csv",
    "neighbor_log.csv",
    "sybil_log.csv",
    "replay_log.csv",
    "jammer_log.csv",
]

# Split the expected names by whether a path of that name exists under
# DATA_DIR; both lists keep the order of `expected`.
found = list(
    filter(lambda name: os.path.exists(os.path.join(DATA_DIR, name)), expected)
)
missing = [name for name in expected if name not in found]

print("Found files:", found)
print("Missing files (if any):", missing)


In [None]:

import os, pandas as pd
DATA_DIR = "/mnt/data"


def try_load(fname, **kwargs):
    """Read ``fname`` from ``DATA_DIR`` as CSV, or return ``None``.

    Args:
        fname: File name; it is joined onto ``DATA_DIR`` with ``os.path.join``.
        **kwargs: Passed through unchanged to ``pandas.read_csv``.

    Returns:
        The parsed ``DataFrame``. ``None`` when the path does not exist, or
        when reading it raises (the error is printed rather than re-raised).
    """
    csv_path = os.path.join(DATA_DIR, fname)

    # A missing file yields None without any message.
    if not os.path.exists(csv_path):
        return None

    try:
        frame = pd.read_csv(csv_path, **kwargs)
    except Exception as e:
        # Best effort: report the problem and let the caller treat the log
        # as absent instead of aborting the cell.
        print(f"Error reading {fname}:", e)
        return None
    return frame

# `names=None` is what pandas uses when `names` is omitted, so bsm_log.csv is
# read exactly like the other logs: the first line becomes the header row.
bsm = try_load("bsm_log.csv", names=None)
rssi, neighbor, sybil, replay, jammer = (
    try_load(log_name)
    for log_name in (
        "rssi_log.csv",
        "neighbor_log.csv",
        "sybil_log.csv",
        "replay_log.csv",
        "jammer_log.csv",
    )
)

# Only the three core logs are reported here; the attack logs are not.
print("Loaded: bsm:", bsm is not None, " rssi:", rssi is not None, " neighbor:", neighbor is not None)


In [None]:

import re
from collections import defaultdict
import pandas as pd, numpy as np, os

DATA_DIR = "/mnt/data"

# helper to parse bsm dataframe
def parse_bsm(df):
    """Normalise the raw ``bsm_log.csv`` frame to the six BSM columns.

    When ``node, x, y, vx, vy, time`` are all present they are selected by
    name; otherwise the first six columns are taken by position and given
    those names. The caller's frame is never modified.

    NOTE(review): the loader in this file reads CSVs with pandas' default
    header handling, so for a header-less log the first record has presumably
    already been consumed as column labels before this runs -- confirm.

    Args:
        df: Raw frame, or ``None`` when the log was not loaded.

    Returns:
        A new frame with columns ``['node', 'x', 'y', 'vx', 'vy', 'time']``
        (``node`` cast to int, ``time`` cast to float), or ``None`` when
        ``df`` is ``None``.

    Raises:
        ValueError: The expected names are absent and there are fewer than
            six columns.
    """
    if df is None:
        return None

    wanted = ['node', 'x', 'y', 'vx', 'vy', 'time']
    if set(wanted).issubset(df.columns):
        out = df[wanted].copy()
    elif df.shape[1] >= len(wanted):
        # Positional fallback. Slice and copy first, then rename the copy:
        # renaming `df.columns` directly would relabel the caller's frame,
        # and reselecting by name breaks if a trailing column already uses
        # one of the wanted names.
        out = df.iloc[:, :len(wanted)].copy()
        out.columns = wanted
    else:
        raise ValueError("bsm_log.csv: unexpected format")

    out['node'] = out['node'].astype(int)
    out['time'] = out['time'].astype(float)
    return out

def parse_rssi(df):
    """Normalise the raw ``rssi_log.csv`` frame and derive the sender id.

    The first three columns are taken by position and named
    ``node, msg, rssi``. A ``sender`` column is added by parsing ``msg``.

    Args:
        df: Raw frame, or ``None`` when the log was not loaded.

    Returns:
        A new frame with columns ``['node', 'msg', 'rssi', 'sender']``, or
        ``None`` when ``df`` is ``None``. ``node`` is cast to int; ``rssi`` is
        numeric with unparseable values turned into NaN; ``sender`` is the
        integer after ``"BSM,"`` in ``msg`` and NaN for any other message.

    Raises:
        ValueError: The frame has fewer than three columns.
    """
    if df is None:
        return None
    if df.shape[1] < 3:
        raise ValueError("rssi_log.csv: unexpected format")

    out = df.iloc[:, :3].copy()
    out.columns = ['node', 'msg', 'rssi']
    out['node'] = out['node'].astype(int)

    def extract_sender(msg):
        """Return the id in a ``BSM,<id>,...`` string, else NaN."""
        if not (isinstance(msg, str) and msg.startswith("BSM,")):
            return np.nan
        try:
            return int(msg.split(",")[1])
        except (ValueError, IndexError):
            # Only a malformed id field is tolerated; anything else
            # (e.g. KeyboardInterrupt) must propagate.
            return np.nan

    out['sender'] = out['msg'].apply(extract_sender)
    out['rssi'] = pd.to_numeric(out['rssi'], errors='coerce')
    return out

def parse_neighbor(df):
    """Normalise the raw ``neighbor_log.csv`` frame to ``time, node, count``.

    When all three names are present they are selected by name; otherwise the
    first three columns are taken by position and given those names. The
    caller's frame is never modified.

    Args:
        df: Raw frame, or ``None`` when the log was not loaded.

    Returns:
        A new frame with columns ``['time', 'node', 'count']``, or ``None``
        when ``df`` is ``None``. ``time`` and ``count`` are numeric with
        unparseable values turned into NaN; ``node`` is cast to int.

    Raises:
        ValueError: The expected names are absent and there are fewer than
            three columns.
    """
    if df is None:
        return None

    wanted = ['time', 'node', 'count']
    if set(wanted).issubset(df.columns):
        out = df[wanted].copy()
    elif df.shape[1] >= len(wanted):
        out = df.iloc[:, :len(wanted)].copy()
        out.columns = wanted
    else:
        # Fail up front, like the bsm/rssi parsers, instead of falling through
        # to a KeyError after possibly writing into the caller's frame.
        raise ValueError("neighbor_log.csv: unexpected format")

    out['time'] = pd.to_numeric(out['time'], errors='coerce')
    out['node'] = out['node'].astype(int)
    out['count'] = pd.to_numeric(out['count'], errors='coerce')
    return out

# Parse whichever raw logs the loading cell produced; a log that was not
# loaded (or a cell that was not run) yields None.
bsm_df = parse_bsm(bsm) if 'bsm' in globals() else None
rssi_df = parse_rssi(rssi) if 'rssi' in globals() else None
neighbor_df = parse_neighbor(neighbor) if 'neighbor' in globals() else None

# basic reports
reports = {}
if bsm_df is not None:
    # Rows per node in the BSM log, ordered by node id.
    sent_counts = bsm_df['node'].value_counts().sort_index()
    reports['sent_total'] = int(len(bsm_df))
    reports['sent_per_node_mean'] = float(sent_counts.mean())
else:
    sent_counts = None

if rssi_df is not None:
    # Rows per parsed sender id; rows whose sender could not be parsed (NaN)
    # are dropped by groupby but still counted in recv_total.
    recv_counts = rssi_df.groupby('sender').size()
    reports['recv_total'] = int(len(rssi_df))
else:
    recv_counts = None

# PDR per node (only nodes with senders present)
# NOTE(review): this is receptions / transmissions per sender. If several
# receivers log the same broadcast the ratio can exceed 1 -- confirm that this
# is the intended PDR definition.
if sent_counts is not None and recv_counts is not None:
    # Index alignment leaves NaN for ids present on only one side; those
    # become 0.
    pdr = (recv_counts / sent_counts).fillna(0)
    overall_pdr = recv_counts.sum() / sent_counts.sum()
    reports['overall_pdr'] = float(overall_pdr)
else:
    pdr = None

# quick DataFrames for display
# Name the index and the value column explicitly: the labels produced by
# value_counts().reset_index() differ between pandas < 2.0 and >= 2.0, and a
# rename map keyed on the old labels mislabels the node column on >= 2.0.
sent_summary = (
    sent_counts.rename_axis('node').reset_index(name='sent_count')
    if sent_counts is not None else None
)
recv_summary = (
    recv_counts.rename_axis('node').reset_index(name='recv_count')
    if recv_counts is not None else None
)

reports


In [None]:

# Fall back to plain printing when `ace_tools` cannot be imported, so this
# cell and the later RSSI cell (which calls the same function) still run.
try:
    from ace_tools import display_dataframe_to_user
except ImportError:
    def display_dataframe_to_user(name, dataframe):
        """Print ``dataframe`` under a ``name`` heading (fallback renderer)."""
        print(f"--- {name} ---")
        print(dataframe)

# Collect the summary tables that earlier cells actually produced.
display_items = {}
if 'sent_summary' in globals() and sent_summary is not None:
    display_items['sent_summary'] = sent_summary
if 'recv_summary' in globals() and recv_summary is not None:
    display_items['recv_summary'] = recv_summary
if 'pdr' in globals() and pdr is not None:
    # The column labels are assigned positionally, so no rename is needed
    # beforehand.
    pdr_df = pdr.reset_index()
    pdr_df.columns = ['node', 'pdr']
    display_items['pdr'] = pdr_df

for name, df in display_items.items():
    display_dataframe_to_user(name, df)


In [None]:

import numpy as np
# NOTE(review): no cell visible in this notebook reads ATTACK_WINDOW yet.
ATTACK_WINDOW = 5.0  # seconds before/after to compare by default


def get_attack_times(df):
    """Extract the distinct event times from an attack-log frame.

    The values of the first numeric column (in column order) are returned.
    If no column is numeric, the first number found in each string of the
    first column is used instead.

    NOTE(review): "first numeric column" is taken to be the timestamp. If a
    log places a numeric id before the time, the ids are returned -- confirm
    the attack-log layout.

    Args:
        df: Raw attack-log frame, or ``None`` when the log was not loaded.

    Returns:
        A list of unique non-null values in order of first appearance; an
        empty list when ``df`` is ``None`` or nothing can be extracted.
    """
    from pandas.api.types import is_bool_dtype, is_numeric_dtype

    if df is None:
        return []

    # pandas' dtype helpers accept extension dtypes (e.g. "string", nullable
    # integers) on which np.issubdtype raises TypeError. bool is excluded
    # explicitly because is_numeric_dtype accepts it while the np.number
    # check it replaces did not.
    for col in df.columns:
        series = df[col]
        if is_numeric_dtype(series) and not is_bool_dtype(series):
            return series.dropna().unique().tolist()

    # fallback: try to parse times from text lines
    try:
        first_col = df.iloc[:, 0]
        return (
            first_col.str.extract(r'(\d+\.?\d*)')[0]
            .astype(float)
            .dropna()
            .unique()
            .tolist()
        )
    except (AttributeError, IndexError, TypeError, ValueError):
        # No columns at all, or a first column without string values.
        return []

# An attack log contributes its event times only when the loading cell has
# defined the corresponding variable; otherwise its list stays empty.
if 'sybil' in globals():
    sybil_times = get_attack_times(sybil)
else:
    sybil_times = []

if 'replay' in globals():
    replay_times = get_attack_times(replay)
else:
    replay_times = []

if 'jammer' in globals():
    jammer_times = get_attack_times(jammer)
else:
    jammer_times = []

# Event times keyed by attack type, shown as the cell output.
attack_summary = {
    'sybil': sybil_times,
    'replay': replay_times,
    'jammer': jammer_times,
}
attack_summary


In [None]:

import matplotlib.pyplot as plt
import os

def _save_figure(draw, title, xlabel, ylabel, filename):
    """Open a new figure, draw and label it, save it under /mnt/data.

    ``draw`` is called with no arguments right after the figure is created
    and is expected to plot onto the current pyplot figure. Returns the path
    the PNG was written to.
    """
    plt.figure()
    draw()
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    target = os.path.join("/mnt/data", filename)
    plt.savefig(target)
    plt.close()
    return target


# Paths of the PNG files written by this cell, in creation order.
plots = []

# PDR histogram
if 'pdr' in globals() and pdr is not None:
    pdr_plot = _save_figure(
        lambda: plt.hist(pdr.dropna().values, bins=20),
        "PDR distribution across nodes",
        "PDR",
        "Number of nodes",
        "pdr_hist.png",
    )
    plots.append(pdr_plot)

# RSSI summary (mean per sender)
if 'rssi_df' in globals() and rssi_df is not None:
    rssi_stats = (
        rssi_df.groupby('sender')['rssi']
        .agg(['mean', 'std', 'count'])
        .reset_index()
        .rename(columns={'mean': 'rssi_mean', 'std': 'rssi_std', 'count': 'recv_count'})
    )
    # The table is shown before the figure is drawn.
    display_dataframe_to_user("rssi_stats", rssi_stats)
    rssi_plot = _save_figure(
        lambda: plt.plot(rssi_stats['sender'], rssi_stats['rssi_mean']),
        "Mean RSSI per sender",
        "sender node id",
        "mean RSSI",
        "rssi_mean.png",
    )
    plots.append(rssi_plot)

# Neighbor count timeseries (average neighbors over time)
if 'neighbor_df' in globals() and neighbor_df is not None:
    # Mean of `count` over all rows sharing a timestamp.
    nb_avg = neighbor_df.groupby('time')['count'].mean().reset_index()
    nb_plot = _save_figure(
        lambda: plt.plot(nb_avg['time'], nb_avg['count']),
        "Average neighbor count vs time (network)",
        "time (s)",
        "avg neighbor count",
        "neighbor_timeseries.png",
    )
    plots.append(nb_plot)

print("Saved plots:", plots)
plots



## Next steps / Usage notes

- If logs are not found in `/mnt/data`, copy them there and re-run the notebook.
- You can customize the attack pre/post window and the PDR calculations to suit your evaluation plan.
- The notebook produces PNG plots in `/mnt/data` and uses `ace_tools.display_dataframe_to_user` to render tables in the UI.

This notebook is aligned with the evaluation metrics and dataset guidance in the literature (see references included in your project).  
