In [33]:
from os.path import join, abspath, dirname, isfile
from os import listdir

import sys
import uuid
import json
import re
import plotly.graph_objects as go
import plotly.express as px
from scipy.interpolate import interp1d  
sys.path.insert(0, abspath(join(dirname("."), '..')))

import pickle as pkl
import pandas as pd
from numpy import genfromtxt

from random import seed
from pathlib import Path
from collections import defaultdict

from modules.helpers.utils import get_simple_seed_for_graph
from modules.Simulator import *


In [34]:
# Directory scanned for pickled graph files (*.pkl) by the graph loading cell.
DS_PATH = "/media/xander/HDD/Repos/data/graphs/"
# Base directory handed to get_simple_seed_for_graph; currently the same folder as DS_PATH.
SEED_PATH = "/media/xander/HDD/Repos/data/graphs/"
# Directory scanned for solver outputs named result_<algorithm>_<budget>_<graph>_edges.json.
SOLVED_PATH = "/media/xander/HDD/Repos/MeshShield/results"
# Output directory: sim_results.json is written here and plots go to its "images" sub-folder.
SIMULATION_PATH = "/media/xander/HDD/Repos/MeshShield/simulations"

# Passed, in this order, as the two positional arguments of Simulator.run(...).
# (The spelling of `simmulations_to_run` is kept because other cells reference the name.)
simmulations_to_run = 10
threads_to_use = 22
# Seed applied to Python's built-in `random` module below.
CURRENT_SEED = 2026

# Plotly qualitative palette; the plot functions cycle through it by trace index.
colors = px.colors.qualitative.Set1

# NOTE(review): this seeds only the stdlib `random` generator; whether the Simulator
# draws from it (or from another RNG such as numpy's) is not visible here - confirm.
seed(CURRENT_SEED)

In [None]:
# Execution modes checked by the guarded cells further down:
#   FULL_RUN    - load graphs and solver results, run the simulations, rebuild the DataFrame.
#   RELOAD_MODE - same loading steps, but the simulation cell is skipped and the
#                 existing sim_results.json is read back from disk.
#   JUST_PLOTS  - none of the guarded cells run.
#                 NOTE(review): the plotting cell then needs `df` from an earlier
#                 execution in the same kernel - confirm this is the intended workflow.
RUN_MODE_FULL = 'FULL_RUN'
RUN_MODE_JUST_PLOTS = 'JUST_PLOTS'
RUN_MODE_JUST_RELOAD = 'RELOAD_MODE'
RUN_MODES = [RUN_MODE_FULL, RUN_MODE_JUST_PLOTS, RUN_MODE_JUST_RELOAD]

# Mode used for this run; change it here before executing the notebook.
CURRENT_RUN_MODE = RUN_MODE_FULL

In [36]:
def smooth_line(x_data, y_data, num_points=50):
    """Resample a series onto an evenly spaced x grid using linear interpolation.

    Duplicate x values are collapsed first; for each distinct x the y value of
    its first occurrence is kept.

    Args:
        x_data: Sequence of x coordinates (any order).
        y_data: Sequence of y values, positionally aligned with ``x_data``.
        num_points: Number of grid points between the smallest and largest x.

    Returns:
        ``(x_grid, y_grid)`` as numpy arrays. When fewer than two distinct x
        values exist, the inputs are handed back untouched instead.
    """
    distinct_x, first_seen = np.unique(x_data, return_index=True)
    distinct_y = np.array(y_data)[first_seen]

    # A single point (or nothing) cannot be interpolated.
    if len(distinct_x) < 2:
        return x_data, y_data

    grid = np.linspace(distinct_x.min(), distinct_x.max(), num_points)
    interpolate = interp1d(
        distinct_x,
        distinct_y,
        kind='linear',
        bounds_error=False,
        fill_value="extrapolate",
    )
    return grid, interpolate(grid)

In [37]:
def budget_vs_time_plot(methods, df_for_graph, graph_name):
    """Draw runtime against budget for one graph and save it as a PNG.

    Each algorithm in ``methods`` contributes two traces in the same colour:
    a line through the points returned by ``smooth_line`` and an 'x' marker
    trace holding the raw (budget, runtime) rows.

    Args:
        methods: Algorithm names to plot, in legend order.
        df_for_graph: Rows for a single graph; the code reads the
            ``algorithm``, ``budget`` and ``runtime`` columns.
        graph_name: Used in the title and in the output file name.

    Returns:
        None. The image is written under ``SIMULATION_PATH/images``.
    """
    figure = go.Figure()

    for idx, algo in enumerate(methods):
        tint = colors[idx % len(colors)]
        algo_rows = df_for_graph[df_for_graph["algorithm"] == algo].sort_values("budget").copy()

        # Resampled curve (carries the legend entry).
        curve_x, curve_y = smooth_line(algo_rows["budget"], algo_rows["runtime"])
        curve = go.Scatter(
            x=curve_x,
            y=curve_y,
            mode='lines+markers',
            name=algo,
            line=dict(color=tint, width=4),
            marker=dict(color=tint, size=12, line=dict(width=2)),
            line_shape='spline',
            hovertemplate='<b>%{fullData.name}</b><br>Budget: ~%{x:.0f}<br>Runtime: %{y:.1f}s<extra></extra>',
        )
        figure.add_trace(curve)

        # Measured points; kept out of the legend so each algorithm shows once.
        measured = go.Scatter(
            x=algo_rows["budget"],
            y=algo_rows["runtime"],
            mode='markers',
            marker=dict(symbol='x', size=10, color=tint, line=dict(width=3, color='white')),
            name='',
            showlegend=False,
            hovertemplate='',
        )
        figure.add_trace(measured)

    figure.update_layout(
        title=f"{graph_name.capitalize()} - Budget VS Time",
        xaxis_title="Budget",
        yaxis_title="Runtime (seconds)",
        height=800,
        hovermode='x unified',
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )

    # Spaces are stripped from the file name only, not from the directory part.
    target = join(SIMULATION_PATH, "images", f"budget_x_runtime_{graph_name}.png".replace(" ", ""))
    # scale=2 doubles the pixel dimensions (2400x1600 effective).
    figure.write_image(target, width=1200, height=800, scale=2, format="png")

In [38]:
def budget_vs_saved_plot(methods, df_for_graph, graph_name):
    """Draw saved nodes against budget as grouped bars and save the PNG.

    Each algorithm in ``methods`` gets one bar series plus a dashed trend
    line (built with ``smooth_line``) in the same colour; the trend lines are
    hidden from the legend.

    Args:
        methods: Algorithm names to plot, one bar group slot each.
        df_for_graph: Rows for a single graph; the code reads the
            ``algorithm``, ``budget`` and ``saved`` columns.
        graph_name: Used in the title and in the output file name.

    Returns:
        None. The image is written under ``SIMULATION_PATH/images``.
    """
    chart = go.Figure()

    for slot, algo in enumerate(methods):
        tint = colors[slot % len(colors)]
        per_algo = df_for_graph[df_for_graph["algorithm"] == algo].sort_values("budget").copy()

        # Bars: one offset group per algorithm so they sit side by side.
        chart.add_trace(
            go.Bar(
                x=per_algo["budget"],
                y=per_algo["saved"],
                name=algo,
                marker_color=tint,
                offsetgroup=slot,
            )
        )

        # Dashed trend through the same data.
        trend_x, trend_y = smooth_line(per_algo["budget"], per_algo["saved"])
        chart.add_trace(
            go.Scatter(
                x=trend_x,
                y=trend_y,
                mode='lines',
                name=f'{algo} trend',
                line=dict(color=tint, width=3, dash="dash"),
                showlegend=False,
            )
        )

    chart.update_layout(
        title=f"{graph_name.capitalize()} - Budget VS Saved Nodes",
        xaxis_title="Budget",
        yaxis_title="Saved Nodes",
        barmode='group',
        height=800,
    )

    target = join(SIMULATION_PATH, "images", f"budget_x_saved_{graph_name}.png".replace(" ", ""))
    # scale=2 doubles the pixel dimensions (2400x1600 effective).
    chart.write_image(target, width=1200, height=800, scale=2, format="png")

In [39]:
def budget_vs_ratio_plot(methods, df_for_graph, graph_name):
    """Draw the ``ratio_ppm`` column against budget and save the PNG.

    Does nothing when ``df_for_graph`` has no ``ratio_ppm`` column. Otherwise
    one line per algorithm in ``methods`` is drawn through the points
    returned by ``smooth_line``.

    Args:
        methods: Algorithm names to plot, in legend order.
        df_for_graph: Rows for a single graph; the code reads the
            ``algorithm``, ``budget`` and ``ratio_ppm`` columns.
        graph_name: Used in the title and in the output file name.

    Returns:
        None. The image is written under ``SIMULATION_PATH/images``.
    """
    has_ratio = "ratio_ppm" in df_for_graph.columns
    if not has_ratio:
        return

    chart = go.Figure()
    for idx, algo in enumerate(methods):
        per_algo = df_for_graph[df_for_graph["algorithm"] == algo].sort_values("budget").copy()
        ratio_x, ratio_y = smooth_line(per_algo["budget"], per_algo["ratio_ppm"])
        chart.add_trace(
            go.Scatter(
                x=ratio_x,
                y=ratio_y,
                mode='lines+markers',
                name=algo,
                line=dict(color=colors[idx % len(colors)], width=5),
                line_shape='spline',
                marker=dict(size=8),
            )
        )

    chart.update_layout(
        title=f"{graph_name.capitalize()} - Budget VS Ratio Nodes",
        xaxis_title="Budget",
        yaxis_title="Ratio (ppm)",
        height=800,
    )

    target = join(SIMULATION_PATH, "images", f"budget_x_ratio_{graph_name}.png".replace(" ", ""))
    # scale=2 doubles the pixel dimensions (2400x1600 effective).
    chart.write_image(target, width=1200, height=800, scale=2, format="png")

In [40]:
def saved_nodes_distribution(df_for_graph, graph_name):
    """Save a box plot of the ``saved`` column, one box per algorithm.

    All rows of ``df_for_graph`` are used regardless of budget, and every
    individual observation is drawn next to its box.

    Args:
        df_for_graph: Rows for a single graph; the code reads the
            ``algorithm`` and ``saved`` columns.
        graph_name: Used in the title and in the output file name.

    Returns:
        None. The image is written under ``SIMULATION_PATH/images``.
    """
    box_title = f"{graph_name} - Saved nodes distribution per algorithm (all budgets)"
    box_chart = px.box(
        df_for_graph,
        x="algorithm",
        y="saved",
        color="algorithm",
        points="all",
        title=box_title,
    )

    # Quartiles are computed with the "inclusive" method.
    box_chart.update_traces(quartilemethod="inclusive")
    box_chart.update_layout(
        xaxis_title="Algorithm",
        yaxis_title="Saved nodes",
        showlegend=False,
        height=800,
    )

    target = join(SIMULATION_PATH, "images", f"nodes_distribution_{graph_name}.png".replace(" ", ""))
    # scale=2 doubles the pixel dimensions (2400x1600 effective).
    box_chart.write_image(target, width=1200, height=800, scale=2, format="png")


In [41]:
def safe_qcut_bins(series, q=3, labels=['low', 'medium', 'high']):
    """Bin ``series`` into labelled groups, preferring quantile-based bins.

    ``pd.qcut`` is tried first. If it raises ``ValueError`` a notice is
    printed and equal-width ``pd.cut`` bins are used instead.

    Args:
        series: Values to bin.
        q: Number of quantiles for ``pd.qcut``, and number of equal-width
            bins for the ``pd.cut`` fallback.
        labels: Labels assigned to the bins.

    Returns:
        The categorical result of ``pd.qcut`` or, on fallback, ``pd.cut``.
    """
    try:
        binned = pd.qcut(series, q=q, labels=labels, duplicates='drop')
    except ValueError:
        print("qcut failed (duplicate edges), using equal-width cut")
        binned = pd.cut(series, bins=q, labels=labels, duplicates='drop')
    return binned


In [42]:
def runtime_vs_infected_budget(methods, df_for_graph, graph_name):
    """Draw runtime against budget, split by infection-rate bin, and save the PNG.

    Does nothing when ``df_for_graph`` has no ``infected`` column. If there is
    no ``infection_rate`` column either, one is derived by binning
    ``infected`` with ``safe_qcut_bins``. Every (rate, algorithm) pair with at
    least two rows contributes a resampled line and an 'x' marker trace with
    the raw rows.

    Args:
        methods: Algorithm names to plot.
        df_for_graph: Rows for a single graph; the code reads ``algorithm``,
            ``budget``, ``runtime``, ``infected`` and, if present,
            ``infection_rate``.
        graph_name: Used in the title and in the output file name.

    Returns:
        None. The image is written under ``SIMULATION_PATH/images``.
    """
    if "infected" not in df_for_graph.columns:
        return
    df_plot = df_for_graph.copy()
    if 'infection_rate' not in df_plot.columns:
        df_plot['infection_rate'] = safe_qcut_bins(df_plot['infected'])
    # Missing rates cannot be matched by the equality filter below, and a NaN
    # mixed with string labels would make sorted() raise TypeError.
    unique_rates = sorted(df_plot['infection_rate'].dropna().unique())

    fig1 = go.Figure()
    trace_idx = 0  # advances only when a pair is actually drawn, so colours stay contiguous
    for rate in unique_rates:
        for algo in methods:
            d = df_plot[(df_plot["algorithm"] == algo) &
                        (df_plot["infection_rate"] == rate)].sort_values("budget").copy()
            if len(d) < 2:
                continue  # not enough points to draw a line

            color = colors[trace_idx % len(colors)]
            x_s, y_s = smooth_line(d["budget"], d["runtime"])
            # customdata needs one entry per plotted point, so the infected
            # counts are resampled on the same budget grid as the runtime.
            _, infected_s = smooth_line(d["budget"], d["infected"])
            fig1.add_trace(go.Scatter(
                x=x_s, y=y_s,
                mode='lines+markers',
                name=f"{algo} ({rate})",
                line=dict(color=color, width=4),
                marker=dict(color=color, size=12, line=dict(width=2)),
                line_shape='spline',
                hovertemplate='<b>%{fullData.name}</b><br>Budget: ~%{x:.0f}<br>Runtime: %{y:.1f}s<br>Infected: ~%{customdata:.0f}<extra></extra>',
                customdata=infected_s
            ))

            # Raw measurements; kept out of the legend.
            fig1.add_trace(go.Scatter(
                x=d["budget"], y=d["runtime"],
                mode='markers',
                marker=dict(symbol='x', size=10, color=color, line=dict(width=3, color='white')),
                name='', showlegend=False,
                hovertemplate=''
            ))
            trace_idx += 1

    fig1.update_layout(
        title=f"{graph_name.capitalize()} - Runtime vs Infected x Budget (Binned Rates)",
        xaxis_title="Budget",
        yaxis_title="Runtime (seconds)",
        height=800,
        hovermode='x unified',
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
    )
    fig1.write_image(join(SIMULATION_PATH, "images", f"runtime_x_infected_budget_{graph_name}.png".replace(" ", "")),
                    width=1200, height=800, scale=2, format="png")

In [43]:
def saved_vs_infected_budget(methods, df_for_graph, graph_name):
    """Draw saved nodes against budget, split by infection-rate bin, and save the PNG.

    Does nothing when ``df_for_graph`` has no ``infected`` column. If there is
    no ``infection_rate`` column either, one is derived by binning
    ``infected`` with ``safe_qcut_bins``. Every (rate, algorithm) pair with at
    least two rows contributes a resampled line and an 'x' marker trace with
    the raw rows.

    Args:
        methods: Algorithm names to plot.
        df_for_graph: Rows for a single graph; the code reads ``algorithm``,
            ``budget``, ``saved``, ``infected`` and, if present,
            ``infection_rate``.
        graph_name: Used in the title and in the output file name.

    Returns:
        None. The image is written under ``SIMULATION_PATH/images``.
    """
    if "infected" not in df_for_graph.columns:
        return

    df_plot = df_for_graph.copy()
    if 'infection_rate' not in df_plot.columns:
        df_plot['infection_rate'] = safe_qcut_bins(df_plot['infected'])
    # Missing rates cannot be matched by the equality filter below, and a NaN
    # mixed with string labels would make sorted() raise TypeError.
    unique_rates = sorted(df_plot['infection_rate'].dropna().unique())

    fig2 = go.Figure()
    trace_idx = 0  # advances only when a pair is actually drawn, so colours stay contiguous
    for rate in unique_rates:
        for algo in methods:
            d = df_plot[(df_plot["algorithm"] == algo) &
                        (df_plot["infection_rate"] == rate)].sort_values("budget").copy()
            if len(d) < 2:
                continue  # not enough points to draw a line

            color = colors[trace_idx % len(colors)]
            x_s, y_s = smooth_line(d["budget"], d["saved"])
            # customdata needs one entry per plotted point, so the infected
            # counts are resampled on the same budget grid as the saved nodes.
            _, infected_s = smooth_line(d["budget"], d["infected"])
            fig2.add_trace(go.Scatter(
                x=x_s, y=y_s,
                mode='lines+markers',
                name=f"{algo} ({rate})",
                line=dict(color=color, width=4),
                marker=dict(color=color, size=12, line=dict(width=2)),
                line_shape='spline',
                hovertemplate='<b>%{fullData.name}</b><br>Budget: ~%{x:.0f}<br>Saved: %{y:.0f}<br>Infected: ~%{customdata:.0f}<extra></extra>',
                customdata=infected_s
            ))

            # Raw measurements; kept out of the legend.
            fig2.add_trace(go.Scatter(
                x=d["budget"], y=d["saved"],
                mode='markers',
                marker=dict(symbol='x', size=10, color=color, line=dict(width=3, color='white')),
                name='', showlegend=False,
                hovertemplate=''
            ))
            trace_idx += 1

    fig2.update_layout(
        title=f"{graph_name.capitalize()} - Saved vs Infected x Budget (Binned Rates)",
        xaxis_title="Budget",
        yaxis_title="Saved Nodes",
        height=800,
        hovermode='x unified',
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
    )
    fig2.write_image(join(SIMULATION_PATH, "images", f"saved_x_infected_budget_{graph_name}.png".replace(" ", "")),
                    width=1200, height=800, scale=2, format="png")

In [44]:
if CURRENT_RUN_MODE in [RUN_MODE_FULL, RUN_MODE_JUST_RELOAD]:
    # Collect every *.pkl file sitting directly in DS_PATH. listdir() returns
    # entries in arbitrary order, so the names are sorted to keep the
    # processing order (and therefore the simulation order) stable between runs.
    all_graphs = sorted(
        f for f in listdir(DS_PATH) if (isfile(join(DS_PATH, f)) and f.endswith(".pkl"))
    )

    # graph name -> (unpickled graph object, seed node array)
    graph_data = {}

    for graph in all_graphs:
        # "<name>_edges.pkl" -> "<name>"
        graph_name = graph.replace(".pkl", "").replace("_edges", "")

        # SECURITY NOTE: unpickling can execute arbitrary code; only load
        # graph files that come from a trusted source.
        # The context manager closes the file handle once the graph is loaded.
        with open(join(DS_PATH, graph), 'rb') as graph_file:
            graph_config = pkl.load(graph_file)

        # Seed nodes are read from the comma-separated file located by the project helper.
        seed_path = get_simple_seed_for_graph(SEED_PATH, f"{graph_name}_edges")
        seed_nodes = genfromtxt(seed_path, delimiter=",")

        graph_data[graph_name] = (graph_config, seed_nodes)

In [45]:
if CURRENT_RUN_MODE in [RUN_MODE_FULL, RUN_MODE_JUST_RELOAD]:
    # Solver outputs are named result_<algorithm>_<budget>_<graphName>_edges.json;
    # the algorithm part may not contain underscores, the graph name may.
    pattern = re.compile(
        r"^result_(?P<algorithm>[^_]+)_(?P<budget>\d+)_(?P<graphName>.*)_edges\.json$"
    )

    folder = Path(SOLVED_PATH)

    results = []

    # iterdir() yields entries in arbitrary order; sorting keeps the order in
    # which solver configurations are registered stable between runs.
    for path in sorted(folder.iterdir()):
        if not path.is_file():
            continue
        m = pattern.match(path.name)
        if not m:
            continue

        # Read JSON
        with open(path, 'r') as f:
            data = json.load(f)

        # Start from the fields encoded in the file name, then add the payload.
        info = m.groupdict()
        info["budget"] = int(info["budget"])
        info["filename"] = str(path)
        info["created"] = data["created"]
        info["total_time"] = data["Total time"]
        info["blocked_nodes"] = data["Blocked nodes"]
        info["blocked_count"] = len(data["Blocked nodes"])

        results.append(info)

    # Group by graphName
    blocked_nodes = defaultdict(list)
    # "<graphName>_<algorithm>_<budget>" -> {"total_time": ...}
    complex_data = {}
    for r in results:
        blocked_nodes[r["graphName"]].append(r)

        # Single quotes inside the f-string: reusing the enclosing double quote
        # is a SyntaxError on Python versions before 3.12.
        key = f"{r['graphName']}_{r['algorithm']}_{r['budget']}"
        complex_data[key] = {"total_time": r["total_time"]}

    # Convert to a plain dict, so lookups for unknown graphs raise KeyError
    # instead of silently creating empty entries.
    blocked_nodes = dict(blocked_nodes)

In [46]:
if CURRENT_RUN_MODE == RUN_MODE_FULL:
    # One entry per simulated graph, in the order returned by Simulator.run().
    sim_results = []
    # The tuple is unpacked into dedicated names: binding it to `seed` would
    # overwrite the `random.seed` function imported at the top of the notebook
    # and break any later re-run of the configuration cell.
    for graph_name, (graph_config, graph_seed_nodes) in graph_data.items():
        # blocked_nodes is a plain dict, so a graph without any solver output
        # would raise KeyError on direct indexing.
        configurations = blocked_nodes.get(graph_name, [])
        if not configurations:
            print(f"No solver results found for graph '{graph_name}', skipping simulation")
            continue

        sim = Simulator(graph_config, graph_seed_nodes)

        # Register every solver output under an "<algorithm>_<budget>" id; the
        # DataFrame cell later splits that id on its last underscore.
        for configuration in configurations:
            run_id = f"{configuration['algorithm']}_{configuration['budget']}"
            sim.add_blocked(run_id, configuration['blocked_nodes'])

        # Stored under its own name so the `results` list built while parsing
        # the solver files is not overwritten.
        graph_sim_results = sim.run(simmulations_to_run, threads_to_use)
        sim_results.append(graph_sim_results)

    # Persist the results so RELOAD_MODE can skip the simulation step.
    with open(join(SIMULATION_PATH, "sim_results.json"), 'w') as f:
        json.dump(sim_results, f)

In [47]:
if CURRENT_RUN_MODE in [RUN_MODE_FULL, RUN_MODE_JUST_RELOAD]:
    # Read the simulation summary back from disk (also right after a full run).
    with open(join(SIMULATION_PATH, "sim_results.json"), 'r') as f:
        data = json.load(f)

    # One flat row per (graph, algorithm, budget) for the plotting DataFrame.
    rows = []
    for record in data:
        graph_name = record["graph_name"]
        sim_time = record["simulation time"]["mean"]

        for solver_name, s in record["solvers"].items():
            # Solver ids look like "<algorithm>_<budget>"; split on the last underscore.
            algo, budget_str = solver_name.rsplit("_", 1)
            budget = int(budget_str)
            key = f"{graph_name}_{algo}_{budget}"

            # Solver runtime recorded while parsing the solver result files.
            runtime = complex_data[key]["total_time"]
            saved_mean = s["saved nodes"]["mean"]
            # Fraction scaled to parts per million.
            ratio_ppm = s["fraction of saved nodes to active nodes"]["mean"] * 1_000_000

            # Extend the per-configuration record with the simulation outcome.
            complex_data[key] = {
                "total_time": runtime,
                "saved": saved_mean,
                "ratio_ppm": ratio_ppm,
            }
            rows.append(
                {
                    "graph": graph_name,
                    "algorithm": algo,
                    "budget": budget,
                    "saved": saved_mean,
                    "ratio_ppm": ratio_ppm,
                    "runtime": runtime,
                }
            )

    df = pd.DataFrame(rows)

In [48]:
# Distinct graph and algorithm names, sorted so plot order and colours are stable.
graphs = sorted(df["graph"].unique())
methods = sorted(df["algorithm"].unique())
# Optional one-off export of the combined table (disabled):
# df.to_csv(f"{str(uuid.uuid4())}.csv", index=False)

# Produce the full set of figures for each graph in turn.
for graph_name in graphs:
    df_for_graph = df.loc[df["graph"] == graph_name].copy()

    # Budget-based comparisons across algorithms.
    for plot in (budget_vs_time_plot, budget_vs_saved_plot, budget_vs_ratio_plot):
        plot(methods, df_for_graph, graph_name)

    # Distribution over all budgets; takes no method list.
    saved_nodes_distribution(df_for_graph, graph_name)

    # These two return immediately when the frame has no "infected" column.
    for plot in (runtime_vs_infected_budget, saved_vs_infected_budget):
        plot(methods, df_for_graph, graph_name)
