# Simulation Results Analyzer

This notebook analyzes previously saved results produced by the simulation runner script. It loads all per-run metrics, posterior summaries, and benchmark results to generate aggregated summary plots and LaTeX tables for the manuscript.

**Functionality:**
- Loads all per-run metrics (`.json` files).
- Loads posterior summaries (`.npz` files) for each run.
- Loads benchmark results (`.npz` files) for each run.
- Aggregates the time-series data to create average factual and counterfactual curves for each scenario.
- Generates and saves summary plots:
    - A 4x3 grid plot for factual CFR estimates.
    - A 4x3 grid plot for counterfactual CFR estimates.
    - Box plots for all evaluation metrics (MAE, MCIW, Coverage, Bias, etc.).
- Aggregates scalar metrics and produces LaTeX summary tables.

## 1. Imports and Configuration

In [None]:
import pandas as pd
import numpy as np
import os
import time
import json
from tqdm.notebook import tqdm
import importlib

# Import and reload custom modules to ensure the latest versions are used
import config; importlib.reload(config)
import data_generation; importlib.reload(data_generation)
import benchmarks; importlib.reload(benchmarks)
import evaluation; importlib.reload(evaluation)
import plotting; importlib.reload(plotting)
import tables; importlib.reload(tables)
import results_io; importlib.reload(results_io)

print("All modules imported and reloaded.")

## 2. Helper and Main Analysis Functions

In [None]:
def sanitize_metrics_dataframe(df):
    """Cleans a DataFrame by converting list values in object columns to scalars.

    Every object-dtype column that contains at least one ``list`` value is
    rewritten as follows:

    - a single-element list is replaced by its only element;
    - any other list (empty or with several elements) is replaced by ``np.nan``;
    - non-list values are left as they are.

    The rewritten column is then cast to ``float`` where possible; if the cast
    fails (e.g. the column also holds strings) the unwrapped values are kept
    with their current dtype.

    Args:
        df: DataFrame of per-run metrics. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    def _unwrap(value):
        if not isinstance(value, list):
            return value
        return value[0] if len(value) == 1 else np.nan

    for col in df.columns:
        if df[col].dtype != 'object':
            continue
        # Scan the whole column rather than only its first non-null entry, so
        # columns mixing scalars and lists are cleaned regardless of row order.
        if any(isinstance(value, list) for value in df[col]):
            df[col] = df[col].apply(_unwrap).astype(float, errors='ignore')
    return df

def _usable_curves(curves, n_points):
    """Return the non-empty entries of ``curves`` as arrays cut to ``n_points`` values."""
    return [np.asarray(curve)[:n_points] for curve in curves if curve is not None and np.size(curve) > 0]


def prepare_aggregated_plot_data(results_df_all):
    """Aggregates time-series results from all valid MC runs for the summary plots.

    For every scenario in ``config.SCENARIOS`` that has at least one run whose
    ``error`` entry is ``None`` or ``"None"``, the posterior summary and the
    benchmark results of each such run are loaded through ``results_io`` and
    the curves are averaged point-wise over the runs. The ground-truth curves
    are regenerated with ``data_generation.simulate_scenario_data`` using the
    scenario's base seed.

    Curves that are missing from a loaded file (the ``[]`` default) are left
    out of the average. If a series has no usable curve at all, it is filled
    with NaN values of length ``config.T_ANALYSIS_LENGTH`` and a warning is
    printed, so one missing estimator does not abort the whole aggregation.

    Args:
        results_df_all: DataFrame of per-run metrics with the columns
            ``scenario_id``, ``mc_run`` and ``error``. ``mc_run`` is reduced by
            one before it is passed to the loaders.

    Returns:
        A list with one dict per scenario that has valid runs. Each dict holds
        ``scenario_id``, ``true_r_t``, ``true_rcf_0_t``,
        ``true_intervention_times_0_abs`` and ``estimated_r_t_dict``, which maps
        each estimator name to its averaged curves.
    """
    # Key used in this function -> key inside the loaded posterior summary.
    posterior_key_map = {
        'sCFR_mean': "p_mean",
        'sCFR_lower': "p_q025",
        'sCFR_upper': "p_q975",
        'sCFR_cf_mean': "p_cf_mean",
        'sCFR_cf_lower': "p_cf_q025",
        'sCFR_cf_upper': "p_cf_q975",
    }
    # Key used in this function -> key inside the loaded benchmark results.
    benchmark_key_map = {
        'cCFR_mean': "cCFR_cumulative",
        'cCFR_lower': "cCFR_cumulative_lower",
        'cCFR_upper': "cCFR_cumulative_upper",
        'aCFR_mean': "aCFR_cumulative",
        'aCFR_lower': "aCFR_cumulative_lower",
        'aCFR_upper': "aCFR_cumulative_upper",
        'ITS_factual_mean': "its_factual_mean",
        'ITS_factual_lower': "its_factual_lower",
        'ITS_factual_upper': "its_factual_upper",
        'ITS_cf_mean': "its_counterfactual_mean",
        'ITS_cf_lower': "its_counterfactual_lower",
        'ITS_cf_upper': "its_counterfactual_upper",
    }
    # Estimator name in the output -> prefix stripped from the series keys.
    method_prefixes = {
        "sCFR": 'sCFR_',
        "cCFR_cumulative": 'cCFR_',
        "aCFR_cumulative": 'aCFR_',
        "ITS_MLE": 'ITS_',
    }

    aggregated_plot_data_list = []
    study_global_seed = config.GLOBAL_BASE_SEED
    T_analyze = config.T_ANALYSIS_LENGTH

    for scenario_idx, scenario_config in enumerate(tqdm(config.SCENARIOS, desc="Aggregating Plot Data")):
        scenario_id = scenario_config["id"]

        is_valid_run = (results_df_all["scenario_id"] == scenario_id) & (results_df_all["error"].isin([None, "None"]))
        scen_df_valid = results_df_all[is_valid_run]
        if scen_df_valid.empty:
            # Nothing to plot for this scenario, so skip the ground-truth simulation too.
            continue

        # Regenerate true data for one run to get the ground truth curves
        scenario_base_seed = study_global_seed + (scenario_idx * config.NUM_MONTE_CARLO_RUNS * 1000)
        sim_data_true = data_generation.simulate_scenario_data(scenario_config, run_seed=scenario_base_seed)

        # Collect the time-series of every valid run, one list per series key.
        series_data = {key: [] for key in (*posterior_key_map, *benchmark_key_map)}
        for mc_run_idx in scen_df_valid["mc_run"].astype(int) - 1:
            posterior_summary = results_io.load_posterior_summary_for_run(scenario_id, mc_run_idx, config.OUTPUT_DIR_POSTERIOR_SUMMARIES)
            if posterior_summary:
                for series_key, source_key in posterior_key_map.items():
                    series_data[series_key].append(posterior_summary.get(source_key, []))

            benchmark_results = results_io.load_benchmark_results(scenario_id, mc_run_idx, config.OUTPUT_DIR_BENCHMARK_RESULTS)
            if benchmark_results:
                for series_key, source_key in benchmark_key_map.items():
                    series_data[series_key].append(benchmark_results.get(source_key, []))

        # Point-wise average of every series; all estimators share the same
        # filtering so an absent key cannot produce a ragged stack.
        averaged = {}
        missing_keys = []
        for series_key, curves in series_data.items():
            usable = _usable_curves(curves, T_analyze)
            if usable:
                averaged[series_key] = np.mean(usable, axis=0)
            else:
                averaged[series_key] = np.full(T_analyze, np.nan)
                missing_keys.append(series_key)
        if missing_keys:
            print(f"Warning: no curves loaded for scenario {scenario_id}: {', '.join(missing_keys)}; using NaN placeholders.")

        agg_plot_dict = {
            "scenario_id": scenario_id,
            "true_r_t": sim_data_true["true_r_0_t"][:T_analyze],
            "true_rcf_0_t": sim_data_true["true_rcf_0_t"][:T_analyze],
            "true_intervention_times_0_abs": sim_data_true["true_intervention_times_0_abs"],
            "estimated_r_t_dict": {
                method: {
                    series_key[len(prefix):]: curve
                    for series_key, curve in averaged.items()
                    if series_key.startswith(prefix)
                }
                for method, prefix in method_prefixes.items()
            },
        }
        aggregated_plot_data_list.append(agg_plot_dict)

    return aggregated_plot_data_list

def main_analysis():
    """Orchestrates the post-hoc analysis of saved simulation results.

    Steps, in order:

    1. Create the plot, table and CSV output directories named in ``config``.
    2. Load the metrics of every scenario / Monte Carlo run via ``results_io``.
    3. Sanitize the metrics and keep the runs whose ``error`` entry is ``None``
       or ``"None"``.
    4. Compute per-scenario mean and standard deviation of the numeric metrics
       and save them to ``all_scenarios_metrics_aggregated.csv``.
    5. Produce the summary plots and the LaTeX tables.

    Returns early, after printing a message, when no metrics files or no valid
    runs are found.
    """
    print("Starting analysis of existing simulation results...")

    output_dirs = (config.OUTPUT_DIR_PLOTS, config.OUTPUT_DIR_TABLES, config.OUTPUT_DIR_RESULTS_CSV)
    for output_dir in output_dirs:
        os.makedirs(output_dir, exist_ok=True)

    # Gather the per-run metrics; runs without loadable metrics are skipped.
    loaded_metrics = []
    for scenario_cfg in tqdm(config.SCENARIOS, desc="Loading All Metrics"):
        for run_idx in range(config.NUM_MONTE_CARLO_RUNS):
            metrics = results_io.load_run_metrics(scenario_cfg["id"], run_idx, config.OUTPUT_DIR_RUN_METRICS_JSON)
            if metrics:
                loaded_metrics.append(metrics)

    if not loaded_metrics:
        print("No metrics files found. Cannot generate plots or tables.")
        return

    results_df_all = pd.DataFrame(loaded_metrics)
    sanitized_df = sanitize_metrics_dataframe(results_df_all)
    without_error = sanitized_df['error'].isin([None, "None"])
    results_df_valid = sanitized_df[without_error].copy()

    if results_df_valid.empty:
        print("No valid simulation runs found. Analysis cannot proceed.")
        return

    # Cast the coverage columns to nullable integers before aggregating.
    coverage_columns = list(filter(lambda name: 'cover' in name, results_df_valid.columns))
    for column in coverage_columns:
        results_df_valid[column] = results_df_valid[column].astype('Int64')

    # Per-scenario mean and standard deviation of all numeric metrics.
    by_scenario = results_df_valid.groupby("scenario_id")
    summary_mean = (
        by_scenario.mean(numeric_only=True)
        .add_suffix('_mean')
        .reset_index()
        .rename(columns={'scenario_id_mean': 'scenario_id'})
    )
    summary_std = (
        by_scenario.std(numeric_only=True)
        .add_suffix('_std')
        .reset_index()
        .rename(columns={'scenario_id_std': 'scenario_id'})
    )
    results_df_summary = summary_mean.merge(summary_std, on="scenario_id", how="left")

    analysis_csv_path = os.path.join(config.OUTPUT_DIR_RESULTS_CSV, "all_scenarios_metrics_aggregated.csv")
    results_df_summary.to_csv(analysis_csv_path, index=False)
    print(f"\nAggregated summary metrics saved to {analysis_csv_path}")

    # --- Plots ---
    plots_dir = config.OUTPUT_DIR_PLOTS
    print("\nPreparing aggregated data for summary plots...")
    aggregated_plot_data = prepare_aggregated_plot_data(results_df_all)

    print("Generating aggregated factual summary plot...")
    plotting.plot_aggregated_factual_summary(aggregated_plot_data, plots_dir)

    print("Generating aggregated counterfactual summary plot...")
    plotting.plot_aggregated_counterfactual_summary(aggregated_plot_data, plots_dir)

    print("Generating summary boxplots...")
    plotting.plot_metric_summary_boxplots(results_df_valid, plots_dir)
    plotting.plot_combined_metrics_summary(results_df_valid, plots_dir)

    # --- Tables ---
    tables_dir = config.OUTPUT_DIR_TABLES
    print("Generating LaTeX summary tables...")
    tables.generate_rt_metrics_table(results_df_summary, tables_dir)
    tables.generate_param_metrics_table(results_df_summary, tables_dir)

    print("\nAnalysis complete.")

## 3. Execute Analysis

Run the cell below to perform the analysis. Make sure the simulation output directories in `config.py` point to where your simulation results are stored.

In [None]:
# Entry point: run the full analysis only when this code is executed as the
# main program, not when it is imported as a module.
if __name__ == '__main__':
    main_analysis()