# Simulation Results Analyzer

This notebook analyzes existing saved simulation results from the `run_simulation_notebook.ipynb` (or its script equivalent).

**Functionality:**
- Loads per-run metrics (`.json` files) from `OUTPUT_DIR_RUN_METRICS_JSON`.
- Loads posterior summaries (mean, quantiles as `.npz` files) for the first MC run of each scenario from `OUTPUT_DIR_POSTERIOR_SUMMARIES`.
- Regenerates true data (`sim_data`) for the first MC run of each scenario using stored seeds to provide context for plots.
- Generates and saves individual time-series plots for the first MC run of each scenario (to `OUTPUT_DIR_PLOTS`).
- Generates and saves a combined 4x3 grid plot of these time-series (to `OUTPUT_DIR_PLOTS`).
- Generates and saves summary box plots for all evaluation metrics across all MC runs (to `OUTPUT_DIR_PLOTS`).
- Aggregates metrics and produces LaTeX summary tables (to `OUTPUT_DIR_TABLES`).

**Prerequisites:**
1. The simulation runner notebook/script must have completed, and its output files must be available in the directories specified in `config.py`.
2. All helper Python modules (`config.py`, `data_generation.py`, etc.) must be in the same directory or accessible.

## 1. Imports and Configuration

In [2]:
import pandas as pd
import numpy as np
import os
import time
import json 
from tqdm.auto import tqdm

# Import your custom modules
import config
import data_generation
import benchmarks
import evaluation
import plotting
import tables
import results_io

print("Reading configuration and output paths from config.py")

Reading configuration and output paths from config.py


## 2. Main Analysis Function

In [15]:
def sanitize_metrics_dataframe(df):
    """Cleans a DataFrame of metrics by converting list-like values to scalars."""
    for col in df.columns:
        if df[col].dtype == 'object':
            if df[col].notna().any() and isinstance(df[col].dropna().iloc[0], list):
                df[col] = df[col].apply(
                    lambda x: x[0] if isinstance(x, list) and len(x) == 1 else x
                )
    return df

def prepare_aggregated_plot_data(results_df_all):
    """
    Aggregates time-series results from all valid MC runs for the summary plot.
    """
    aggregated_plot_data_list = []
    study_global_seed = config.GLOBAL_BASE_SEED

    for scenario_idx, scenario_config_dict in enumerate(tqdm(config.SCENARIOS, desc="Aggregating Plot Data")):
        scenario_id = scenario_config_dict["id"]
        scenario_base_seed = study_global_seed + (scenario_idx * config.NUM_MONTE_CARLO_RUNS * 1000)
        
        sim_data_true = data_generation.simulate_scenario_data(scenario_config_dict, run_seed=scenario_base_seed)
        T_analyze = config.T_ANALYSIS_LENGTH

        # Filter results for valid runs in this scenario
        scen_df_valid = results_df_all[
            (results_df_all["scenario_id"] == scenario_id) & 
            (results_df_all["error"].isin([None, "None"]))
        ]
        if scen_df_valid.empty: continue
        
        # Stacks for collecting time-series from all valid runs
        sCFR_means, sCFR_lowers, sCFR_uppers, sCFR_cf_means, sCFR_cf_lowers, sCFR_cf_uppers = [], [], [], [], [], []
        cCFR_means, cCFR_lowers, cCFR_uppers = [], [], []
        aCFR_means, aCFR_lowers, aCFR_uppers = [], [], []

        for mc_run_idx in scen_df_valid["mc_run"].astype(int) - 1:
            posterior_summary = results_io.load_posterior_summary_for_run(scenario_id, mc_run_idx, config.OUTPUT_DIR_POSTERIOR_SUMMARIES)
            if posterior_summary:
                # Append sCFR data
                sCFR_means.append(posterior_summary.get("p_mean", []))
                sCFR_lowers.append(posterior_summary.get("p_q025", []))
                sCFR_uppers.append(posterior_summary.get("p_q975", []))
                sCFR_cf_means.append(posterior_summary.get("p_cf_mean", []))
                sCFR_cf_lowers.append(posterior_summary.get("p_cf_q025", []))
                sCFR_cf_uppers.append(posterior_summary.get("p_cf_q975", []))

            sim_data_run = data_generation.simulate_scenario_data(scenario_config_dict, run_seed=(scenario_base_seed + mc_run_idx))
            benchmark_cis = benchmarks.calculate_benchmark_cis_with_bayesian(sim_data_run["d_t"], sim_data_run["c_t"], sim_data_run["f_s_true"])
            
            # Append benchmark data
            cCFR_means.append(benchmarks.calculate_crude_cfr(sim_data_run["d_t"], sim_data_run["c_t"], cumulative=True))
            cCFR_lowers.append(benchmark_cis["cCFR_cumulative_lower"])
            cCFR_uppers.append(benchmark_cis["cCFR_cumulative_upper"])
            
            aCFR_means.append(benchmarks.calculate_nishiura_cfr_cumulative(sim_data_run["d_t"], sim_data_run["c_t"], sim_data_run["f_s_true"]))
            aCFR_lowers.append(benchmark_cis["aCFR_cumulative_lower"])
            aCFR_uppers.append(benchmark_cis["aCFR_cumulative_upper"])

        # Calculate the point-wise average of the curves and intervals
        agg_plot_dict = {
            "scenario_id": scenario_id,
            "true_r_t": sim_data_true["true_r_0_t"][:T_analyze],
            "true_rcf_0_t": sim_data_true["true_rcf_0_t"][:T_analyze],
            "estimated_r_t_dict": {
                "sCFR": {
                    "mean": np.mean([s for s in sCFR_means if len(s)>0], axis=0)[:T_analyze],
                    "lower": np.mean([s for s in sCFR_lowers if len(s)>0], axis=0)[:T_analyze],
                    "upper": np.mean([s for s in sCFR_uppers if len(s)>0], axis=0)[:T_analyze],
                    "cf_mean": np.mean([s for s in sCFR_cf_means if len(s)>0], axis=0)[:T_analyze],
                    "cf_lower": np.mean([s for s in sCFR_cf_lowers if len(s)>0], axis=0)[:T_analyze],
                    "cf_upper": np.mean([s for s in sCFR_cf_uppers if len(s)>0], axis=0)[:T_analyze]
                },
                "cCFR_cumulative": {
                    "mean": np.mean(cCFR_means, axis=0)[:T_analyze],
                    "lower": np.mean(cCFR_lowers, axis=0)[:T_analyze],
                    "upper": np.mean(cCFR_uppers, axis=0)[:T_analyze]
                },
                "aCFR_cumulative": {
                    "mean": np.mean(aCFR_means, axis=0)[:T_analyze],
                    "lower": np.mean(aCFR_lowers, axis=0)[:T_analyze],
                    "upper": np.mean(aCFR_uppers, axis=0)[:T_analyze]
                }
            }
        }
        aggregated_plot_data_list.append(agg_plot_dict)

    return aggregated_plot_data_list

def main_analysis():
    """
    Main function to orchestrate the post-hoc analysis of simulation results.
    """
    all_loaded_metrics_list = []
    all_plot_data_for_first_runs = []
    
    print("Starting analysis of existing simulation results...")
    start_time_analysis = time.time()

    for dir_path in [config.OUTPUT_DIR_PLOTS, config.OUTPUT_DIR_TABLES, config.OUTPUT_DIR_RESULTS_CSV]:
        if not os.path.exists(dir_path): os.makedirs(dir_path)

    study_global_seed = config.GLOBAL_BASE_SEED

    for scenario_idx, scenario_config_dict in enumerate(tqdm(config.SCENARIOS, desc="Loading & Preparing Data")):
        scenario_id = scenario_config_dict["id"]
        scenario_base_seed = study_global_seed + (scenario_idx * config.NUM_MONTE_CARLO_RUNS * 1000)
        
        for mc_run_idx in range(config.NUM_MONTE_CARLO_RUNS):
            run_metrics = results_io.load_run_metrics(scenario_id, mc_run_idx, config.OUTPUT_DIR_RUN_METRICS_JSON)
            if run_metrics:
                all_loaded_metrics_list.append(run_metrics)

            # Prepare plot data for the first valid MC run of each scenario
            if mc_run_idx == 0:
                run_specific_seed_dgp = scenario_base_seed + mc_run_idx
                sim_data = data_generation.simulate_scenario_data(scenario_config_dict, run_seed=run_specific_seed_dgp)
                
                posterior_summary = results_io.load_posterior_summary_for_run(
                    scenario_id, mc_run_idx, config.OUTPUT_DIR_POSTERIOR_SUMMARIES
                )
                
                if posterior_summary:
                    T_analyze = config.T_ANALYSIS_LENGTH

                    sCFR_est_plot = {
                        "mean": posterior_summary.get("p_mean", np.array([])),
                        "lower": posterior_summary.get("p_q025", np.array([])),
                        "upper": posterior_summary.get("p_q975", np.array([])),
                        "cf_mean": posterior_summary.get("p_cf_mean", np.array([])),
                        "cf_lower": posterior_summary.get("p_cf_q025", np.array([])),
                        "cf_upper": posterior_summary.get("p_cf_q975", np.array([]))
                    }

                    # r_t_mean_sCFR = posterior_summary.get("p_mean", np.full(T_analyze, np.nan))
                    # r_t_lower_sCFR = posterior_summary.get("p_q025", np.full(T_analyze, np.nan))
                    # r_t_upper_sCFR = posterior_summary.get("p_q975", np.full(T_analyze, np.nan))
                    # rcf_t_mean_sCFR = posterior_summary.get("p_cf_mean", np.full(T_analyze, np.nan))

                    # Calculate benchmark point estimates                    
                    benchmark_r_t_estimates = {
                        "cCFR_cumulative": benchmarks.calculate_crude_cfr(sim_data["d_t"], sim_data["c_t"], cumulative=True),
                        "aCFR_cumulative": benchmarks.calculate_nishiura_cfr_cumulative(sim_data["d_t"], sim_data["c_t"], sim_data["f_s_true"])
                    }

                    # Calculate benchmark credible intervals using the Bayesian Beta-Binomial method
                    benchmark_cis = benchmarks.calculate_benchmark_cis_with_bayesian(
                        sim_data["d_t"], sim_data["c_t"], sim_data["f_s_true"]
                    )

                    plot_data_for_run = {
                            "scenario_id": scenario_id, "mc_run_idx": mc_run_idx,
                            "true_r_t": sim_data["true_r_0_t"][:T_analyze], 
                            "true_rcf_0_t": sim_data["true_rcf_0_t"][:T_analyze],
                            "estimated_r_t_dict": {
                                "cCFR_cumulative": {
                                    "mean": benchmark_r_t_estimates["cCFR_cumulative"][:T_analyze],
                                    "lower": benchmark_cis["cCFR_cumulative_lower"][:T_analyze],
                                    "upper": benchmark_cis["cCFR_cumulative_upper"][:T_analyze]
                                },
                                "aCFR_cumulative": {
                                    "mean": benchmark_r_t_estimates["aCFR_cumulative"][:T_analyze],
                                    "lower": benchmark_cis["aCFR_cumulative_lower"][:T_analyze],
                                    "upper": benchmark_cis["aCFR_cumulative_upper"][:T_analyze]
                                }
                            }
                        }

                    plot_data_for_run["estimated_r_t_dict"]["sCFR"] = sCFR_est_plot
                    all_plot_data_for_first_runs.append(plot_data_for_run)
                else:
                    print(f"  Warning: Posterior summary not found for plotting Scen {scenario_id}, Run {mc_run_idx + 1}.")
    
    if not all_loaded_metrics_list:
        print("No metrics files found. Cannot generate plots or tables.")
        return
        
    # --- Aggregate results and generate outputs ---
    results_df_all = pd.DataFrame(all_loaded_metrics_list)
    results_df_all = sanitize_metrics_dataframe(results_df_all)
    results_df_valid = results_df_all[results_df_all['error'].isin([None, "None"])].copy()
    
    if results_df_valid.empty:
        print("No valid simulation runs found in the loaded metrics. Analysis cannot proceed.")
        return

    analysis_csv_path = os.path.join(config.OUTPUT_DIR_RESULTS_CSV, "all_scenarios_metrics_aggregated_by_analyzer.csv")
    results_df_valid.to_csv(analysis_csv_path, index=False)
    print(f"\nAggregated valid metrics saved to {analysis_csv_path}")

    # --- Generate Plots and Tables ---
    print("\nPreparing aggregated data for summary plot...")
    aggregated_plot_data = prepare_aggregated_plot_data(results_df_valid)
    
    print("Generating combined 4x3 aggregated summary plot...")
    plotting.plot_aggregated_scenarios_summary(aggregated_plot_data, config.OUTPUT_DIR_PLOTS)
    
    # This part should now work without KeyError because the upstream data is fixed.
    print("Generating time-series plots from saved summaries...")
    for p_data in all_plot_data_for_first_runs:
        plotting.plot_cfr_timeseries_from_data(p_data["scenario_id"], p_data["mc_run_idx"], p_data, config.OUTPUT_DIR_PLOTS)
    
    print("Generating combined 4x3 summary plot...")
    plotting.plot_all_scenarios_summary(all_plot_data_for_first_runs, config.OUTPUT_DIR_PLOTS)
    
    print("Generating summary boxplots from loaded metrics...")
    plotting.plot_metric_summary_boxplots(results_df_valid, config.OUTPUT_DIR_PLOTS)
    
    print("Generating LaTeX summary tables from loaded metrics...")
    # Find all columns that contain 'cover'
    cover_cols = [col for col in results_df_valid.columns if 'cover' in col]
    for col in cover_cols:
        # Convert True/False to 1/0 so that .mean() calculates the coverage probability
        results_df_valid[col] = results_df_valid[col].astype('Int64')
    
    summary_metrics_mean = results_df_valid.groupby("scenario_id").mean(numeric_only=True).reset_index()
    summary_metrics_std = results_df_valid.groupby("scenario_id").std(numeric_only=True).reset_index()
    
    summary_metrics_mean = summary_metrics_mean.add_suffix('_mean').rename(columns={'scenario_id_mean':'scenario_id'})
    summary_metrics_std = summary_metrics_std.add_suffix('_std').rename(columns={'scenario_id_std':'scenario_id'})
    results_df_summary_for_tables = pd.merge(summary_metrics_mean, summary_metrics_std, on="scenario_id", how="left")
    
    tables.generate_rt_metrics_table(results_df_summary_for_tables, config.OUTPUT_DIR_TABLES)
    tables.generate_param_metrics_table(results_df_summary_for_tables, config.OUTPUT_DIR_TABLES)

    end_time_analysis = time.time()
    print(f"\nAnalysis of existing results complete in {end_time_analysis - start_time_analysis:.2f} seconds.")

## 3. Execute Analysis

Run the cell below to perform the analysis. Make sure the simulation output directories in `config.py` point to where your simulation results are stored.

In [16]:
if __name__ == '__main__':
    main_analysis()

Starting analysis of existing simulation results...


Loading & Preparing Data: 100%|██████████████████████████████| 12/12 [00:00<00:00, 77.49it/s]



Aggregated valid metrics saved to ./simulation_outputs/results_csv/all_scenarios_metrics_aggregated_by_analyzer.csv

Preparing aggregated data for summary plot...


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
Aggregating Plot Data:   0%|                                          | 0/12 [00:00<?, ?it/s]


IndexError: invalid index to scalar variable.