In [14]:
import os
import glob
import numpy as np
import pandas as pd

In [15]:
# Population-size label for this run: it is interpolated into the input
# directory name ("n_{N_e}") and into the summary file name.
# NOTE(review): presumably the effective population size — confirm.
N_e = 100
# Normal critical value multiplied by the SEM to form the confidence bounds.
Z_CRIT = 1.96  # for ~95% CI

In [16]:
# Locate the per-replicate observed-switch CSVs for the configured N_e.
obs_switch_dir = f"obs_switches/recomb_map/n_{N_e}"
file_glob  = os.path.join(obs_switch_dir, "observed_switches_rep_*.csv")

# glob.glob returns matches in arbitrary (filesystem-dependent) order; sort them
# so the concatenation order -- and therefore the floating-point aggregates
# computed from it -- is reproducible across runs and machines.
files = sorted(glob.glob(file_glob))
if not files:
    # Fail early with the pattern that was tried rather than producing an
    # empty summary further down.
    raise FileNotFoundError(f"No files matched: {file_glob}")

In [17]:
# Load every replicate file. Both columns are coerced to numeric so that
# unparseable entries become NaN and can be dropped after concatenation.
dfs = []
for f in files:
    df_i = pd.read_csv(f)
    df_i["generation"] = pd.to_numeric(df_i["generation"], errors="coerce")
    df_i["observed_switches"] = pd.to_numeric(df_i["observed_switches"], errors="coerce")
    dfs.append(df_i)

all_data = pd.concat(dfs, ignore_index=True).dropna(subset=["generation", "observed_switches"])

# Shift generations so they start at 0 to match exp_switches (1->0, 2->1, ...)
all_data["generation"] = all_data["generation"].astype(int) - 1
# Rows whose shifted generation is negative (original generation < 1) are discarded.
all_data = all_data[all_data["generation"] >= 0]

# Number of replicate files. Kept under its original name, but no longer used
# as the SEM sample size (see below).
N = max(1, len(files))

# Aggregate across reps
obs_by_generation = all_data.groupby("generation", as_index=True)["observed_switches"]
agg = (
    obs_by_generation
    .agg(obs_mean="mean", obs_std="std", obs_var="var")
    .sort_index()
)

# SEM + CI
# The sample size for the SEM is the number of valid observations in *that*
# generation, not the number of files: rows removed by dropna, or replicates
# that cover different generation ranges, make the two differ. Every group that
# exists has at least one observation, so the divisor is never zero.
n_per_generation = obs_by_generation.count()
# With a single observation the sample std is NaN; it is treated as 0 so the
# interval collapses onto the mean instead of propagating NaN.
agg["obs_sem"] = agg["obs_std"].fillna(0.0) / np.sqrt(n_per_generation)
agg["obs_ci_lower"] = agg["obs_mean"] - Z_CRIT * agg["obs_sem"]
agg["obs_ci_upper"] = agg["obs_mean"] + Z_CRIT * agg["obs_sem"]

df_obs = agg.reset_index().sort_values("generation")

In [18]:
# Destination for the aggregated summary: one CSV per N_e, written next to the
# per-N_e input directories.
summary_dir = "obs_switches/recomb_map"
summary_path = os.path.join(summary_dir, f"summary_obs_switches_n_{N_e}.csv")

# Create the directory if needed, then write the table without the
# positional index column.
os.makedirs(summary_dir, exist_ok=True)
df_obs.to_csv(summary_path, index=False)

print(f"Summary written to {summary_path}")

Summary written to obs_switches/recomb_map/summary_obs_switches_n_100.csv
