# 05 - Robustness Analysis

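Computes robustness deltas: how much `mean_return` drops from the `No Noise` environment to each perturbed environment (`Noise`, `Delay`, `Combo`). Each delta is the `No Noise` return minus the perturbed return, so a positive value means performance degraded.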

**Output:**
- `/content/drive/MyDrive/robustness_deltas_per_seed.csv`
- `/content/drive/MyDrive/robustness_deltas_agg.csv`

In [None]:
# ==============================
# Script A: Robustness Deltas
# ==============================
import os
import pandas as pd
import numpy as np

# Input: combined training results (one row per baseline / env / seed / phase).
RESULTS_CSV = "/content/drive/MyDrive/results_combined_new.csv"

# Outputs: per-seed table and per-baseline aggregate.
ROBUST_PER_SEED = "/content/drive/MyDrive/robustness_deltas_per_seed.csv"
ROBUST_AGG = "/content/drive/MyDrive/robustness_deltas_agg.csv"

print(f"Loading training results from: {RESULTS_CSV}")
df = pd.read_csv(RESULTS_CSV)

# Validate the schema BEFORE touching any column. Filtering on `phase` first
# would raise a bare KeyError when that column is absent, bypassing the
# descriptive error below.
expected_cols = {"baseline", "env", "seed", "phase", "mean_return", "std_dev", "train_minutes"}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"CSV is missing columns: {missing}")

# Only rows whose phase is "final" are analysed. Copy so that the columns
# added later do not write into a view of `df`.
df_final = df[df["phase"] == "final"].copy()

# Canonicalise environment labels: trim whitespace and lower-case the raw
# value, then map it back to one of the four display names used as pivot
# columns further down.
env_map = {
    label.lower(): label
    for label in ("No Noise", "Noise", "Delay", "Combo")
}
df_final["env_norm"] = df_final["env"].str.strip().str.lower()
df_final["env_clean"] = df_final["env_norm"].map(env_map)

# Anything that did not map to a known environment is a data error; report
# the distinct offending raw values instead of silently dropping them.
unmapped = df_final["env_clean"].isna()
if unmapped.any():
    bad_rows = df_final.loc[unmapped, ["env"]].drop_duplicates()
    raise ValueError(f"Unexpected env names found:\n{bad_rows}")

# Collapse to a single mean_return per (baseline, env, seed); duplicate rows
# for the same triple are averaged.
grouped = df_final.groupby(
    ["baseline", "env_clean", "seed"], as_index=False
)["mean_return"].mean()

# Reshape long -> wide: one row per (baseline, seed), one column per env.
seed_keys = ["baseline", "seed"]
wide = grouped.pivot_table(
    index=seed_keys,
    columns="env_clean",
    values="mean_return",
)
pivot = wide.reset_index()

# An environment that never appears in the data produces no column in the
# pivot; add it as all-NaN so the column lookups below still work.
for env_name in ("No Noise", "Noise", "Delay", "Combo"):
    if env_name in pivot.columns:
        continue
    pivot[env_name] = np.nan

# Per-seed robustness delta for each perturbed environment:
# clean ("No Noise") return minus the return under that perturbation.
perturbed_envs = ("Noise", "Delay", "Combo")
for env_name in perturbed_envs:
    pivot[f"delta_{env_name.lower()}"] = pivot["No Noise"] - pivot[env_name]

# Column layout of the per-seed CSV: keys, raw returns, then deltas.
per_seed_cols = (
    ["baseline", "seed", "No Noise"]
    + list(perturbed_envs)
    + [f"delta_{env_name.lower()}" for env_name in perturbed_envs]
)
per_seed_table = pivot[per_seed_cols]
per_seed_table.to_csv(ROBUST_PER_SEED, index=False)
print(f"Saved per-seed robustness deltas to: {ROBUST_PER_SEED}")

# Summarise across seeds: for every baseline, the mean and the pandas
# default (sample) standard deviation of each return and delta column.
# Maps source column in `pivot` -> prefix of the output column names;
# insertion order fixes the column order of the aggregated CSV.
stat_prefixes = {
    "No Noise": "V_no_noise",
    "Noise": "V_noise",
    "Delay": "V_delay",
    "Combo": "V_combo",
    "delta_noise": "delta_noise",
    "delta_delay": "delta_delay",
    "delta_combo": "delta_combo",
}

agg_rows = []
for baseline_name, seed_rows in pivot.groupby("baseline"):
    summary = {"baseline": baseline_name}
    for source_col, prefix in stat_prefixes.items():
        values = seed_rows[source_col]
        summary[f"{prefix}_mean"] = values.mean()
        summary[f"{prefix}_std"] = values.std()
    agg_rows.append(summary)

agg_df = pd.DataFrame(agg_rows)
agg_df.to_csv(ROBUST_AGG, index=False)
print(f"Saved aggregated robustness stats to: {ROBUST_AGG}")

print("\nPreview (aggregated):")
print(agg_df)