In [None]:
import os
import json
import pandas as pd
from pathlib import Path

def compute_metrics_summary(
    decision_df: pd.DataFrame,
    experiment_id: str,
    domain: str,
    stabilization_window: int = 3,
) -> dict:
    """
    Summarize a decision log into one flat record of per-entity-averaged metrics.

    Each metric is computed per ``entity_id`` (rows ordered by ``t``) for both
    ``decision_raw`` and ``decision_final``; the per-entity values are then
    averaged with equal weight per entity, skipping NaN.

    Columns read by this function (a KeyError is raised if any is missing):
        entity_id, t, state_value, event_flag, decision_raw, decision_final
    The log schema also lists obs_value and trend_value, but they are not
    read here and therefore not checked.

    Metrics:
        toggle rate: fraction of consecutive row pairs whose decision
            differs; 0.0 for an entity with at most one row.
        false intervention rate (proxy): fraction of rows with
            ``event_flag == 0`` and decision ``== 1``.
        stabilization rate: fraction of event rows (``event_flag == 1``)
            that start a full run of ``stabilization_window`` rows whose
            decision is 1; NaN for an entity with no events. An event too
            close to the end for a full window counts as a failure.

    Parameters:
        decision_df: decision log; it is not modified.
        experiment_id: copied verbatim into the result.
        domain: copied verbatim into the result.
        stabilization_window: number of rows, starting at the event row,
            that must all have decision == 1. Must be >= 1.

    Returns:
        dict with keys experiment_id, domain, n_rows_total, n_rows_valid
        (count of non-null ``state_value``), and the ``*_raw`` / ``*_final``
        variants of toggle_rate, false_intervention_rate, stabilization_rate.

    Raises:
        KeyError: a required column is missing.
        ValueError: ``stabilization_window`` is smaller than 1.
    """
    required = (
        "entity_id", "t", "state_value",
        "event_flag", "decision_raw", "decision_final",
    )
    missing = [name for name in required if name not in decision_df.columns]
    if missing:
        raise KeyError(f"decision_df is missing required columns: {missing}")
    if stabilization_window < 1:
        raise ValueError("stabilization_window must be >= 1")

    # sort_values returns a new frame, so the caller's frame is left untouched.
    df = decision_df.sort_values(["entity_id", "t"])

    # --- helper: toggle rate ---
    def _toggle_rate(sub_df, col):
        if len(sub_df) <= 1:
            return 0.0
        series = sub_df[col]
        # Drop the first comparison: shift(1) puts NaN there, which would
        # always count as a change and inflate the rate.
        changed = series.ne(series.shift(1)).iloc[1:]
        return float(changed.sum() / (len(sub_df) - 1))

    # --- helper: false intervention rate (proxy) ---
    def _false_intervention_rate(sub_df, col):
        if len(sub_df) == 0:
            return 0.0
        false_mask = (sub_df["event_flag"] == 0) & (sub_df[col] == 1)
        return float(false_mask.sum() / len(sub_df))

    # --- helper: stabilization rate ---
    def _stabilization_rate(sub_df, col, k):
        # Work with positions rather than index labels so that repeated
        # index labels (e.g. from concatenated frames) cannot break lookup.
        event_pos = (sub_df["event_flag"] == 1).to_numpy(dtype=bool).nonzero()[0]
        if len(event_pos) == 0:
            return float("nan")
        active = (sub_df[col] == 1).to_numpy(dtype=bool)
        success = sum(
            1
            for pos in event_pos
            if pos + k <= len(active) and active[pos:pos + k].all()
        )
        return success / len(event_pos)

    def _mean(values):
        # Explicit float dtype keeps an empty list well-defined (-> NaN).
        return float(pd.Series(values, dtype="float64").mean())

    toggle_raw, toggle_final = [], []
    false_raw, false_final = [], []
    stab_raw, stab_final = [], []

    # Rows are already ordered by (entity_id, t); groupby keeps that order
    # inside each group, so no per-group re-sort is needed.
    for _, g in df.groupby("entity_id"):
        toggle_raw.append(_toggle_rate(g, "decision_raw"))
        toggle_final.append(_toggle_rate(g, "decision_final"))

        false_raw.append(_false_intervention_rate(g, "decision_raw"))
        false_final.append(_false_intervention_rate(g, "decision_final"))

        stab_raw.append(_stabilization_rate(g, "decision_raw", stabilization_window))
        stab_final.append(_stabilization_rate(g, "decision_final", stabilization_window))

    return {
        "experiment_id": experiment_id,
        "domain": domain,
        "n_rows_total": len(df),
        "n_rows_valid": int(df["state_value"].notna().sum()),
        "toggle_rate_raw": _mean(toggle_raw),
        "toggle_rate_final": _mean(toggle_final),
        "false_intervention_rate_raw": _mean(false_raw),
        "false_intervention_rate_final": _mean(false_final),
        "stabilization_rate_raw": _mean(stab_raw),
        "stabilization_rate_final": _mean(stab_final),
    }

In [2]:
# ===== Configuration =====
EXPERIMENT_ID = "battery_main_long"
DOMAIN = "battery"

BASE_DIR = Path("../../first_week/01_13/outputs")
DECISION_PATH = BASE_DIR / "decision" / "battery_decision_guarded.csv"
REPORT_DIR = BASE_DIR / "reports"

REPORT_DIR.mkdir(parents=True, exist_ok=True)

# ===== load decision log =====
decision_df = pd.read_csv(DECISION_PATH)

# ===== compute metrics =====
metrics = compute_metrics_summary(
    decision_df=decision_df,
    experiment_id=EXPERIMENT_ID,
    domain=DOMAIN,
)

metrics_df = pd.DataFrame([metrics])

# ===== save CSV (append-safe, idempotent on re-run) =====
summary_csv = REPORT_DIR / "metrics_summary.csv"

# A zero-byte file would make read_csv raise, so treat it like a missing file.
if summary_csv.exists() and summary_csv.stat().st_size > 0:
    previous = pd.read_csv(summary_csv)
    # Replace any earlier row for this experiment instead of appending a
    # duplicate each time the cell is re-run; rows of other experiments stay.
    if "experiment_id" in previous.columns:
        previous = previous[previous["experiment_id"] != EXPERIMENT_ID]
    if previous.empty:
        summary_df = metrics_df
    else:
        summary_df = pd.concat([previous, metrics_df], ignore_index=True)
else:
    summary_df = metrics_df

summary_df.to_csv(summary_csv, index=False)

# ===== save JSON (debug) =====
# Overwritten on every run; one file per experiment id.
with open(REPORT_DIR / f"{EXPERIMENT_ID}_metrics.json", "w", encoding="utf-8") as f:
    json.dump(metrics, f, indent=2)

metrics_df

KeyError: 'state_valid'