In [None]:
from __future__ import annotations

import json
import re
from pathlib import Path
from typing import List, Optional, Tuple

import pandas as pd

# ─────────── Locate project root ────────────
# When executed as a script, anchor everything to the folder holding this
# file; when ``__file__`` is not defined (e.g. an interactive session), fall
# back to the current working directory.
if "__file__" in globals():
    PROJECT_ROOT = Path(__file__).resolve().parent
else:
    PROJECT_ROOT = Path.cwd()

# Run folders are read from OUTPUT_DIR; the CSV is written under TABLE_DIR.
OUTPUT_DIR = PROJECT_ROOT.joinpath("outputs")
TABLE_DIR = PROJECT_ROOT.joinpath("tables")
CSV_OUT = TABLE_DIR.joinpath("press_results_f1.csv")

# Benchmark keys as used in summary file names and intermediate column names.
RAW_BENCHMARKS = [
    "gsm8k",
    "math",
    "olympiadbench",
    "omnimath",
]

# Benchmark key -> column header used in the final table / CSV.
BENCH_DISPLAY = {
    "gsm8k": "GSM8K",
    "math": "MATH",
    "olympiadbench": "Olympiad-Bench",
    "omnimath": "Omni-MATH",
}
# Matches the "_<press>_r<ratio>_q" fragment of a run-folder name
# (case-insensitive); <press> may not contain underscores.
RUN_RE = re.compile(r"_(?P<press>[^_]+)_r(?P<ratio>[0-9]*\.?[0-9]+)_q", re.IGNORECASE)

# ───────────────── Helpers ──────────────────
def extract_press_ratio(dirname: str) -> Tuple[str, float]:
    """Pull the press name and compression ratio out of a run-folder name.

    Args:
        dirname: Folder name to inspect; the first fragment matching
            ``RUN_RE`` anywhere in the name is used.

    Returns:
        A ``(press, ratio)`` tuple with the ratio converted to ``float``.

    Raises:
        ValueError: If ``dirname`` contains no fragment matching ``RUN_RE``.
    """
    match = RUN_RE.search(dirname)
    if match is None:
        raise ValueError(f"Not a run folder: {dirname}")
    fields = match.groupdict()
    return fields["press"], float(fields["ratio"])

def load_f1(path: Path) -> Optional[float]:
    """Read the ``"f1"`` entry from a JSON summary file.

    Loading is best-effort: any problem with an individual file is reported
    on stdout and treated as a missing score rather than aborting.

    Args:
        path: Location of the summary file.

    Returns:
        The value stored under the top-level ``"f1"`` key (returned as-is,
        presumably a number), or ``None`` when the file does not exist,
        cannot be read or parsed, does not hold a JSON object, or has no
        ``"f1"`` key.
    """
    if not path.is_file():
        return None
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with path.open(encoding="utf-8") as f:
            summary = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"⚠️  Skipping {path} ({exc})")
        return None
    if not isinstance(summary, dict):
        # A list/number/string payload has no "f1" key to look up.
        print(f"⚠️  Skipping {path} (expected a JSON object, got {type(summary).__name__})")
        return None
    return summary.get("f1")

# ─────────── Gather & build table ───────────
def gather_rows() -> List[dict]:
    """Collect one result row per run folder found directly under ``OUTPUT_DIR``.

    A sub-directory counts as a run folder when ``extract_press_ratio``
    accepts its name; every other entry is skipped silently.  For each run
    folder, the F1 of every benchmark in ``RAW_BENCHMARKS`` is read from
    ``<benchmark>_summary.json`` via ``load_f1``.

    Returns:
        A list of dicts with the keys ``"Press"``, ``"Ratio"`` and one key
        per benchmark (``None`` where no score could be loaded), ordered by
        run-folder path.

    Raises:
        SystemExit: If ``OUTPUT_DIR`` is not an existing directory.
    """
    # is_dir() rather than exists(): a stray regular file named "outputs"
    # would pass exists() and then crash inside iterdir().
    if not OUTPUT_DIR.is_dir():
        raise SystemExit(f"outputs/ directory not found at {OUTPUT_DIR}")

    rows = []
    # iterdir() yields entries in arbitrary order; sort so repeated runs
    # produce rows in the same order.
    for run_dir in sorted(OUTPUT_DIR.iterdir()):
        if not run_dir.is_dir():
            continue
        try:
            press, ratio = extract_press_ratio(run_dir.name)
        except ValueError:
            continue  # unrelated folder
        row = {"Press": press, "Ratio": ratio}
        for bm in RAW_BENCHMARKS:
            row[bm] = load_f1(run_dir / f"{bm}_summary.json")
        rows.append(row)
    return rows

def build_table(
    rows: List[dict],
    benchmarks: Optional[List[str]] = None,
    display_names: Optional[dict] = None,
) -> pd.DataFrame:
    """Turn raw result rows into the final, display-ready score table.

    Args:
        rows: Dicts with the keys ``"Press"``, ``"Ratio"`` and one key per
            benchmark; a missing score is ``None``.
        benchmarks: Benchmark keys to process.  Defaults to the module-level
            ``RAW_BENCHMARKS``.
        display_names: Mapping from benchmark key to output column header.
            Defaults to the module-level ``BENCH_DISPLAY``.

    Returns:
        A DataFrame in which
        * numeric benchmark scores are rounded to one decimal,
        * ``"Average"`` holds the rounded mean of a row's scores, or NaN
          when any of that row's scores is missing,
        * rows with ``Press == "None"`` and ``Ratio == 0.0`` (the baseline)
          come first, followed by the rest sorted by Press then Ratio,
        * benchmark columns are renamed according to ``display_names``.
        For empty ``rows`` an empty frame with the same columns is returned.
    """
    bench_cols = list(RAW_BENCHMARKS if benchmarks is None else benchmarks)
    labels = BENCH_DISPLAY if display_names is None else display_names

    if not rows:
        # DataFrame([]) has no columns at all; return the expected layout
        # instead of failing with a KeyError on the first benchmark lookup.
        headers = ["Press", "Ratio"] + [labels.get(bm, bm) for bm in bench_cols] + ["Average"]
        return pd.DataFrame(columns=headers)

    df = pd.DataFrame(rows)

    def _round_score(value):
        # Leave non-numeric entries (e.g. None) untouched.
        return round(value, 1) if isinstance(value, (int, float)) else value

    def _row_average(scores):
        # NaN (not pd.NA) marks an incomplete row so the column stays float64;
        # an object-dtype column would not be affected by to_csv's
        # ``float_format`` and would be written differently from the scores.
        return round(scores.mean(), 1) if scores.notna().all() else float("nan")

    # Round F‑1s
    for bm in bench_cols:
        df[bm] = df[bm].apply(_round_score)

    # Compute Average only when every benchmark score is present
    df["Average"] = df[bench_cols].apply(_row_average, axis=1)

    # Sort: baseline first
    baseline = (df["Press"] == "None") & (df["Ratio"] == 0.0)
    df = pd.concat(
        [df[baseline], df[~baseline].sort_values(["Press", "Ratio"])],
        ignore_index=True,
    )

    # Rename columns for display/CSV
    return df.rename(columns=labels)

# ─────────────────── Main ───────────────────
def main():
    """Build the F1 table, save it as CSV and print it to the console.

    Prints a notice and returns early when ``gather_rows`` yields nothing.
    Otherwise the table from ``build_table`` is written to ``CSV_OUT``
    (creating ``TABLE_DIR`` if needed) and a console copy is printed with
    ``Ratio`` shown as a percentage and missing values shown as blanks.
    """
    run_rows = gather_rows()
    if not run_rows:
        print("No run folders found - nothing to do.")
        return

    results = build_table(run_rows)

    # Persist the numeric table first; the console copy below is text-only.
    TABLE_DIR.mkdir(parents=True, exist_ok=True)
    results.to_csv(CSV_OUT, index=False, float_format="%.2f", na_rep="")

    # Console copy: ratios become whole-number percentages ...
    printable = results.copy()
    printable["Ratio"] = printable["Ratio"].map(lambda ratio: f"{ratio*100:.0f}%")

    # ... and every missing cell becomes an empty string for clean printing.
    missing = printable.isna()
    printable = printable.astype(object).mask(missing, "")

    print("=== F1 Table Summary ===")
    print(printable.to_string(index=False, na_rep=""))
    print(f"\nCSV saved to {CSV_OUT.relative_to(PROJECT_ROOT)}")


if __name__ == "__main__":
    main()


=== F1 Table Summary ===
                 Press Ratio GSM8K  MATH Olympiad-Bench Omni-MATH Average
                  None    0%  45.0  36.9           30.6      28.9    35.4
ExpectedAttentionPress   10%  47.1  38.2           32.4      30.9    37.2
ExpectedAttentionPress   50%  39.0  29.2           29.0      28.1    31.3
            KnormPress   10%  33.4  31.4           24.7      24.8    28.6
            KnormPress   50%   5.4                                       
ObservedAttentionPress   10%  16.3  13.7            8.6      13.5    13.0
ObservedAttentionPress   50%  25.4  17.4            9.5      13.8    16.5
          QFilterPress   10%  31.0  30.8           21.2      21.5    26.1
          QFilterPress   50%  36.0  28.1           20.9      19.6    26.2
           RandomPress   50%  28.7  22.9           18.7      20.4    22.7
           SnapKVPress   10%  45.0  37.3           31.8      29.3    35.8
           SnapKVPress   25%  45.5  36.8           31.2      29.4    35.7
           Sn