In [100]:
import os
import sys
import json
import math
import time
import argparse
import datetime as dt
import multiprocessing as mp
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Timestamp naming the logged run that this notebook analyses.
log_timestamp = "20260112_193738"
# Folder containing that run's logs.
log_dir = Path("./log") / log_timestamp

# Scenario names to process, in order.
SCENARIOS = [
    "fusing_bump",             # equally spaced with tiny bump above base top
    "uniform_gap",             # equally spaced, larger range
    "uniform_tiny_gap",        # equally spaced, tiny gaps (hard)
    "unique_best_tiny",        # μ1 > μ2=...=μK (tiny gap)
    "unique_best",             # μ1 > μ2=...=μK (larger gap)
    "gaps_increasing_tiny",    # Δ_{i,i+1} < Δ_{i+1,i+2} (tiny total range)
    "gaps_increasing",
    "gaps_decreasing_tiny",    # Δ_{i,i+1} > Δ_{i+1,i+2} (tiny total range)
    "gaps_decreasing",
    "random",                  # random generation, sorted descending
]

# summary.csv is read from log_dir/<scenario>__K<K>__T<T>/ for each
# combination of the K and T values listed here.
Ks = [32]
Ts = [10_000, 20_000, 40_000]
# Folder that receives the figures and CSV files written by this notebook.
run_dir = Path("./Analysis") / log_timestamp

In [101]:
def alpha0_from_zeta(zeta: float, c: float = 1, d: float = 2) -> float:
    """
    Evaluate alpha0 for a given zeta and constants (c, d).

    With rho = 2*T_R/T_D = 2*zeta/(1-zeta):

        alpha = 1 - ((c-1) - rho*(d-1)
                     + sqrt((rho*(d-1) - (c-1))**2 + 2*c*d*rho)) / (2*c)

    The result is clamped into [0, 0.5].  zeta <= 0 returns 0.5 and
    zeta >= 1 returns 0.0 without evaluating the formula (rho is undefined
    at zeta == 1).
    """
    zeta_f = float(zeta)
    # Boundary cases are answered directly.
    if zeta_f >= 1.0:
        return 0.0
    if zeta_f <= 0.0:
        return 0.5

    ratio = 2.0 * zeta_f / (1.0 - zeta_f)
    c_shift = c - 1
    d_term = ratio * (d - 1)
    discriminant = (d_term - c_shift)**2 + 2.0 * c * d * ratio
    numerator = c_shift - d_term + math.sqrt(discriminant)
    unclamped = 1.0 - numerator / (2.0 * c)
    return float(min(0.5, max(0.0, unclamped)))

# c and d each take the values 1, 1.5 and 2.
c_values = np.array((1.0, 1.5, 2.0), dtype=np.float64)
d_values = np.array((1.0, 1.5, 2.0), dtype=np.float64)
# Every (c, d) combination as plain Python floats; c varies slowest.
c_d_grid = [(c_val, d_val) for c_val in c_values.tolist() for d_val in d_values.tolist()]

In [102]:
def infer_grids_from_logs(log_dir: Path) -> Tuple[np.ndarray, np.ndarray]:
    """
    Infer the alpha/zeta grids from the first summary CSV found under log_dir.

    Files named ``*__summary.csv`` (anywhere below ``log_dir``) are preferred.
    When none exist, files named exactly ``summary.csv`` are used instead;
    that is the file name the per-scenario loading loop in this notebook
    reads.  Candidates are sorted by path and only the first one is read.

    Returns
    -------
    (alpha_vals, zeta_vals): sorted unique non-NaN values of the
    ``alpha_req`` and ``zeta_req`` columns, as float arrays.

    Raises
    ------
    FileNotFoundError: no summary CSV exists under ``log_dir``.
    KeyError: the CSV lacks the ``alpha_req`` or ``zeta_req`` column.
    """
    root = Path(log_dir)
    for pattern in ("**/*__summary.csv", "**/summary.csv"):
        candidates = sorted(root.glob(pattern))
        if candidates:
            break
    else:
        raise FileNotFoundError(f"No summary csv found under {log_dir}")

    df0 = pd.read_csv(candidates[0])
    missing = [col for col in ("alpha_req", "zeta_req") if col not in df0.columns]
    if missing:
        raise KeyError(f"{candidates[0]} is missing required column(s): {missing}")

    # NaN entries are dropped: a NaN in a grid would win every later
    # nearest-value search, because np.argmin treats NaN as the minimum.
    alpha_vals = np.array(sorted(df0["alpha_req"].dropna().unique()), dtype=float)
    zeta_vals = np.array(sorted(df0["zeta_req"].dropna().unique()), dtype=float)
    return alpha_vals, zeta_vals


def plot_alpha0_cd_grid(c_d_grid: List[Tuple[float, float]], zetas: np.ndarray, outdir: Path):
    """
    Draw one alpha0(zeta) curve per (c, d) pair and save the figure.

    ``outdir`` is created if needed; the image is written to
    ``outdir / "alpha0_vs_zeta__c_d_grid.png"`` at 200 dpi.
    """
    outdir.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    for c, d in c_d_grid:
        curve = np.fromiter((alpha0_from_zeta(z, c=c, d=d) for z in zetas), dtype=float)
        ax.plot(zetas, curve, linewidth=1.2, label=f"c={c:g}, d={d:g}")

    ax.set_xlabel("zeta")
    ax.set_ylabel("alpha0(zeta; c,d)")
    # alpha0 is clamped to [0, 0.5]; leave a small margin around that range.
    ax.set_ylim(-0.02, 0.52)
    ax.set_title("alpha0(zeta) for c,d grid")
    ax.legend(ncol=2, fontsize=8)

    fig.tight_layout()
    fig.savefig(outdir / "alpha0_vs_zeta__c_d_grid.png", dpi=200)
    plt.close(fig)

In [103]:
# Recover the alpha/zeta grids from the logged summaries as float arrays.
alpha_grid, zeta_grid = (np.array(grid, dtype=float) for grid in infer_grids_from_logs(log_dir))

run_dir.mkdir(parents=True, exist_ok=True)

# Range of zeta present in the logs; an empty grid falls back to [0, 1].
if zeta_grid.size:
    zeta_min, zeta_max = float(zeta_grid.min()), float(zeta_grid.max())
else:
    zeta_min, zeta_max = 0.0, 1.0

# 400 evenly spaced zetas over that range, with the lower end clipped at 0
# and the upper end clipped at 0.999999.
zeta_line = np.linspace(max(0.0, zeta_min), min(0.999999, zeta_max), num=400)

plot_alpha0_cd_grid(c_d_grid=c_d_grid, zetas=zeta_line, outdir=run_dir / "alpha0_cd_grid")

In [104]:
# =============================================================================
# Plotting
# =============================================================================
def plot_heatmap(df_sum: pd.DataFrame, scenario: str, K: int, T: int, outdir: Path, alpha_grid: np.ndarray, c_d_grid: Optional[List[Tuple[float, float]]] = None):
    """
    Save a heatmap of ``acc_mean`` over (zeta_req, alpha_req) with alpha0 curves.

    Rows of ``df_sum`` matching ``scenario``, ``K`` and ``T`` are pivoted into
    an alpha_req x zeta_req table of ``acc_mean``.  Nothing is drawn (and
    ``None`` is returned) when no row matches or the pivot is empty.

    The table is drawn with ``pcolormesh(..., shading="nearest")`` so that
    every cell is centred on its own (zeta, alpha) value.  Stretching an image
    between the grid minimum and maximum would instead treat those values as
    cell edges, shifting cells by up to half a grid step relative to the
    overlaid curves, and would assume uniformly spaced grids.

    Parameters
    ----------
    df_sum: summary table with columns ``scenario``, ``K``, ``T``,
        ``alpha_req``, ``zeta_req`` and ``acc_mean``.
    scenario, K, T: selection keys; they also appear in the title and file name.
    outdir: destination folder (created if missing).
    alpha_grid: not used by this function; kept so both plotting helpers
        share one signature.
    c_d_grid: (c, d) pairs for which alpha0(zeta) is overlaid; ``None`` or an
        empty list means the single pair (1.0, 2.0).

    The figure is written to
    ``outdir / f"{scenario}__K{K}__T{T}__heatmap_acc_mean.png"`` at 200 dpi.
    """
    selected = df_sum[(df_sum["scenario"] == scenario) & (df_sum["K"] == K) & (df_sum["T"] == T)]
    if selected.empty:
        return

    # pivot() returns index and columns in sorted order; it raises if an
    # (alpha_req, zeta_req) combination occurs more than once.
    piv = selected.pivot(index="alpha_req", columns="zeta_req", values="acc_mean")
    if piv.empty:
        return

    alphas = piv.index.to_numpy(dtype=float)
    zetas = piv.columns.to_numpy(dtype=float)
    acc = piv.to_numpy(dtype=float)

    cd_list = list(c_d_grid or [(1.0, 2.0)])
    cmap = plt.get_cmap("tab20")

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        mesh = ax.pcolormesh(zetas, alphas, acc, shading="nearest")
        fig.colorbar(mesh, ax=ax, label="acc_mean")
        ax.set_xlabel("zeta")
        ax.set_ylabel("alpha")
        ax.set_title(f"{scenario} | K={K} T={T}: acc_mean heatmap")

        # Overlay alpha0(zeta) for each (c, d), one colour per pair.
        z_line = np.linspace(float(zetas.min()), float(zetas.max()), 200)
        for idx, (c, d) in enumerate(cd_list):
            a_line = np.array([alpha0_from_zeta(z, c=c, d=d) for z in z_line], dtype=float)
            ax.plot(
                z_line,
                a_line,
                linewidth=2,
                label=f"alpha0 c={c:g}, d={d:g}",
                color=cmap(idx / max(1, len(cd_list))),
            )

        ax.legend()
        fig.tight_layout()
        outdir.mkdir(parents=True, exist_ok=True)
        fig.savefig(outdir / f"{scenario}__K{K}__T{T}__heatmap_acc_mean.png", dpi=200)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)


def plot_acc_vs_zeta_lines(df_sum: pd.DataFrame, scenario: str, K: int, T: int, outdir: Path, alpha_grid: np.ndarray, c_d_grid: Optional[List[Tuple[float, float]]] = None):
    """
    Plot ``acc_mean`` against zeta for the best alpha and for alpha0(zeta; c, d).

    For every zeta present in the rows matching ``scenario``, ``K`` and ``T``:

    * "best over alpha" is the row with the largest ``acc_mean``;
    * for each (c, d) pair, alpha0(zeta) is snapped to the nearest value in
      ``alpha_grid`` and the row with that ``alpha_req`` is looked up.  The
      lookup is tolerance-based, so a grid value that differs from the stored
      ``alpha_req`` only by floating-point noise still matches; when no row
      matches, NaN is recorded for that point.

    Each line is drawn with a +/- ``acc_ci95`` band clipped to [0, 1].

    Parameters
    ----------
    df_sum: summary table with columns ``scenario``, ``K``, ``T``,
        ``alpha_req``, ``zeta_req``, ``acc_mean`` and ``acc_ci95``.
    scenario, K, T: selection keys; they also appear in the title and file names.
    outdir: destination folder (created if missing).
    alpha_grid: available alpha values used for the nearest-value snapping.
    c_d_grid: (c, d) pairs to evaluate; ``None`` or an empty list means the
        single pair (1.0, 2.0).

    Outputs (both inside ``outdir``)
    --------------------------------
    ``{scenario}__K{K}__T{T}__acc_mean_vs_zeta_lines.png`` (200 dpi) and
    ``{scenario}__K{K}__T{T}__alpha0_nearest_grid.csv``, which lists, per zeta
    and per (c, d), the snapped alpha and the unsnapped alpha0 value.

    Returns ``None``; nothing is written when no row matches.

    Raises
    ------
    ValueError: ``alpha_grid`` is empty while matching rows exist.
    """
    sub = df_sum[(df_sum["scenario"] == scenario) & (df_sum["K"] == K) & (df_sum["T"] == T)]
    if sub.empty:
        return

    alpha_grid = np.asarray(alpha_grid, dtype=float)
    if alpha_grid.size == 0:
        raise ValueError("alpha_grid must contain at least one value")

    cd_list = list(c_d_grid or [(1.0, 2.0)])
    cmap = plt.get_cmap("tab20")
    cd_colors = [cmap(i / max(1, len(cd_list))) for i in range(len(cd_list))]
    # One accumulator per (c, d) pair, in the same order as cd_list.
    cd_perf: List[Dict[str, List[float]]] = [
        {"mean": [], "ci": [], "alpha_near": [], "alpha_theory": []} for _ in cd_list
    ]

    # The zetas actually processed are recorded explicitly so that every
    # plotted series is guaranteed to pair with the right x value.
    zetas_used: List[float] = []
    best_mean: List[float] = []
    best_ci: List[float] = []

    for zeta in sorted(sub["zeta_req"].dropna().unique()):
        zeta = float(zeta)
        rows = sub[sub["zeta_req"] == zeta]
        if rows.empty:
            continue
        zetas_used.append(zeta)

        # Best accuracy over all alphas at this zeta (NaN if none is available).
        acc = rows["acc_mean"]
        if acc.notna().any():
            best = rows.loc[acc.idxmax()]
            best_mean.append(float(best["acc_mean"]))
            best_ci.append(float(best["acc_ci95"]))
        else:
            best_mean.append(float("nan"))
            best_ci.append(float("nan"))

        row_alphas = rows["alpha_req"].to_numpy(dtype=float)
        for (c, d), perf in zip(cd_list, cd_perf):
            a0 = alpha0_from_zeta(zeta, c=c, d=d)
            a_near = float(alpha_grid[np.argmin(np.abs(alpha_grid - a0))])
            perf["alpha_theory"].append(a0)
            perf["alpha_near"].append(a_near)

            # Tolerant comparison: alpha_grid may come from a different file
            # than df_sum, so exact float equality is not guaranteed.
            matched = rows[np.isclose(row_alphas, a_near, rtol=1e-9, atol=1e-12)]
            if matched.empty:
                perf["mean"].append(float("nan"))
                perf["ci"].append(float("nan"))
            else:
                perf["mean"].append(float(matched.iloc[0]["acc_mean"]))
                perf["ci"].append(float(matched.iloc[0]["acc_ci95"]))

    zetas_plot = np.array(zetas_used, dtype=float)
    best_mean_arr = np.array(best_mean, dtype=float)
    best_ci_arr = np.array(best_ci, dtype=float)

    outdir.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(zetas_plot, best_mean_arr, linewidth=2, label="best over alpha", color="black")
        ax.fill_between(
            zetas_plot,
            np.clip(best_mean_arr - best_ci_arr, 0, 1),
            np.clip(best_mean_arr + best_ci_arr, 0, 1),
            alpha=0.2,
            color="black",
        )

        # alpha0(zeta) performance line and CI band for each (c, d).
        for (c, d), perf, color in zip(cd_list, cd_perf, cd_colors):
            mean_arr = np.array(perf["mean"], dtype=float)
            ci_arr = np.array(perf["ci"], dtype=float)
            ax.plot(zetas_plot, mean_arr, linewidth=2, label=f"alpha0 c={c:g}, d={d:g}", color=color)
            ax.fill_between(
                zetas_plot,
                np.clip(mean_arr - ci_arr, 0, 1),
                np.clip(mean_arr + ci_arr, 0, 1),
                alpha=0.15,
                color=color,
            )

        ax.set_xlabel("zeta")
        ax.set_ylabel("acc_mean")
        ax.set_title(f"{scenario} | K={K} T={T}: acc_mean vs zeta (best/alpha0 grid) with 95% CI")
        ax.legend()

        fig.tight_layout()
        fig.savefig(outdir / f"{scenario}__K{K}__T{T}__acc_mean_vs_zeta_lines.png", dpi=200)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    # Audit table: which grid alpha stood in for each alpha0 value.
    audit = pd.DataFrame({"zeta": zetas_plot})
    for (c, d), perf in zip(cd_list, cd_perf):
        audit[f"alpha0_nearest_c{c:g}_d{d:g}"] = np.array(perf["alpha_near"], dtype=float)
        audit[f"alpha0_theory_c{c:g}_d{d:g}"] = np.array(perf["alpha_theory"], dtype=float)
    audit.to_csv(outdir / f"{scenario}__K{K}__T{T}__alpha0_nearest_grid.csv", index=False)


In [105]:
# Produce the heatmap and the accuracy-vs-zeta figure for every
# (K, scenario, T) combination, reading each combination's own summary.csv.
for K in Ks:
    for scenario in SCENARIOS:
        for T in Ts:
            df_sum = pd.read_csv(log_dir / f"{scenario}__K{K}__T{T}" / "summary.csv")
            plot_heatmap(
                df_sum=df_sum,
                scenario=scenario,
                K=K,
                T=T,
                outdir=run_dir,
                alpha_grid=alpha_grid,
                c_d_grid=c_d_grid,
            )
            plot_acc_vs_zeta_lines(
                df_sum=df_sum,
                scenario=scenario,
                K=K,
                T=T,
                outdir=run_dir,
                alpha_grid=alpha_grid,
                c_d_grid=c_d_grid,
            )