# Cell 0 — 环境与路径

In [6]:
import shutil
import os, sys, shlex, glob, re, subprocess
import shlex, threading, queue
from pathlib import Path
from typing import Iterable, List, Union
import pandas as pd
import numpy as np

# --------------- 目录与 CLI ---------------
NB_DIR = Path.cwd()
ROOT = NB_DIR.parent if (NB_DIR.name.lower() == "notebooks") else NB_DIR
PY = sys.executable
CLI = ROOT / "scripts" / "rd_cli.py"
assert CLI.exists(), f"rd_cli.py not found at {CLI}"

RES_ROOT = ROOT / "notebooks" / "results"
OUT_CSV  = RES_ROOT / "out_csv"
OUT_FIG  = RES_ROOT / "figs"
OUT_SYM  = RES_ROOT / "symmetry"
for d in [OUT_CSV, OUT_FIG, OUT_SYM]:
    d.mkdir(parents=True, exist_ok=True)

STYLE  = "ieee"
DEVICE = "cuda" if (shutil.which("nvidia-smi") is not None) else "cpu"



def _pump_stream(stream, label, outq):
    for line in iter(stream.readline, ''):
        outq.put((label, line.rstrip('\n')))
    stream.close()

def safe_run(cmd, cwd=None, check=False, env=None):
    """
    实时把子进程 stdout / stderr 逐行打印到 Notebook。
    返回 (retcode, cmd_str)。
    """
    cmd = [str(c) for c in cmd]
    # 若是 python，强制无缓冲
    if len(cmd) >= 1 and os.path.basename(cmd[0]).lower().startswith("python"):
        if "-u" not in cmd[1:2]:
            cmd = [cmd[0], "-u"] + cmd[1:]
    cmd_str = " ".join(shlex.quote(x) for x in cmd)
    print("[run]", cmd_str, flush=True)

    env2 = os.environ.copy()
    env2["PYTHONUNBUFFERED"] = "1"
    if env:
        env2.update(env)

    proc = subprocess.Popen(
        cmd, cwd=str(cwd) if cwd else None,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True, bufsize=1, env=env2
    )
    outq = queue.Queue()

    t1 = threading.Thread(target=_pump_stream, args=(proc.stdout, "STDOUT", outq), daemon=True)
    t2 = threading.Thread(target=_pump_stream, args=(proc.stderr, "STDERR", outq), daemon=True)
    t1.start(); t2.start()

    # 主循环：交替转发两路输出
    while True:
        try:
            label, line = outq.get(timeout=0.05)
            if label == "STDERR":
                print("┈", line, flush=True)
            else:
                print(line, flush=True)
        except queue.Empty:
            if proc.poll() is not None:
                break

    t1.join(timeout=0.1); t2.join(timeout=0.1)
    ret = proc.returncode
    if check and ret != 0:
        raise RuntimeError(f"Command failed (ret={ret}): {cmd_str}")
    return ret, cmd_str


def build_cmd(base_subcmd:str, base:list[str], opts:dict)->list[str]:
    """
    智能构造命令：仅添加 bool True 的 flag；None/False 不加；其余转为字符串。
    """
    cmd = base[:]
    for k, v in opts.items():
        if isinstance(v, bool):
            if v: cmd.append(k)
        elif v is not None:
            cmd += [k, str(v)]
    return cmd

def glob_many(*patterns: Union[str, Path], dedup: bool = True) -> List[str]:
    """
    支持 Path / str 混合，返回绝对路径字符串列表。
    - dedup=True: 去重并按路径排序
    """
    files: List[str] = []
    for pat in patterns:
        pat_str = str(pat)
        files.extend(glob.glob(pat_str))
    if dedup:
        files = sorted(set(files))
    else:
        files = sorted(files)
    return files

# Cell 1 — CLI 子命令帮助（可快速确认参数）

In [None]:
for sub in ["stage1","ga","viz-all","entropy","archetypes","symmetry","motifs","motifs-explain"]:
    cmd = [PY, str(CLI), sub, "-h"]
    _ = subprocess.run(cmd)


# Cell 2 — 批量运行 stage1（小规模穷举）

In [None]:
def stage1_exists(n:int, k:int)->bool:
    pats = [OUT_CSV / f"stage1_pareto_n{n}_k{k}_*.csv"]
    return any(glob.glob(str(p)) for p in pats)

def list_stage1_outputs(n:int, k:int):
    pats = [OUT_CSV / f"stage1_all_n{n}_k{k}_*.csv",
            OUT_CSV / f"stage1_pareto_n{n}_k{k}_*.csv"]
    files = []
    for p in pats: files += glob.glob(str(p))
    return sorted(files)

def run_stage1(n:int, k:int, reuse=True):
    if reuse and stage1_exists(n, k):
        print(f"[stage1] reuse n={n},k={k}")
        return 0
    cmd = [PY, str(CLI), "stage1", "--n", str(n), "--k", str(k),
           "--out-csv", str(OUT_CSV), "--style", STYLE]
    ret = safe_run(cmd, cwd=ROOT)
    outs = list_stage1_outputs(n, k)
    print(f"[stage1] outputs ({n},{k}) ->", len(outs)); [print("  •",x) for x in outs]
    return ret

ret_log = []
for n in range(2, 5):       # n = 2,3,4
    for k in range(2, n+1): # k <= n
        rc = run_stage1(n, k, reuse=False)  # 首次建议设 reuse=False 强制重跑
        ret_log.append((n, k, rc))
ret_log


# Cell 3 — 批量运行 GA（可复用已有前沿）

In [4]:
def ga_front_exists(n:int, k:int)->bool:
    pats = [OUT_CSV / f"pareto_front_n{n}_k{k}*.csv"]
    return any(glob.glob(str(p)) for p in pats)

def list_ga_outputs(n:int, k:int):
    pats = [OUT_CSV / f"pareto_front_n{n}_k{k}*.csv",
            OUT_CSV / f"gen_summary_n{n}_k{k}*.csv"]
    files = []
    for p in pats: files += glob.glob(str(p))
    return sorted(files)

def run_ga(n:int, k:int, device=DEVICE, reuse=True,
           pop=24, gens=10, p_mut=0.08, p_cx=0.85, elite_keep=6,
           r_vals=3, power_iters=60, trace_mode="hutchpp", hutch_s=24,
           seed_from_stage1=True, max_stage1_seeds=400,
           fast_eval=False, progress_every=8):
    if reuse and ga_front_exists(n,k):
        print(f"[GA] reuse n={n},k={k}"); return 0
    base = [PY, str(CLI), "ga", "--n", str(n), "--k", str(k)]
    opts = {
        "--out-csv": str(OUT_CSV), "--device": device,
        "--pop-size": pop, "--generations": gens,
        "--p-mut": p_mut, "--p-cx": p_cx, "--elite-keep": elite_keep,
        "--r-vals": r_vals, "--power-iters": power_iters,
        "--trace-mode": trace_mode, "--hutch-s": hutch_s,
        "--seed-from-stage1": bool(seed_from_stage1),
        "--max-stage1-seeds": max_stage1_seeds,
        "--fast-eval": bool(fast_eval),
        "--progress-every": progress_every
    }
    cmd = build_cmd("ga", base, opts)
    ret = safe_run(cmd, cwd=ROOT)
    outs = list_ga_outputs(n,k)
    print(f"[GA] outputs ({n},{k}) ->", len(outs)); [print("  •",x) for x in outs]
    return ret

ret_ga = []
for n in range(2, 7):  
    for k in range(2, n+1):
        rc = run_ga(n, k, seed_from_stage1=True, fast_eval=False, reuse=False) 
        ret_ga.append((n, k, rc))
ret_ga[:10], " ... ", len(ret_ga)

[run] 'c:\Users\admin\anaconda3\envs\llmcompressor\python.exe' -u 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\scripts\rd_cli.py' ga --n 2 --k 2 --out-csv 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv' --device cuda --pop-size 24 --generations 10 --p-mut 0.08 --p-cx 0.85 --elite-keep 6 --r-vals 3 --power-iters 60 --trace-mode hutchpp --hutch-s 24 --seed-from-stage1 --max-stage1-seeds 400 --progress-every 8
┈ [16:41:16] INFO rules.ga: GA start | n=2, k=2, device=cuda, pop=24, gens=10, fast_eval=False, seed_from_stage1=True
┈ [16:41:16] INFO rules.ga: Loaded 4 stage1 seeds; take 4.
[GA] GEN 00 | front0=  6 | best=(0,-1.000e+300) | pop=24 | dt=0.58s
┈ [16:41:17] INFO rules.ga: [GEN 01] front0=15 best=(0,-1.000e+300)
┈ [16:41:17] INFO rules.ga: [GEN 02] front0=20 best=(0,-1.000e+300)
┈ [16:41:17] INFO rules.ga: [GEN 03] front0=21 best=(0,-1.000e+300)
┈ [16:41:17] INFO rules.ga: [GEN 04] front0=22 best=(0,-1.000e+300)
┈ [16:41:17] INFO rules.ga: [GEN 05] front0=18 best=(0,-

([(2,
   2,
   (0,
    "'c:\\Users\\admin\\anaconda3\\envs\\llmcompressor\\python.exe' -u 'd:\\师大云盘\\课业\\复杂系统概论\\rules_diversity_final\\scripts\\rd_cli.py' ga --n 2 --k 2 --out-csv 'd:\\师大云盘\\课业\\复杂系统概论\\rules_diversity_final\\notebooks\\results\\out_csv' --device cuda --pop-size 24 --generations 10 --p-mut 0.08 --p-cx 0.85 --elite-keep 6 --r-vals 3 --power-iters 60 --trace-mode hutchpp --hutch-s 24 --seed-from-stage1 --max-stage1-seeds 400 --progress-every 8")),
  (3,
   2,
   (0,
    "'c:\\Users\\admin\\anaconda3\\envs\\llmcompressor\\python.exe' -u 'd:\\师大云盘\\课业\\复杂系统概论\\rules_diversity_final\\scripts\\rd_cli.py' ga --n 3 --k 2 --out-csv 'd:\\师大云盘\\课业\\复杂系统概论\\rules_diversity_final\\notebooks\\results\\out_csv' --device cuda --pop-size 24 --generations 10 --p-mut 0.08 --p-cx 0.85 --elite-keep 6 --r-vals 3 --power-iters 60 --trace-mode hutchpp --hutch-s 24 --seed-from-stage1 --max-stage1-seeds 400 --progress-every 8")),
  (3,
   3,
   (0,
    "'c:\\Users\\admin\\anaconda3\\envs\\llmc

# Cell 4 — 汇聚表（master_index）与 front_paths 构建

In [8]:
def read_csvs(patterns):
    files = []
    for pat in patterns:
        files.extend(glob.glob(str(OUT_CSV / pat)))
    dfs = []
    for f in sorted(set(files)):
        try:
            df = pd.read_csv(f)
            df["__file__"] = Path(f).name
            dfs.append(df)
        except Exception as e:
            print("[WARN] skip", f, "->", e)
    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

# 读取 stage1 的全量与前沿、GA 的前沿
df_stage1_all    = read_csvs(["stage1_all_*.csv"])
df_stage1_pareto = read_csvs(["stage1_pareto_*.csv"])
df_ga_front      = read_csvs(["pareto_front_*.csv"])

def normalize_cols(df):
    if df is None or df.empty: 
        return df
    # 统一“二目标”的值列名称到 trace_or_Z（便于下游汇聚）
    if "Z_exact" in df.columns:
        df = df.rename(columns={"Z_exact": "trace_or_Z"})
    if "sum_lambda_powers" in df.columns:
        df = df.rename(columns={"sum_lambda_powers": "trace_or_Z"})
    return df

df_s1a = normalize_cols(df_stage1_all).assign(source="stage1_all") if not df_stage1_all.empty else pd.DataFrame()
df_s1p = normalize_cols(df_stage1_pareto).assign(source="stage1_pareto") if not df_stage1_pareto.empty else pd.DataFrame()
df_gaf = normalize_cols(df_ga_front).assign(source="ga_front") if not df_ga_front.empty else pd.DataFrame()

_non_empty = [x for x in [df_s1a, df_s1p, df_gaf] if (x is not None and not x.empty)]
if _non_empty:
    master = pd.concat(_non_empty, ignore_index=True)
    print("[master] shape:", master.shape, "columns:", master.columns.tolist())
    master_path = OUT_CSV / "master_index.csv"
    master.to_csv(master_path, index=False)
else:
    print("[master] no inputs -> empty master; skip writing master_index.csv")
    master = pd.DataFrame()
    master_path = OUT_CSV / "master_index.csv"

# ---- 构建 front_paths（供 Cell 5/6/8 使用）----
front_paths = glob_many(OUT_CSV/"stage1_pareto_*.csv", OUT_CSV/"pareto_front_*.csv")
print("front_paths =", len(front_paths))


[master] shape: (5087, 31) columns: ['run_tag', 'n', 'k', 'rule_bits_raw', 'rule_bits_canon', 'rule_count', 'trace_or_Z', 'rows_m', 'arch_star_core', 'arch_has_tri', 'arch_has_quad', 'arch_has_pent', 'arch_near_bipartite_chord', 'arch_selfloop_rich', 'is_canonical_rep', '__file__', 'source', 'rule_bits', 'lambda_max', 'lambda_top2', 'spectral_gap', 'is_front0', 'active_k', 'lower_bound', 'upper_bound', 'lower_bound_raw', 'upper_bound_raw', 'upper_bound_raw_gersh', 'upper_bound_raw_maxdeg', 'archetype_tags', 'exact_Z']
front_paths = 27


# Cell 5 — 批次可视化：viz-all（三图）

In [9]:
# ===== 批量可视化：每张图一个 (n,k)，同图对比 raw & canon =====
import os, re, glob
from pathlib import Path

MAX_COMBOS_PER_FIG = 1  # 一图一个 (n,k)

rx_stage1 = re.compile(r"stage1_(?:all|pareto)_n(\d+)_k(\d+)")
rx_ga     = re.compile(r"pareto_front_(?:nk_)?n(\d+)_k(\d+)(?:_|\.csv)")

pair2files = {}
for p in front_paths:
    fname = os.path.basename(p)
    m = rx_stage1.search(fname) or rx_ga.search(fname)
    if not m:
        continue
    n, k = int(m.group(1)), int(m.group(2))
    pair2files.setdefault((n, k), []).append(p)

pairs_sorted = sorted(pair2files.keys(), key=lambda t: (t[0], t[1]))
if not pairs_sorted:
    print("[viz-all(nk)] No pareto CSVs to plot.")
else:
    def chunks(lst, size):
        for i in range(0, len(lst), size):
            yield lst[i:i+size]

    for batch_idx, batch_pairs in enumerate(chunks(pairs_sorted, MAX_COMBOS_PER_FIG), 1):
        n0, k0 = batch_pairs[0]
        batch_files = pair2files[(n0, k0)]
        out_dir = OUT_FIG / f"viz_n{n0}_k{k0}"
        out_dir.mkdir(parents=True, exist_ok=True)

        cmd = [PY, str(CLI), "viz-all",
               "--front", *batch_files,
               "--n", str(n0), "--k", str(k0),
               "--out-dir", str(out_dir),
               "--style", STYLE,
               "--y-log"]
        ret, _ = safe_run(cmd, cwd=ROOT)
        if ret != 0:
            print(f"[viz-all(nk) {n0},{k0}] ERROR: ret={ret}")


[run] 'c:\Users\admin\anaconda3\envs\llmcompressor\python.exe' -u 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\scripts\rd_cli.py' viz-all --front 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n2_k2.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\stage1_pareto_n2_k2_canon.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\stage1_pareto_n2_k2_raw.csv' --n 2 --k 2 --out-dir 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\figs\viz_n2_k2' --style ieee --y-log
[viz-all(nk)] saved: d:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\figs\viz_n2_k2\nk_n2_k2_scatter_log.png d:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\figs\viz_n2_k2\nk_n2_k2_growth_knees_log.png d:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\figs\viz_n2_k2\nk_n2_k2_knees_gap_log.png
[run] 'c:\Users\admin\anaconda3\envs\llmcompressor\python.exe' -u 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\scripts\rd_cli.py' viz-all

# Cell 6 — 单图 viz-all（可选熵收敛叠加）

In [None]:
# 1) 从 stage1 前沿选一条规则用于熵收敛绘图（可选）
bits_candidate = None
k_candidate = None
if ('df_stage1_pareto' in globals()
    and df_stage1_pareto is not None
    and not df_stage1_pareto.empty
    and "rule_bits" in df_stage1_pareto.columns):
    row = df_stage1_pareto.loc[df_stage1_pareto["trace_or_Z"].idxmax()] \
          if "trace_or_Z" in df_stage1_pareto.columns else \
          df_stage1_pareto.iloc[0]
    bits_candidate = str(row["rule_bits"])
    k_candidate = int(row["k"])
    print("entropy rule candidate:", bits_candidate[:32]+"...", "k=", k_candidate)

if not front_paths:
    print("[viz-all] skipped (no fronts).")
else:
    cmd = [PY, str(CLI), "viz-all",
           "--out-dir", str(OUT_FIG),
           "--style", STYLE,
           "--y-log",
           "--front", *front_paths]

    if bits_candidate is not None and k_candidate is not None:
        cmd += ["--entropy-bits", bits_candidate,
                "--entropy-k", str(k_candidate),
                "--n-min", "3", "--n-max", "6",
                "--device", DEVICE]

    ret, _ = safe_run(cmd, cwd=ROOT)
    print("[viz-all] return code:", ret)


# Cell 7 — motifs → motifs-explain 全流程（一键）

In [10]:
# =========================
# Cell 7 — 一键“膝点→解释器”流水线（CLI）
# 先生成 motifs（膝点关键结构样本），再运行 motifs-explain（Δ特征解释器）
# =========================
import pandas as pd

if not front_paths:
    raise RuntimeError("未找到前沿 CSV，请先运行 stage1/ga 以生成 stage1_pareto_*.csv 或 pareto_front_*.csv。")

OUT_MOTIF_CSV = OUT_CSV / "motifs"
OUT_MOTIF_FIG = OUT_FIG / "motifs"
OUT_MOTIF_CSV.mkdir(parents=True, exist_ok=True)
OUT_MOTIF_FIG.mkdir(parents=True, exist_ok=True)

# 3) 运行 CLI：motifs
cmd_motifs = [
    PY, str(CLI), "motifs",
    "--front", *front_paths,     # ← 用已解析出的具体文件，而非通配
    "--out-csv", str(OUT_MOTIF_CSV),
    "--out-dir", str(OUT_MOTIF_FIG),
    "--style", STYLE,
    "--y-log",
]
ret, _ = safe_run(cmd_motifs, cwd=ROOT)
if ret != 0:
    raise RuntimeError(f"[Cell7] rd_cli.py motifs ret={ret}")

# 4) 解析索引
index_file = OUT_MOTIF_CSV / "motifs_index.txt"
if index_file.exists():
    paths = {}
    for line in index_file.read_text(encoding="utf-8").splitlines():
        if "=" in line:
            k, v = line.split("=", 1)
            paths[k.strip()] = v.strip()
    ex_csv   = Path(paths.get("examples", OUT_MOTIF_CSV / "motif_knee_examples.csv"))
    sum_csv  = Path(paths.get("summary",  OUT_MOTIF_CSV / "motif_knee_summary.csv"))
    glob_csv = Path(paths.get("global",   OUT_MOTIF_CSV / "motif_global_report.csv"))
else:
    ex_csv   = OUT_MOTIF_CSV / "motif_knee_examples.csv"
    sum_csv  = OUT_MOTIF_CSV / "motif_knee_summary.csv"
    glob_csv = OUT_MOTIF_CSV / "motif_global_report.csv"

print("[Cell7] examples_csv =", ex_csv)
print("[Cell7] summary_csv  =", sum_csv)
print("[Cell7] global_csv   =", glob_csv)
for p in [ex_csv, sum_csv, glob_csv]:
    if not p.exists():
        raise FileNotFoundError(f"[Cell7] 预期输出缺失：{p}")

# 5) 运行 CLI：motifs-explain
cmd_explain = [
    PY, str(CLI), "motifs-explain",
    "--examples", str(ex_csv),
    "--out-csv", str(OUT_MOTIF_CSV),
    "--out-dir", str(OUT_MOTIF_FIG),
    "--style", STYLE,
    "--topN", "20",
    "--tree-depth", "3",
    "--tree-min-leaf", "8",
    "--seed", "0",
]
ret, _ = safe_run(cmd_explain, cwd=ROOT)
if ret != 0:
    raise RuntimeError(f"[Cell7] rd_cli.py motifs-explain ret={ret}")

# 6) 产物总览与快速预览
PRODUCTS = {
    "examples_csv": ex_csv,
    "summary_csv":  sum_csv,
    "global_csv":   glob_csv,
    "dataset_csv":  OUT_MOTIF_CSV / "motif_delta_dataset.csv",
    "logreg_csv":   OUT_MOTIF_CSV / "motif_feature_importance_logreg.csv",
    "tree_csv":     OUT_MOTIF_CSV / "motif_feature_importance_tree.csv",
    "perm_csv":     OUT_MOTIF_CSV / "motif_perm_importance.csv",
    "l1_coeffs":    OUT_MOTIF_CSV / "motif_lr_coeffs.csv",
    "tree_rules":   OUT_MOTIF_CSV / "motif_tree_rules.txt",
    "summary_md":   OUT_MOTIF_CSV / "motif_knee_topline.md",
    "png_logreg":   OUT_MOTIF_FIG / "motif_importance_logreg.png",
    "png_tree":     OUT_MOTIF_FIG / "motif_importance_tree.png",
    "png_l1":       OUT_MOTIF_FIG / "motif_lr_coeffs.png",
    "png_perm":     OUT_MOTIF_FIG / "motif_perm_importance.png",
}
print("\n[Cell7] 关键输出：")
for k, p in PRODUCTS.items():
    print(f"  - {k}: {p}")

EXAMPLES_CSV = ex_csv
SUMMARY_CSV  = sum_csv
GLOBAL_CSV   = glob_csv

# 7) 预览
try:
    df_ex = pd.read_csv(EXAMPLES_CSV)
    delta_cols_pos = [c for c in df_ex.columns if c.startswith("delta_pre_to_knee_")]
    base_cols  = ["n","k","pre_rule_count","knee_rule_count","post_rule_count"]
    preview_cols = [c for c in base_cols if c in df_ex.columns] + (delta_cols_pos[:10] if delta_cols_pos else [])
    if preview_cols:
        try:
            display(df_ex[preview_cols].head(8))
        except Exception:
            print(df_ex[preview_cols].head(8).to_string(index=False))
    else:
        print("[Cell7] examples CSV loaded but preview columns not found.")
except Exception as e:
    print("[Cell7] 预览失败：", e)


[run] 'c:\Users\admin\anaconda3\envs\llmcompressor\python.exe' -u 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\scripts\rd_cli.py' motifs --front 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n2_k2.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n3_k2.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n3_k3.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n4_k2.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n4_k3.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n4_k4.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n5_k2.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n5_k3.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n5_k4.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final

Unnamed: 0,n,k,pre_rule_count,knee_rule_count,post_rule_count,delta_pre_to_knee_c4,delta_pre_to_knee_c5,delta_pre_to_knee_clustering,delta_pre_to_knee_deg_max,delta_pre_to_knee_diag_cnt,delta_pre_to_knee_gap,delta_pre_to_knee_kcore,delta_pre_to_knee_lambda1,delta_pre_to_knee_lap_algebraic,delta_pre_to_knee_near_bip_chord
0,2,2,,1.0,2.0,,,,,,,,,,
1,3,2,,,2.0,,,,,,,,,,
2,3,3,,,,,,,,,,,,,
3,4,2,,1.0,,,,,,,,,,,
4,4,3,,,,,,,,,,,,,
5,4,4,,,,,,,,,,,,,
6,5,2,1.0,2.0,3.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,1.0,2.0,0.0
7,5,3,3.0,4.0,5.0,0.0,0.0,1.0,1.0,-1.0,2.0,2.0,1.0,3.0,0.0


# Cell 8 — 前沿对称性统计与示例渲染（可选）

In [11]:
# ======= Cell 8 — 只用 CLI，聚焦膝点，尽量复用 =======
import os, re, glob
from pathlib import Path

# -------------- 可配置区 --------------
TARGET_NK = (4 , 4)           # 固定目标 (n,k)；如不设则自动从 front_paths 解析第一个
GEO_OPS = "rot,ref,trans"  # 几何对称集合
STATE_PERM = True          # 是否启用状态置换对称
SAMPLES = 512                # 采样条数（枚举不可行时）
ENUM_LIMIT = 1_000_000     # 精确枚举上限，超过退化采样
REUSE = False               # 若已有 symmetry_summary_n{k}_k{k}.csv 则直接复用
KNEE_ONLY = True           # 仅对膝点代表规则做统计；如果 motifs 缺失或无匹配，则回退为全前沿

# -------------- 解析 (n,k) --------------
def _parse_nk_from_filename(p: str):
    rx_stage1 = re.compile(r"stage1_pareto_n(\d+)_k(\d+)")
    rx_ga     = re.compile(r"pareto_front_(?:nk_)?n(\d+)_k(\d+)(?:_|\.csv)")
    fn = os.path.basename(p)
    m = rx_stage1.search(fn) or rx_ga.search(fn)
    if m: return int(m.group(1)), int(m.group(2))
    return None

def _pick_target_nk(front_paths, target_nk=None):
    if target_nk is not None:
        return target_nk
    # 否则挑第一个能解析的
    for p in front_paths:
        nk = _parse_nk_from_filename(p)
        if nk: return nk
    return None

# -------------- 主流程 --------------
if not front_paths:
    raise RuntimeError("[Cell 8] 未发现 front_paths；请先完成 stage1/ga 以生成前沿 CSV。")

nk = _pick_target_nk(front_paths, TARGET_NK)
if nk is None:
    print("[Cell 8] 无法从 front_paths 解析 (n,k)，已跳过对称性分析。")
else:
    n0, k0 = nk
    fps = [p for p in front_paths if (f"_n{n0}_k{k0}" in p) or p.endswith(f"n{n0}_k{k0}_canon.csv")]
    if not fps:
        print(f"[Cell 8] 未找到 n={n0},k={k0} 的前沿 CSV，已跳过。")
    else:
        # 读取 motifs index → examples.csv，用于 --knee-only
        motifs_idx = OUT_CSV / "motifs" / "motifs_index.txt"
        examples_csv = None
        if motifs_idx.exists():
            kv = dict(line.strip().split("=",1) for line in motifs_idx.read_text(encoding="utf-8").splitlines() if "=" in line)
            if "examples" in kv:
                examples_csv = kv["examples"]
        # 若没 index，就尝试默认路径
        if not examples_csv:
            default_ex = OUT_CSV / "motifs" / "motif_knee_examples.csv"
            if default_ex.exists(): examples_csv = str(default_ex)

        cmd = [
            PY, str(CLI), "symmetry",
            "--front", *fps,
            "--n", str(n0), "--k", str(k0),
            "--geo", GEO_OPS,
            "--samples", str(SAMPLES),
            "--enum-limit", str(ENUM_LIMIT),
            "--out-csv", str(OUT_SYM),
            "--out-dir", str(OUT_FIG),
            "--style", STYLE
        ]
        if STATE_PERM: cmd.append("--state-perm")
        if REUSE: cmd.append("--reuse")
        if KNEE_ONLY: cmd.append("--knee-only")
        if examples_csv: cmd += ["--motifs-examples", str(examples_csv)]

        ret, _ = safe_run(cmd, cwd=ROOT)
        if ret != 0:
            print(f"[Cell 8] symmetry CLI 失败（ret={ret}）")
        else:
            # 汇报产物
            summary_csv = OUT_SYM / f"symmetry_summary_n{n0}_k{k0}.csv"
            print("[Cell 8] symmetry summary:", summary_csv if summary_csv.exists() else "(未生成)")
            figs = sorted(glob.glob(str(OUT_FIG / f"*n{n0}_k{k0}*symmetry*.png"))) or \
                   sorted(glob.glob(str(OUT_FIG / "symmetry_*.png")))
            if figs:
                print("[Cell 8] 示例图：")
                for p in figs: print("  •", p)
            else:
                print("[Cell 8] 未发现示例图（模块命名可能不同，属正常）。")


[run] 'c:\Users\admin\anaconda3\envs\llmcompressor\python.exe' -u 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\scripts\rd_cli.py' symmetry --front 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\pareto_front_n4_k4.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\stage1_pareto_n4_k4_canon.csv' 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\stage1_pareto_n4_k4_raw.csv' --n 4 --k 4 --geo rot,ref,trans --samples 512 --enum-limit 1000000 --out-csv 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\symmetry' --out-dir 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\figs' --style ieee --state-perm --knee-only --motifs-examples 'd:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\motifs\motif_knee_examples.csv'
[symmetry] --knee-only 打开，但未在 d:\师大云盘\课业\复杂系统概论\rules_diversity_final\notebooks\results\out_csv\motifs\motif_knee_examples.csv 找到 (n=4,k=4) 的膝点规则；将回退为全前沿。
[symmetry] summary: d:\师大云盘\课业\复杂系统概论\rul