In [79]:
# === Cell 1: 配置 ===
from pathlib import Path
import re
import numpy as np
from typing import Dict, List, Tuple, Optional

In [80]:
# Directory that holds the performance logs.
PERF_DIR = Path("./Performances/")

# Dataset prefixes included in the statistics
# (files such as fr1_*.txt, fr2_*.txt, fr3_*.txt are matched automatically).
DATASETS = ["fr1", "fr2", "fr3"]

# Short codes used in file names  ->  full names displayed in the tables.
short_tags: List[str] = ["orb", "akz", "kaz", "sft", "bsk", "spp", "corr"]
full_names: List[str] = ["ORB", "AKAZE", "KAZE", "SIFT", "BRISK", "SuperPoint", "All-five-other"]
TAG2NAME: Dict[str, str] = {tag: name for tag, name in zip(short_tags, full_names)}

# When True, only files whose short codes are all declared above are used;
# when False, unknown codes are displayed upper-cased as-is.
STRICT_TAGS = False

# Decimal places (seconds and milliseconds can be set separately;
# table A prints mean±std, table B prints plain numbers).
PREC_S = 3      # s
PREC_MS = 3     # ms
PREC_B = 3      # precision used in table B

# Table A: whether to bold (\textbf) the row with the smallest mean in each column.
BOLD_MIN_IN_TABLE_A = True

# Human-readable column headers for table B, one per entry of DATASETS.
DATASET_COL_LABELS = {
    "fr1": "fr1 (ms)",
    "fr2": "fr2 (ms)",
    "fr3": "fr3 (ms)",
}


In [81]:
# === Cell 2: 解析器与聚合工具 ===

def _tokenize_combo_from_filename(stem: str, dataset: str) -> List[str]:
    """
    从文件名去掉前缀 dataset_ 和后缀 _pref，剩下的部分按 '_' 拆为组合短代号列表。
    例如： fr1_kaz_spp_pref.txt -> ["kaz", "spp"]
           fr1_corr_pref.txt    -> ["corr"]
    """
    core = stem
    if core.startswith(dataset + "_"):
        core = core[len(dataset)+1:]
    if core.endswith("_pref"):
        core = core[:-5]
    tags = [t for t in core.split("_") if t]
    return tags

def _nice_label_from_tags(tags: List[str], tag2name: Dict[str,str], strict: bool) -> str:
    parts = [(tag2name[t] if (t in tag2name) else (t.upper() if not strict else None)) for t in tags]
    parts = [p for p in parts if p is not None]
    return " + ".join(parts) if parts else "UNKNOWN"

def parse_pref_runs(txt_path: Path) -> Dict[str, Dict[str, List[float]]]:
    """
    Parse every summary table contained in a *_pref.txt log.

    A table begins on the line after a header of the form
    "name mean median rmse min max count" (case-insensitive) and ends at the
    first blank line or line starting with '='. Inside a table, each line that
    matches "<name> <mean> <median> <rmse> <min> <max> <count>" contributes one
    value to that metric; other lines are skipped.

    Numeric fields accept plain decimals as well as scientific notation
    (e.g. "2.5e-05") and forms such as ".5" or "5.", so rows written that way
    are no longer silently dropped.

    Args:
        txt_path: path of the log file to read (decoded as UTF-8, undecodable
            bytes are ignored).

    Returns:
        {
          "<MetricName>": {
             "mean": [value_from_table_1, value_from_table_2, ...],
             "rmse": [value_from_table_1, value_from_table_2, ...]
          },
          ...
        }
        Metric names are taken verbatim from the first column
        (e.g. "Tracking", "Correlation", "Pipeline", "Total Init",
        "<Extractor> Extract").
    """
    metrics: Dict[str, Dict[str, List[float]]] = {}

    header_pat = re.compile(r"^\s*name\s+mean\s+median\s+rmse\s+min\s+max\s+count\s*$", re.I)

    # Any float literal float() can parse: optional sign, digits with optional
    # fraction (or a bare fraction), optional exponent.
    number = r"[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?"
    row_pat = re.compile(
        r"^\s*(?P<name>.+?)\s+"
        + "".join(rf"(?P<{field}>{number})\s+" for field in ("mean", "median", "rmse", "min", "max"))
        + r"(?P<count>\d+)\s*$"
    )

    in_table = False
    with txt_path.open("r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            if not in_table:
                # Outside a table only the header line matters.
                if header_pat.match(line):
                    in_table = True
                continue
            # Inside a table: a blank line or a '=' rule closes it.
            if line.strip() == "" or line.startswith("="):
                in_table = False
                continue
            m = row_pat.match(line)
            if not m:
                # Not a data row (e.g. a separator); ignore it.
                continue
            entry = metrics.setdefault(m.group("name").strip(), {"mean": [], "rmse": []})
            entry["mean"].append(float(m.group("mean")))
            entry["rmse"].append(float(m.group("rmse")))

    return metrics

def agg_mean_std(vals: List[float]) -> Tuple[Optional[float], Optional[float], int]:
    """
    Summarise a list of per-run values.

    Returns (mean, std, n) where std is the sample standard deviation (ddof=1).
    An empty input gives (None, None, 0); a single value gives std = 0.0.
    """
    count = len(vals)
    if not count:
        return (None, None, 0)
    samples = np.asarray(vals, float)
    # The sample std is undefined for one observation, so report 0.0 instead.
    spread = 0.0 if count <= 1 else float(np.std(samples, ddof=1))
    return (float(np.mean(samples)), spread, count)

def scan_performance_dir(perf_dir: Path, datasets: List[str]) -> List[dict]:
    """
    Collect every {dataset}_*_pref.txt file found in `perf_dir`.

    Files are visited dataset by dataset, in sorted path order. When STRICT_TAGS
    is set, files containing a short code missing from TAG2NAME are skipped.

    Returns a list whose elements look like:
      {
        "dataset": "fr1",
        "tags": ["kaz","spp"],
        "label": "KAZE + SuperPoint",
        "metrics": {  # raw result of parse_pref_runs
            "Tracking":   {"mean":[...], "rmse":[...]},
            "Correlation":{"mean":[...], "rmse":[...]},
            "Pipeline":   {"mean":[...], "rmse":[...]},
            "Total Init": {"mean":[...], "rmse":[...]},
            ...
        },
        "path": Path
      }
    """
    collected = []
    for dataset in datasets:
        pattern = f"{dataset}_*_pref.txt"
        for path in sorted(perf_dir.glob(pattern)):
            combo = _tokenize_combo_from_filename(path.stem, dataset)
            if STRICT_TAGS and not all(tag in TAG2NAME for tag in combo):
                continue
            collected.append({
                "dataset": dataset,
                "tags": combo,
                "label": _nice_label_from_tags(combo, TAG2NAME, STRICT_TAGS),
                "metrics": parse_pref_runs(path),
                "path": path,
            })
    return collected


In [82]:
# === Cell 3: 表格 A（单数据集） ===

def _fmt_pm(m: Optional[float], s: Optional[float], prec: int, bold: bool=False) -> str:
    if m is None or s is None:
        return "--"
    txt = f"{m:.{prec}f} $\\pm$ {s:.{prec}f}"
    return f"\\textbf{{{txt}}}" if bold else txt

def build_table_A_for_dataset(all_rows: List[dict], dataset: str) -> str:
    """
    Render the LaTeX table for a single dataset.

    Columns: Initialising(s) | Tracking(ms) | Correlation(ms) | Pipeline(ms).
    Each cell is the mean±std over the per-run means of that metric; the
    "Total Init" values are divided by 1000 (ms -> s).

    Extra rule:
      - When a combination is one real feature (exactly one tag, and that tag is
        a declared short code other than 'corr'), the Correlation cell shows '-'
        and the row is left out of the Correlation minimum search / bolding.

    Rows are ordered by number of tags, then by label. When
    BOLD_MIN_IN_TABLE_A is set, the smallest mean of each column is bolded.
    Returns a LaTeX comment line when the dataset has no rows.
    """
    selected = [row for row in all_rows if row["dataset"] == dataset]
    # Same ordering as the pose analysis: feature count first, then name.
    selected.sort(key=lambda row: (len(row["tags"]), row["label"]))
    if not selected:
        return f"% No rows for dataset {dataset}\n"

    # Tags that count as a real single feature ('corr' is excluded).
    real_features = {tag for tag in short_tags if tag != "corr"}

    def _to_seconds(value_ms):
        return None if value_ms is None else value_ms / 1000.0

    # Aggregate every metric once per row; both the minimum search and the
    # cell formatting below read from this list.
    per_row = []
    for row in selected:
        metrics = row["metrics"]
        combo = row.get("tags", [])
        per_row.append({
            "label": row["label"],
            "single": (len(combo) == 1 and combo[0] in real_features),
            "init": agg_mean_std(metrics.get("Total Init", {}).get("mean", [])),
            "trk": agg_mean_std(metrics.get("Tracking", {}).get("mean", [])),
            "corr": agg_mean_std(metrics.get("Correlation", {}).get("mean", [])),
            "pipe": agg_mean_std(metrics.get("Pipeline", {}).get("mean", [])),
        })

    # Column means used for the minimum search; None entries are ignored.
    column_means = {
        "init": [_to_seconds(info["init"][0]) for info in per_row],
        "trk": [info["trk"][0] for info in per_row],
        # Single-feature rows do not compete in the Correlation column.
        "corr": [None if info["single"] else info["corr"][0] for info in per_row],
        "pipe": [info["pipe"][0] for info in per_row],
    }
    best_row = {}
    for column, means in column_means.items():
        ranked = [(idx, value) for idx, value in enumerate(means) if value is not None]
        best_row[column] = min(ranked, key=lambda pair: pair[1])[0] if ranked else None

    def _bold(column, idx):
        return BOLD_MIN_IN_TABLE_A and best_row[column] == idx

    # Table header
    header = fr"""\begin{{table}}[htbp]
\centering
\caption{{Performance on {dataset} sequence over 20 runs for all feature combinations}}
\vspace{{0.2cm}}
\label{{tab:perf_{dataset}}}
\begin{{tabular}}{{lcccc}}
\toprule
\textbf{{Feature Extractor(s)}} &
\multicolumn{{1}}{{c}}{{\textbf{{Initialising}}(s)}} &
\multicolumn{{1}}{{c}}{{\textbf{{Tracking}}(ms)}} &
\multicolumn{{1}}{{c}}{{\textbf{{Correlation}}(ms)}} &
\multicolumn{{1}}{{c}}{{\textbf{{Pipeline}}(ms)}} \\
\cmidrule(lr){{2-2}}\cmidrule(lr){{3-3}}\cmidrule(lr){{4-4}}\cmidrule(lr){{5-5}}
& Mean $\downarrow$ & Mean $\downarrow$ & Mean $\downarrow$ & Mean $\downarrow$ \\
\midrule
"""

    body = []
    for idx, info in enumerate(per_row):
        label = info["label"]
        init_mean, init_std, _ = info["init"]
        trk_mean, trk_std, _ = info["trk"]
        corr_mean, corr_std, _ = info["corr"]
        pipe_mean, pipe_std, _ = info["pipe"]

        cells = [
            # Init is shown in seconds; the other columns stay in ms.
            _fmt_pm(_to_seconds(init_mean), _to_seconds(init_std), PREC_S, bold=_bold("init", idx)),
            _fmt_pm(trk_mean, trk_std, PREC_MS, bold=_bold("trk", idx)),
            # Single feature: a plain dash, never bold.
            "-" if info["single"] else _fmt_pm(corr_mean, corr_std, PREC_MS, bold=_bold("corr", idx)),
            _fmt_pm(pipe_mean, pipe_std, PREC_MS, bold=_bold("pipe", idx)),
        ]
        body.append(f"{label}\n& " + " & ".join(cells) + " \\\\")

    footer = r"""
\bottomrule
\end{tabular}
\end{table}
""".strip("\n")

    return header + "\n".join(body) + "\n" + footer



In [83]:
# === Cell 4: 表格 B（跨数据集汇总：Main Feature × Cooperation） ===
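# Notes:
# - Aggregates the main feature's "<FULL_NAME> Extract" timing (Mean and RMSE), not the Tracking metric;
# - "Standard Alone": the file's combination is exactly [f];
# - "with XXX": the file's combination is exactly [f, other] (any order); the row is only generated when such a file exists;
# - "with All-five-other": uses the file whose combination is ["corr"] (if present), treated as "cooperating with all the other features".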

def _collect_extract_values_for(dataset: str, rows: List[dict], combos: List[List[str]], feature_tag: str) -> Tuple[List[float], List[float]]:
    """
    Gather the per-run mean and rmse of "<FULL_NAME> Extract" for one dataset.

    The metric name is derived from `feature_tag` through TAG2NAME (falling back
    to the upper-cased tag), e.g. 'kaz' -> 'KAZE Extract', 'spp' -> 'SuperPoint Extract'.
    Only rows of `dataset` whose tag set equals one of `combos` (compared as
    sets, so order does not matter) contribute.

    Returns: (all_run_means, all_run_rmses)
    """
    metric_name = f"{TAG2NAME.get(feature_tag, feature_tag.upper())} Extract"
    wanted = [set(combo) for combo in combos]

    run_means: List[float] = []
    run_rmses: List[float] = []
    for row in rows:
        # Keep only rows of this dataset whose tag set is one of the wanted sets.
        if row["dataset"] == dataset and set(row["tags"]) in wanted:
            entry = row["metrics"].get(metric_name, {})
            run_means += entry.get("mean", [])
            run_rmses += entry.get("rmse", [])
    return run_means, run_rmses

def _fmt_num(x: Optional[float], prec: int) -> str:
    return ("--" if x is None else f"{x:.{prec}f}")

def build_table_B_summary(all_rows: List[dict]) -> str:
    """
    Build the cross-dataset "Main Feature x Cooperation" LaTeX table.

    For every main feature f (declared short codes except 'corr', minus any
    listed in the optional global EXCLUDE_FEATURES) the table reports the Mean
    and RMSE of "<FULL_NAME> Extract", averaged over all collected runs:
      - Standard Alone       : files whose tag set is {f}
      - with XXX             : files whose tag set is {f, other}
      - with All-five-other  : the {'corr'} file, reading f's Extract metric
    A sub-row is emitted only when a matching file exists in some dataset of
    DATASETS; missing values are printed as "--".

    Column headers use DATASET_COL_LABELS (falling back to the dataset id).

    NOTE(review): the caption still reads "Tracking performance summary"
    although the aggregated metric is the extract time - confirm the wording.

    Args:
        all_rows: rows in the format produced by scan_performance_dir.

    Returns:
        The complete LaTeX table environment as a string.
    """
    # Column headers
    cols = "cc" * len(DATASETS)
    dataset_cols = []
    for ds in DATASETS:
        # Use the configured human-readable header; fall back to the raw id.
        label = DATASET_COL_LABELS.get(ds, ds)
        dataset_cols.append(fr"\multicolumn{{2}}{{c}}{{\textbf{{{label}}}}}")
    cmid_rules   = [fr"\cmidrule(lr){{{i}-{i+1}}}" for i in range(3, 2*len(DATASETS)+3, 2)]
    metric_cols  = [r"Mean $\downarrow$ & RMSE $\downarrow$" for _ in DATASETS]

    header = fr"""\begin{{table}}[htbp]
\centering
\caption{{Tracking performance summary across datasets (per feature \& cooperation)}}
\vspace{{0.2cm}}
\label{{tab:perf_summary}}
\begin{{tabular}}{{ll{cols}}}
\toprule
\multicolumn{{2}}{{c}}{{\textbf{{Feature Extractor(s)}}}} & {' & '.join(dataset_cols)} \\
\cmidrule(lr){{1-2}} {' '.join(cmid_rules)}
Main Feature & Cooperation & {' & '.join(metric_cols)} \\
\midrule
"""

    # Main features follow the order of short_tags; 'corr' is never a main
    # feature, and tags listed in the optional EXCLUDE_FEATURES global are
    # dropped (assumed to be an iterable of short codes, or a single code).
    excluded = globals().get("EXCLUDE_FEATURES") or ()
    if isinstance(excluded, str):
        excluded = (excluded,)
    excluded = set(excluded)
    base_feats = [t for t in short_tags if t != "corr" and t not in excluded]

    # Tag sets that actually occur in the selected datasets (computed once).
    present = {frozenset(row["tags"]) for row in all_rows if row["dataset"] in DATASETS}

    lines = []
    for f in base_feats:
        # Each entry: (cooperation label, combos to match exactly as sets)
        subrows = []

        # 1) Standard Alone: {f}
        if frozenset([f]) in present:
            subrows.append(("Standard Alone", [[f]]))

        # 2) with other: {f, other}
        for other in base_feats:
            if other == f:
                continue
            if frozenset([f, other]) in present:
                subrows.append((f"with {TAG2NAME.get(other, other.upper())}", [[f, other]]))

        # 3) with All-five-other: f's Extract metric read from the {'corr'} file
        if frozenset(["corr"]) in present:
            subrows.append(("with All-five-other", [["corr"]]))

        if not subrows:
            continue

        for j, (coop, combos) in enumerate(subrows):
            cells = []
            for ds in DATASETS:
                means, rmses = _collect_extract_values_for(
                    ds, all_rows,
                    combos=combos,
                    feature_tag=f  # always the main feature's "<FULL_NAME> Extract"
                )
                mean_val = float(np.mean(means)) if len(means) else None
                rmse_val = float(np.mean(rmses)) if len(rmses) else None
                cells.append(_fmt_num(mean_val, PREC_B))
                cells.append(_fmt_num(rmse_val, PREC_B))

            if j == 0:
                # The first sub-row carries the multirow cell with the feature name.
                main = TAG2NAME.get(f, f.upper())
                lines.append(fr"\multirow{{{len(subrows)}}}{{*}}{{{main}}} & {coop} & " + " & ".join(cells) + r" \\")
            else:
                lines.append(f"& {coop} & " + " & ".join(cells) + r" \\")

        lines.append(r"\midrule")

    # No rule between the last group and the bottom rule.
    if lines and lines[-1] == r"\midrule":
        lines.pop()

    footer = r"""
\bottomrule
\end{tabular}
\end{table}
""".strip("\n")

    return header + "\n".join(lines) + "\n" + footer


In [84]:
# === Cell 5: 执行 ===
# Scan the performance directory once; both tables are built from these rows.
all_rows = scan_performance_dir(PERF_DIR, DATASETS)

# Table A: generate one table per dataset (you can also pick only some of them)
for ds in DATASETS:
    latex_A = build_table_A_for_dataset(all_rows, ds)
    print(latex_A)
    print("\n% ------------------------------\n")
    # To save the table to disk:
    # Path(f"tableA_{ds}.tex").write_text(latex_A, encoding="utf-8")

# Table B: cross-dataset summary table
latex_B = build_table_B_summary(all_rows)
print(latex_B)
# To save the table to disk:
# Path("tableB_summary.tex").write_text(latex_B, encoding="utf-8")


\begin{table}[htbp]
\centering
\caption{Performance on fr1 sequence over 20 runs for all feature combinations}
\vspace{0.2cm}
\label{tab:perf_fr1}
\begin{tabular}{lcccc}
\toprule
\textbf{Feature Extractor(s)} &
\multicolumn{1}{c}{\textbf{Initialising}(s)} &
\multicolumn{1}{c}{\textbf{Tracking}(ms)} &
\multicolumn{1}{c}{\textbf{Correlation}(ms)} &
\multicolumn{1}{c}{\textbf{Pipeline}(ms)} \\
\cmidrule(lr){2-2}\cmidrule(lr){3-3}\cmidrule(lr){4-4}\cmidrule(lr){5-5}
& Mean $\downarrow$ & Mean $\downarrow$ & Mean $\downarrow$ & Mean $\downarrow$ \\
\midrule
AKAZE
& 1.154 $\pm$ 0.015 & 3.834 $\pm$ 0.072 & - & 22.178 $\pm$ 0.214 \\
All-five-other
& 4.818 $\pm$ 0.290 & 43.858 $\pm$ 0.714 & 5.614 $\pm$ 0.089 & 187.450 $\pm$ 1.570 \\
BRISK
& 1.655 $\pm$ 0.033 & 3.879 $\pm$ 0.087 & - & 37.833 $\pm$ 0.256 \\
KAZE
& 2.476 $\pm$ 0.051 & 4.314 $\pm$ 0.046 & - & 78.608 $\pm$ 0.555 \\
ORB
& 7.601 $\pm$ 1.476 & \textbf{2.974 $\pm$ 0.077} & - & \textbf{9.648 $\pm$ 0.199} \\
SIFT
& 1.652 $\pm$ 0.034 & 3.7