### Per-image 评估导出

In [91]:
# %%
from __future__ import annotations

from pathlib import Path
from collections import Counter
from typing import Dict, Any, List, Optional, Tuple

import pandas as pd
import fiftyone as fo
from fiftyone import ViewField as F


### User Config (reuse yours)


In [92]:
# =========================
# User Config (reuse yours)
# =========================
version = "sahi_null_v2"  # 你已有
confidence_thresholds = [0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.91, 0.92, 0.93]  # 你已有
# ckpt_paths = ckpt_paths  # 直接复用你前面定义好的
# 多模型：直接在这里放 .pt
model_root = Path("/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/02_models/best_models/04_swd_hbb") 
model_names = [
    # "a02_yolo11s_custom7_v2-13_7_34_36-40_10-11_8.pt",
    # "a02_yolo11s_custom7_v2-13_7_34_36-40_10-11_4.pt",
    # "a02_yolo11s_custom7_v1-34_36_40_11_-13-10_16.pt",
    # "a02_yolo11s_custom7_v4-36_40_10_11-7_34-13_16.pt",
    # "a02_yolo11s_custom7_v4-36_40_10_11-7_34-13_8.pt",
    "a03_yolo11s_custom7null_cv1_ms2_0809-0823_10_ok_16.pt",
    "a03_yolo11n_custom7null_cv1_ms2_0809-0823_10_ok_8.pt",
    "a03_yolo11s_custom7null_cv1_ms2_0809-0823_10_ok_8.pt",
    "a03_yolo11s_custom7null_cv1_ms2_0809-0823_10_ok_4.pt",
    "a04_yolo11s_custom7null_cv1_ms2_0809-0823_10_ok_16.pt",
    "a04_yolo11n_custom7null_cv1_ms2_0809-0823_10_ok_4.pt",
    "a04_yolo11n_custom7null_cv1_ms2_0809-0823_10_ok_8.pt",
    "a04_yolo11s_custom7null_cv1_ms2_0809-0823_10_ok_8.pt",
]

ckpt_paths = [str(model_root / name) for name in model_names]
out_dir = Path("./_eval_exports_per_images") / version
out_dir.mkdir(parents=True, exist_ok=True)

GT_FIELD = "ground_truth"

# 你想更“hit-based”的话，把 IoU 放低一些
IOU_THR = 0.50  # 可改 0.05 / 0.01

### Helpers

In [93]:
# =========================
# Helpers
# =========================
import re
import pandas as pd
from pathlib import Path
import numpy as np
from pydash import min_

def parse_dt_focus(fp: str, year=2024):
    m = re.search(r"(\d{4})_(\d{4})_(\d+)\.(jpg|png)$", Path(fp).name)
    if not m:
        return pd.NaT, None

    mmdd, hhmm, focus = m.group(1), m.group(2), int(m.group(3))
    return (
        pd.Timestamp(
            year,
            int(mmdd[:2]),
            int(mmdd[2:]),
            int(hhmm[:2]),
            int(hhmm[2:]),
        ),
        focus,
    )

def model_tag_from_path(p: str) -> str:
    return Path(p).stem

def safe_slug(s: str) -> str:
    # 文件名安全：避免奇怪字符
    return s.replace("/", "_").replace(" ", "_").replace("-", "__")

def _safe_dets(sample: fo.Sample, field: str):
    obj = sample.get_field(field)
    if obj is None:
        return []
    dets = getattr(obj, "detections", None)
    return dets if dets else []

def _get_eval_type(det: fo.Detection, eval_key: str) -> Optional[str]:
    """
    读取 detection 上由 evaluate_detections 写入的 tp/fp/fn 类型。
    不同 FO 版本可能写入结构略不同，这里做兜底。
    """
    try:
        v = det.get(eval_key, None)
    except Exception:
        v = None

    if isinstance(v, dict) and "type" in v:
        return v["type"]
    if isinstance(v, str):
        return v

    try:
        ev = det.get("eval", None)
    except Exception:
        ev = None
    if isinstance(ev, dict):
        vv = ev.get(eval_key, None)
        if isinstance(vv, dict) and "type" in vv:
            return vv["type"]

    return None



def export_image_level_rows(
    view: fo.DatasetView,
    dataset_name: str,
    subdir_name: str,
    subdir_path: str,
    model_tag: str,
    ckpt_path: str,
    pred_field: str,
    conf_thr: float,
    eval_key: str,
) -> pd.DataFrame:
    rows: List[Dict[str, Any]] = []

    for s in view.iter_samples(progress=True):
        gt_dets = _safe_dets(s, GT_FIELD)
        

        # 获取每个检测的概率信息  
        pred_dets = _safe_dets(s, pred_field)  
        confidences = [d.confidence for d in pred_dets if d.confidence is not None]  
        ious = [getattr(d, f"{eval_key}_iou") for d in pred_dets   
                if hasattr(d, f"{eval_key}_iou") and getattr(d, f"{eval_key}_iou") is not None]

        tp_img = getattr(s, f"{eval_key}_tp", 0)  
        fp_img = getattr(s, f"{eval_key}_fp", 0)   
        fn_img = getattr(s, f"{eval_key}_fn", 0)
        gt_count_img = len(gt_dets)
        pred_count_img = len(pred_dets)

        # 计算统计值
        q1_confidence = np.percentile(confidences, 25) if confidences else 0.0
        q3_confidence = np.percentile(confidences, 75) if confidences else 0.0
        iqr_confidence = q3_confidence - q1_confidence
        std_confidence = np.std(confidences) if confidences else 0.0
        median_confidence = np.median(confidences) if confidences else 0.0
        avg_confidence = np.mean(confidences) if confidences else 0.0  
        max_confidence = np.max(confidences) if confidences else 0.0
        min_confidence = np.min(confidences) if confidences else 0.0

        q1_iou = np.percentile(ious, 25) if ious else 0.0
        q3_iou = np.percentile(ious, 75) if ious else 0.0
        iqr_iou = q3_iou - q1_iou
        std_iou = np.std(ious) if ious else 0.0
        median_iou = np.median(ious) if ious else 0.0
        avg_iou = np.mean(ious) if ious else 0.0  
        max_iou = np.max(ious) if ious else 0.0
        min_iou = np.min(ious) if ious else 0.0

        pred_gt_ratio = (
            pred_count_img / gt_count_img
            if gt_count_img > 0 else np.nan
        )
        tp_ratio = (
            tp_img / gt_count_img
            if gt_count_img > 0 else np.nan
        )
        fp_ratio = (
            fp_img / pred_count_img
            if pred_count_img > 0 else np.nan
        )
        fn_ratio = (
            fn_img / gt_count_img
            if gt_count_img > 0 else np.nan
        )


        # 基于 IoU 匹配的指标  
        gt_present = gt_count_img > 0
        pred_present = pred_count_img > 0

        hit_img = int(gt_present and pred_present)
        miss_img = int(gt_present and not pred_present)
        false_alarm_img = int(not gt_present and pred_present)
        correct_reject_img = int(not gt_present and not pred_present)

        # hit_img = int(tp_img > 0)  # 至少有一个真正的 IoU 匹配，boolean 转 int
        # miss_img = int(fn_img > 0)  # 有 GT 但没有匹配的预测, boolean 转 int
        # false_alarm_img = int(fp_img > 0)  # 有预测但没有匹配的 GT , boolean 转 int
        # correct_reject_img = int(gt_count_img == 0 and pred_count_img == 0)  # 没有 GT 也没有预测 , boolean 转 int

        # 解析时间和焦点（如果需要）
        capture_dt, focus = parse_dt_focus(s.filepath)
        if pd.isna(capture_dt):
            capture_date = None
            capture_time = None
        else:
            capture_date = capture_dt.date()     # datetime.date
            capture_time = capture_dt.time()     # datetime.time


        rows.append({
            # --- identity ---
            "dataset_name": dataset_name,
            "subdir_name": subdir_name,
            "subdir_path": subdir_path,
            "version": version,
            "model_tag": model_tag,
            "ckpt_path": ckpt_path,
            "pred_field": pred_field,
            "confidence_threshold": conf_thr,
            "iou_threshold": IOU_THR,

            # --- sample ---
            "sample_id": str(s.id),
            "filepath": s.filepath,
            "capture_datetime": capture_dt,
            "capture_date": capture_date,
            "capture_time": capture_time,
            "focus": focus,

            # --- counts ---
            "gt_count_img": gt_count_img,
            "pred_count_img": pred_count_img,
            "tp_img": tp_img,
            "fp_img": fp_img,
            "fn_img": fn_img,
            "tp_ratio": tp_ratio,
            "fp_ratio": fp_ratio,
            "fn_ratio": fn_ratio,
            "pred_gt_ratio": pred_gt_ratio,
            "hit_img": hit_img,  
            "miss_img": miss_img,  
            "false_alarm_img": false_alarm_img,  
            "correct_reject_img": correct_reject_img, 
            
            # --- stats ---
            "avg_confidence": avg_confidence,  
            "max_confidence": max_confidence,  
            "min_confidence": min_confidence,
            "std_confidence": std_confidence,
            "median_confidence": median_confidence,
            "q1_confidence": q1_confidence,
            "q3_confidence": q3_confidence,
            "iqr_confidence": iqr_confidence,
            "avg_iou": avg_iou,  
            "max_iou": max_iou,  
            "min_iou": min_iou,
            "std_iou": std_iou,
            "median_iou": median_iou,
            "q1_iou": q1_iou,
            "q3_iou": q3_iou,
            "iqr_iou": iqr_iou,

            # full data
            "confidences": confidences,
            "ious": ious,
        })

    return pd.DataFrame(rows)

def summarize_from_image_df(img_df: pd.DataFrame) -> Dict[str, Any]:  
    # 全局汇总（IoU-based）  
    tp = int(img_df["tp_img"].sum())  
    fp = int(img_df["fp_img"].sum())  
    fn = int(img_df["fn_img"].sum())  

    gt_total = int(img_df["gt_count_img"].sum())  
    pred_total = int(img_df["pred_count_img"].sum())  

    # IoU-match based precision/recall  
    precision = tp / (tp + fp) if (tp + fp) > 0 else float("nan")  
    recall = tp / (tp + fn) if (tp + fn) > 0 else float("nan")  
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else float("nan")  

    # 使用 IoU-based 图像级别指标  
    hit_img = int(img_df["hit_img"].sum())  
    miss_img = int(img_df["miss_img"].sum())  
    false_alarm_img = int(img_df["false_alarm_img"].sum())  
    correct_reject_img = int(img_df["correct_reject_img"].sum())  

    # 图像级别的 recall/precision  
    denom_pos = hit_img + miss_img  
    img_recall = hit_img / denom_pos if denom_pos > 0 else float("nan")  

    denom_predpos = hit_img + false_alarm_img  
    img_precision = hit_img / denom_predpos if denom_predpos > 0 else float("nan")  

    img_f1 = (2 * img_precision * img_recall / (img_precision + img_recall)) if (img_precision + img_recall) > 0 else float("nan")  

    return {  
        "gt_total": gt_total,  
        "pred_total": pred_total,  
        "tp_total": tp,  
        "fp_total": fp,  
        "fn_total": fn,  
        "precision_iou": precision,  
        "recall_iou": recall,  
        "f1_iou": f1,  

        # 更新为 IoU-based 图像级别指标  
        "hit_images": hit_img,  
        "miss_images": miss_img,  
        "false_alarm_images": false_alarm_img,  
        "correct_reject_images": correct_reject_img,  
        "img_precision": img_precision,  
        "img_recall": img_recall,  
        "img_f1": img_f1,  
        "hit_rate_img": float(img_df["hit_img"].mean()) if len(img_df) else float("nan"),  
    }

def fetch_subsequent_dir(data_root: Path, target_subdir_name: Path) -> Tuple[List[Path], List[str]]:
    data_paths = list(data_root.glob(f"*/{target_subdir_name}"))
    subdir_path_list = [data_path.parent for data_path in data_paths]
    subdir_name_list = [subdir.name for subdir in subdir_path_list]
    return subdir_path_list, subdir_name_list



In [94]:
# %% =========================
# 3) Discover subdirs
# =========================
data_root = Path("/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone")
target_subdir_name = Path("data")

subdir_path_list, subdir_name_list = fetch_subsequent_dir(data_root, target_subdir_name)
print("Found subdirs:", len(subdir_name_list))
for n, p in zip(subdir_name_list, subdir_path_list):
    print(n, p)


Found subdirs: 7
ms2_0726-0809_13_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone/ms2_0726-0809_13_ok
sw1_0605-0613_07_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone/sw1_0605-0613_07_ok
ms1_0809-0823_34_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone/ms1_0809-0823_34_ok
ms1_0710-0726_36_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone/ms1_0710-0726_36_ok
ms1_0605-0621_40_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone/ms1_0605-0621_40_ok
ms2_0809-0823_10_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/a02_16mp_2024_datasets_fiftyone/ms2_0809-0823_10_ok
ms1_0726-0809_11_ok /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_

### Batch Run


In [95]:
# =========================
# Batch Run
# =========================
summary_rows: List[Dict[str, Any]] = []

# 复用你前面 fetch_subsequent_dir 得到的 subdir_name_list / subdir_path_list
for subdir_path, subdir_name in zip(subdir_path_list, subdir_name_list):
    dataset_name = f"{version}_{subdir_name}"
    ds = fo.load_dataset(dataset_name)
    print(f"processing: {dataset_name}")
    ds.delete_evaluations()  


    for ckpt_path in ckpt_paths:
        model_tag = model_tag_from_path(ckpt_path)
        pred_field = f"small_slices_{model_tag}"

        if pred_field not in ds.get_field_schema():
            print(f"[WARN] Missing pred_field={pred_field} in dataset={dataset_name}, skip.")
            continue

        for thr in confidence_thresholds:
            # --- make view (thresholded) ---
            view = ds.filter_labels(pred_field, F("confidence") > thr, only_matches=False)

            # --- run eval (writes tp/fp/fn info onto detections) ---
            eval_key = f"eval_img__{safe_slug(model_tag)}__c{int(thr*100)}__iou{int(IOU_THR*100)}"
            view.evaluate_detections(
                pred_field,
                gt_field=GT_FIELD,
                eval_key=eval_key,
                iou=IOU_THR,
                compute_mAP=False,
            )

            # --- export per-image table ---
            img_df = export_image_level_rows(
                view=view,
                dataset_name=dataset_name,
                subdir_name=subdir_name,
                subdir_path=str(subdir_path),
                model_tag=model_tag,
                ckpt_path=ckpt_path,
                pred_field=pred_field,
                conf_thr=thr,
                eval_key=eval_key,
            )

            out_img = out_dir / safe_slug(dataset_name) / f"image_level_{safe_slug(model_tag)}__c{int(thr*100)}__iou{int(IOU_THR*100)}.csv"
            out_img.parent.mkdir(parents=True, exist_ok=True)
            img_df.to_csv(out_img, index=False)
            print("[SAVE]", out_img)

            # --- append summary row from per-image ---
            srow = {
                "dataset_name": dataset_name,
                "subdir_name": subdir_name,
                "subdir_path": str(subdir_path),
                "version": version,
                "model_tag": model_tag,
                "ckpt_path": ckpt_path,
                "pred_field": pred_field,
                "confidence_threshold": thr,
                "iou_threshold": IOU_THR,
            }
            srow.update(summarize_from_image_df(img_df))
            summary_rows.append(srow)
    #         break
    #     break
    # break

# export global summary
summary_df = pd.DataFrame(summary_rows)
out_sum = out_dir / f"per_image_summary__{version}__iou{int(IOU_THR*100)}.csv"
summary_df.to_csv(out_sum, index=False)
print("Saved summary:", out_sum)
print("Summary shape:", summary_df.shape)


processing: sahi_null_v2_ms2_0726-0809_13_ok
Evaluating detections...
 100% |█████████████████| 279/279 [894.9ms elapsed, 0s remaining, 311.8 samples/s]      
 100% |█████████████████| 279/279 [1.6s elapsed, 0s remaining, 235.9 samples/s]      
[SAVE] _eval_exports_per_images/sahi_null_v2/sahi_null_v2_ms2_0726__0809_13_ok/image_level_a03_yolo11s_custom7null_cv1_ms2_0809__0823_10_ok_16__c50__iou50.csv
Evaluating detections...
 100% |█████████████████| 279/279 [928.9ms elapsed, 0s remaining, 300.4 samples/s]      
 100% |█████████████████| 279/279 [1.7s elapsed, 0s remaining, 231.5 samples/s]      
[SAVE] _eval_exports_per_images/sahi_null_v2/sahi_null_v2_ms2_0726__0809_13_ok/image_level_a03_yolo11s_custom7null_cv1_ms2_0809__0823_10_ok_16__c60__iou50.csv
Evaluating detections...
 100% |█████████████████| 279/279 [930.4ms elapsed, 0s remaining, 299.9 samples/s]      
 100% |█████████████████| 279/279 [1.7s elapsed, 0s remaining, 192.5 samples/s]      
[SAVE] _eval_exports_per_images/sahi_