# In [4]:  (Jupyter cell prompt left over from the notebook export)
# report_generator.py
from __future__ import annotations

from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd
import plotly.graph_objects as go

from ty_fo_tools import (
    add_fo_url,
    ensure_event_cols,
    ensure_time_cols,
    find_per_image_csvs,
    infer_event_label,
    safe_name,
)
from ty_fo_tools import make_day_summary_html, make_top_table_html
from ty_fo_tools import PlottingStrategy, SchemeAGtTpStrategy

# Config
# Directory with the per-image evaluation CSV exports; main() hands it to
# find_per_image_csvs() to discover the input files.
PER_IMAGE_DIR = Path("./_eval_exports_per_images") / "sahi_null_v2_test"
# Root of all generated reports; process_one_csv() writes each report under
# OUT_ROOT / <csv parent dir name> / safe_name(<csv stem>).
OUT_ROOT = PER_IMAGE_DIR / "_prod_kiss_reports"
# Upper bound on how many of the discovered CSVs main() processes (it takes
# the first LIMIT entries); None disables the cap.
LIMIT: Optional[int] = None  # set None to run all
# Default ``top_n`` forwarded to make_top_table_html() on image-level day pages.
TOP_N = 20


def _x_for_day(df_day: pd.DataFrame) -> tuple[pd.Series, str, pd.DataFrame]:
    """Return (x_series, x_title, df_day_with_index_if_needed)."""
    df_day = df_day.copy()
    if "capture_datetime" in df_day.columns:
        df_day["capture_datetime"] = pd.to_datetime(df_day["capture_datetime"], errors="coerce")
        if df_day["capture_datetime"].notna().any():
            return df_day["capture_datetime"], "capture_datetime", df_day
    df_day["_index"] = range(len(df_day))
    return df_day["_index"], "index", df_day


def _numeric_or_zero(df: pd.DataFrame, col: str) -> pd.Series:
    """Return ``df[col]`` coerced to numeric (NaN -> 0), or an all-zero Series if ``col`` is absent."""
    if col in df.columns:
        return pd.to_numeric(df[col], errors="coerce").fillna(0)
    return pd.Series(0, index=df.index)


def write_image_level_day_html(
    df_day: pd.DataFrame,
    out_html: Path,
    title: str,
    strategy: PlottingStrategy,
    top_n: int = TOP_N,
) -> None:
    """Write the image-level HTML page for a single day.

    The page is assembled, in this order, from: the block returned by
    ``make_day_summary_html``, the two plot fragments returned by
    ``strategy.generate_plots``, the table returned by ``make_top_table_html``
    and a script that opens ``customdata[0]`` of a clicked plot point in a new
    browser tab.

    Columns derived here before plotting:
      * ``event``    - result of ``infer_event_label``.
      * ``err_obj``  - ``fp_img + fn_img`` (a missing column counts as 0).
      * ``tp_ratio`` - ``tp_img / gt_count_img``; NaN where ``gt_count_img`` is 0
        or when either column is missing.

    Args:
        df_day: Per-image rows for one day. A copy is taken, so the caller's
            frame is not assigned to by this function.
        out_html: Destination file; missing parent directories are created.
        title: Title forwarded to ``strategy.generate_plots``.
        strategy: Object whose ``generate_plots`` returns the two HTML plot
            fragments embedded in the page.
        top_n: Forwarded to ``make_top_table_html``.
    """
    df_day = add_fo_url(df_day.copy())

    x, x_title, df_day = _x_for_day(df_day)

    # Coerce the count/score columns that are present; unparseable cells become 0.
    for c in ["gt_count_img", "pred_count_img", "tp_img", "fp_img", "fn_img", "avg_confidence"]:
        if c in df_day.columns:
            df_day[c] = pd.to_numeric(df_day[c], errors="coerce").fillna(0)

    # event + error
    df_day["event"] = infer_event_label(df_day)
    # A missing fp_img / fn_img column contributes zeros instead of raising
    # (``DataFrame.get(col, 0)`` would hand back a plain int with no .fillna).
    df_day["err_obj"] = _numeric_or_zero(df_day, "fp_img") + _numeric_or_zero(df_day, "fn_img")

    corr_gt_tp: Optional[float] = None
    if "gt_count_img" in df_day.columns and "tp_img" in df_day.columns:
        # Both columns were already coerced and zero-filled by the loop above.
        gt = df_day["gt_count_img"]
        tp = df_day["tp_img"]

        # tp_ratio is undefined for images without ground truth. Masking with
        # ``where`` yields float NaN directly and avoids the object-dtype
        # ``pd.NA`` intermediate, which ``astype("float")`` cannot convert.
        df_day["tp_ratio"] = (tp / gt.where(gt != 0)).astype("float")

        # corr(GT, TP) across images; needs at least two rows. May be NaN
        # when either series is constant.
        if len(df_day) >= 2:
            corr_gt_tp = float(gt.corr(tp))
    else:
        # Keep the column float-typed so downstream numeric code sees NaN.
        df_day["tp_ratio"] = float("nan")

    # Hover cols (shared); computed after the derived columns exist so they
    # can be included.
    hover_cols = _make_hover_cols(df_day)

    # Generate plots using strategy
    html1, html2 = strategy.generate_plots(df_day, x, x_title, hover_cols, title)

    # Click handler attached to every Plotly figure in the document.
    # NOTE(review): presumably customdata[0] carries the FiftyOne URL added by
    # add_fo_url - this depends on how the strategy builds customdata; confirm.
    post_script = """
    document.addEventListener("DOMContentLoaded", function() {
        var plots = document.getElementsByClassName('js-plotly-plot');
        if (!plots || !plots.length) return;
        for (var i=0; i<plots.length; i++) {
            (function(plot){
                plot.on('plotly_click', function(e) {
                    var url = e?.points?.[0]?.customdata?.[0];
                    if (url) window.open(url, '_blank');
                });
            })(plots[i]);
        }
    });
    """

    out_html.parent.mkdir(parents=True, exist_ok=True)

    summary_html = make_day_summary_html(df_day, corr_gt_tp)
    table_html = make_top_table_html(df_day, top_n=top_n)

    page_parts = [
        "<html><head><meta charset='utf-8'><title>Image-level</title></head><body>",
        summary_html,
        html1,
        html2,
        table_html,
        f"<script>{post_script}</script>",
        "</body></html>",
    ]

    out_html.write_text("\n".join(page_parts), encoding="utf-8")
    print(f"[SAVE][image-day] {out_html}")


def _make_hover_cols(df: pd.DataFrame) -> List[str]:
    cols = []
    for c in [
        "filepath",
        "capture_datetime",
        "gt_count_img",
        "pred_count_img",
        "tp_img",
        "fp_img",
        "fn_img",
        "avg_confidence",
        "median_confidence",
        "confidence_threshold",
        "iou_threshold",
        "model_tag",
        "event",
        "err_obj",
        "tp_ratio",  # added
    ]:
        if c in df.columns:
            cols.append(c)
    return cols


def _build_daily_table(df: pd.DataFrame, day_to_file: Dict[str, str]) -> pd.DataFrame:
    """Aggregate per-image rows into one row per ``capture_date`` with rates and drill-down targets."""

    def _sum_or_hit_sum(col: str) -> tuple:
        # NOTE(review): when an object-level column is absent, the sum of
        # ``hit_img`` is substituted so the derived columns below always exist
        # (behaviour kept from the original). In that case the value shown is
        # NOT a real GT/pred/TP/FP/FN total - confirm this placeholder is intended.
        return (col, "sum") if col in df.columns else ("hit_img", "sum")

    # Images are counted via sample_id when available, else via filepath.
    image_id_col = "sample_id" if "sample_id" in df.columns else "filepath"

    daily = (
        df.groupby("capture_date", dropna=False)
        .agg(
            images=(image_id_col, "count"),
            hit=("hit_img", "sum"),
            miss=("miss_img", "sum"),
            false_alarm=("false_alarm_img", "sum"),
            correct_reject=("correct_reject_img", "sum"),
            gt_total=_sum_or_hit_sum("gt_count_img"),
            pred_total=_sum_or_hit_sum("pred_count_img"),
            tp_total=_sum_or_hit_sum("tp_img"),
            fp_total=_sum_or_hit_sum("fp_img"),
            fn_total=_sum_or_hit_sum("fn_img"),
        )
        .reset_index()
        .sort_values("capture_date")
    )

    # clip(lower=1) guards the per-image rates against a zero image count.
    images = daily["images"].clip(lower=1)
    daily["hit_rate"] = daily["hit"] / images
    daily["error_rate"] = (daily["miss"] + daily["false_alarm"]) / images

    # Object-level error rate is undefined when TP+FP+FN == 0. ``where`` masks
    # those rows with float NaN; replacing 0 by ``pd.NA`` on integer totals
    # would upcast to object dtype, which ``astype("float")`` cannot convert.
    obj_total = daily["tp_total"] + daily["fp_total"] + daily["fn_total"]
    obj_errors = daily["fp_total"] + daily["fn_total"]
    daily["obj_error_rate"] = (obj_errors / obj_total.where(obj_total != 0)).astype("float")

    daily["avg_gt_per_img"] = (daily["gt_total"] / images).astype("float")
    daily["avg_pred_per_img"] = (daily["pred_total"] / images).astype("float")

    def _drill_target(day: object) -> str:
        # ``day_to_file`` is keyed by ``str(day)`` (see its Dict[str, str]
        # type), while the grouped values may be date/Timestamp objects, so
        # look up by string first and fall back to the raw value.
        target = day_to_file.get(str(day))
        if target is None:
            target = day_to_file.get(day)
        return "" if target is None else target

    daily["drill_html"] = daily["capture_date"].map(_drill_target)
    return daily


def write_daily_overview_html(
    df: pd.DataFrame,
    out_html: Path,
    day_to_file: Dict[str, str],
    title: str,
) -> None:
    """Write the daily overview page: stacked outcome bars plus rate lines.

    ``df`` is grouped by ``capture_date`` (rows with a missing date form their
    own group). Per day the figure shows stacked bars for hit / miss /
    false alarm / correct reject, two lines on a secondary axis (image-level
    hit rate and error rate) and a text label with the image count. Clicking a
    bar or line point opens that day's drill-down file.

    Args:
        df: Per-image rows. Must contain ``capture_date``, ``hit_img``,
            ``miss_img``, ``false_alarm_img``, ``correct_reject_img`` and either
            ``sample_id`` or ``filepath``; object-level columns are optional.
        out_html: Destination file; missing parent directories are created.
        day_to_file: Maps ``str(capture_date)`` to the drill-down target that
            is opened on click. Days without an entry are not clickable.
        title: Figure title.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    daily = _build_daily_table(df, day_to_file)
    x = daily["capture_date"]

    # Column order matters: the hover template and the click script address
    # these values positionally as customdata[0..15]; [0] is the drill target.
    hover_pack = daily[[
        "drill_html",
        "images",
        "hit", "miss", "false_alarm", "correct_reject",
        "hit_rate", "error_rate",
        "gt_total", "pred_total", "tp_total", "fp_total", "fn_total",
        "avg_gt_per_img", "avg_pred_per_img",
        "obj_error_rate",
    ]].values

    def daily_hovertemplate(series_name: str) -> str:
        return (
            "Date: %{x}<br>"
            "<b>" + series_name + "</b>: %{y}<br>"
            "Images: %{customdata[1]}<br>"
            "Hit/Miss/FA/CR: %{customdata[2]}/%{customdata[3]}/%{customdata[4]}/%{customdata[5]}<br>"
            "Hit rate (img): %{customdata[6]:.3f}<br>"
            "Error rate (img): %{customdata[7]:.3f}<br>"
            "GT total: %{customdata[8]} | Pred total: %{customdata[9]}<br>"
            "TP/FP/FN: %{customdata[10]}/%{customdata[11]}/%{customdata[12]}<br>"
            "avg GT/img: %{customdata[13]:.3f} | avg Pred/img: %{customdata[14]:.3f}<br>"
            "Obj error rate (FP+FN)/(TP+FP+FN): %{customdata[15]:.3f}<br>"
            "<b>Click</b> to drill down<br>"
            "<extra></extra>"
        )

    fig = go.Figure()

    # Stacked outcome bars (primary y axis).
    for col, name in [
        ("hit", "Hit (GT=1 & Pred=1)"),
        ("miss", "Miss (GT=1 & Pred=0)"),
        ("false_alarm", "False Alarm (GT=0 & Pred=1)"),
        ("correct_reject", "Correct Reject (GT=0 & Pred=0)"),
    ]:
        fig.add_trace(
            go.Bar(
                x=x,
                y=daily[col],
                name=name,
                customdata=hover_pack,
                hovertemplate=daily_hovertemplate(name),
            )
        )

    # Rate lines (secondary y axis); legend name and hover label differ for
    # the error rate, so both are listed explicitly.
    for col, legend_name, hover_name in [
        ("hit_rate", "Hit rate (image)", "Hit rate (image)"),
        ("error_rate", "Error rate (image) = (miss+FA)/images", "Error rate (image)"),
    ]:
        fig.add_trace(
            go.Scatter(
                x=x,
                y=daily[col],
                name=legend_name,
                mode="lines+markers",
                yaxis="y2",
                customdata=hover_pack,
                hovertemplate=daily_hovertemplate(hover_name),
            )
        )

    # top labels: show images count at top of stacked bar
    fig.add_trace(
        go.Scatter(
            x=x,
            y=daily["images"],
            mode="text",
            text=[f"{int(v)}" for v in daily["images"].fillna(0)],
            textposition="top center",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    fig.update_layout(
        title=title,
        barmode="stack",
        xaxis=dict(title="capture_date", rangeslider=dict(visible=True)),
        yaxis=dict(title="Images (stacked by outcome)"),
        yaxis2=dict(title="Rate", overlaying="y", side="right", range=[0, 1]),
        template="plotly_white",
        margin=dict(l=70, r=70, t=60, b=40),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )

    # Opens customdata[0] (the drill target) of the clicked point; an empty
    # string is falsy in JS, so days without a target do nothing.
    post_script = """
    document.addEventListener("DOMContentLoaded", function() {
        var plot = document.getElementsByClassName('js-plotly-plot')[0];
        if (!plot) return;
        plot.on('plotly_click', function(e) {
            var rel = e?.points?.[0]?.customdata?.[0];
            if (rel) window.open(rel, '_blank');
        });
    });
    """

    out_html.parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(out_html, include_plotlyjs="cdn", full_html=True, post_script=post_script)
    print(f"[SAVE][daily] {out_html}")


def process_one_csv(csv_path: Path, strategy: Optional[PlottingStrategy] = None) -> None:
    """Generate the full report (day pages, daily overview, INDEX.txt) for one CSV.

    Output goes to ``OUT_ROOT / <csv parent dir name> / safe_name(<csv stem>)``:
    one ``image_level_<day>.html`` per ``capture_date`` group, a
    ``daily_overview.html`` linking to them, and an ``INDEX.txt`` describing
    the pages.

    Args:
        csv_path: Per-image CSV to read. CSVs without a ``filepath`` column are
            skipped with a ``[SKIP]`` message.
        strategy: Plotting strategy for the image-level day pages. ``None``
            (the default) creates a fresh ``SchemeAGtTpStrategy`` for this
            call, so no instance is built at import time or shared between calls.
    """
    if strategy is None:
        strategy = SchemeAGtTpStrategy()

    df = pd.read_csv(csv_path)

    if "filepath" not in df.columns:
        print(f"[SKIP] missing filepath: {csv_path}")
        return

    df = ensure_time_cols(df)
    df = ensure_event_cols(df)

    report_dir = OUT_ROOT / csv_path.parent.name / safe_name(csv_path.stem)
    report_dir.mkdir(parents=True, exist_ok=True)

    # str(day) -> file name of that day's page, relative to report_dir; the
    # daily overview uses it as the click target.
    day_to_file: Dict[str, str] = {}

    # NOTE: groupby drops rows whose capture_date is missing (pandas default
    # dropna=True), so such rows get no day page even though the daily
    # overview groups with dropna=False and still shows them.
    for day, g in df.groupby("capture_date"):
        day = str(day)
        day_file = f"image_level_{day}.html"
        out_day_html = report_dir / day_file

        write_image_level_day_html(
            df_day=g,
            out_html=out_day_html,
            title=f"{csv_path.stem} | {day}",
            strategy=strategy,
            top_n=TOP_N,
        )
        day_to_file[day] = day_file

    out_daily_html = report_dir / "daily_overview.html"
    write_daily_overview_html(
        df=df,
        out_html=out_daily_html,
        day_to_file=day_to_file,
        title=f"{csv_path.stem} | Daily overview (click bars/lines to drill down)",
    )

    index_lines = [
        f"CSV: {csv_path}",
        f"Daily: {out_daily_html.name}",
        f"Days: {len(day_to_file)}",
        "",
        "Daily page:",
        "- Stacked bars (Hit/Miss/FA/CR) + lines (Hit rate / Error rate)",
        "- Hover shows image-level + object-level summary",
        "- Labels show images count at the top of each bar",
        "- Click stacked bars OR lines -> open image_level_YYYY-MM-DD.html",
        "",
        "Image-level day page (Scheme A):",
        "- Summary block at the top includes corr(GT,TP) + TP/GT mean/median",
        "- A1) GT vs TP (lines+markers) + TP/GT on right axis",
        "- A2) FP/FN/(FP+FN) (lines+markers)",
        "- Range slider in both charts",
        "- Click any point -> FiftyOne",
        "- Top table sorted by fp+fn (desc), clickable Open links",
    ]
    (report_dir / "INDEX.txt").write_text("\n".join(index_lines), encoding="utf-8")

    print(f"[DONE] report_dir = {report_dir}")


def main():
    """Discover the per-image CSVs under PER_IMAGE_DIR and build a report for each.

    When LIMIT is set, only the first LIMIT discovered CSVs are processed.
    """
    discovered = find_per_image_csvs(PER_IMAGE_DIR)
    print("[INFO] Found CSVs:", len(discovered))

    selected = discovered
    if LIMIT is not None:
        selected = discovered[:LIMIT]
        print("[INFO] Using first:", len(selected))

    separator = "\n" + "=" * 90
    for csv_path in selected:
        print(separator)
        print("[CSV]", csv_path)
        process_one_csv(csv_path)

    print("\nAll done. Open the generated daily_overview.html files under:")
    print(OUT_ROOT)


# Entry point: run the report generation only when this module is executed
# directly (``__name__`` is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

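# --- Captured notebook output (not part of the script) ---
# stderr (presumably the tail of a tqdm import warning):
#   from .autonotebook import tqdm as notebook_tqdm
#
# stdout:
# [INFO] Found CSVs: 0
#
# All done. Open the generated daily_overview.html files under:
# _eval_exports_per_images/sahi_null_v2_test/_prod_kiss_reports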
