In [2]:
import pandas as pd
from pathlib import Path
from charset_normalizer import from_path

# Directory that is searched for input CSV files (the current working directory).
IN_DIR = Path(".")
# Glob pattern, relative to IN_DIR, selecting the files to evaluate.
PATTERN = "*.csv"

def read_csv_any(p: Path) -> pd.DataFrame:
    """Read a CSV file whose text encoding is not known in advance.

    The encoding is guessed with ``charset_normalizer``; if no guess is
    available the file is read as UTF-8. Column names are converted to
    strings and cleaned of surrounding whitespace and a leading byte-order
    mark.

    Args:
        p: Path of the CSV file to read.

    Returns:
        The parsed DataFrame with cleaned column names.
    """
    # best() yields None (not a match object) when detection finds no
    # plausible encoding, so guard before reading .encoding.
    best_guess = from_path(p).best()
    enc = (best_guess.encoding if best_guess is not None else None) or "utf-8"
    df = pd.read_csv(p, encoding=enc, engine="python")
    # The BOM is not whitespace: strip() stops at it, so strip again after
    # removing it to drop any blanks that followed the BOM.
    df.columns = [str(c).strip().lstrip("\ufeff").strip() for c in df.columns]
    return df

def norm_decision(s: pd.Series) -> pd.Series:
    """Return the values of ``s`` as trimmed, lower-cased strings.

    Every element is first converted with ``astype(str)``, then surrounding
    whitespace is removed and the text is lower-cased.
    """
    as_text = s.astype(str)
    trimmed = as_text.str.strip()
    return trimmed.str.lower()

def compute_counts(df: pd.DataFrame) -> dict:
    """Tally the ``gold_decision`` labels of ``df`` and derive accuracies.

    Labels are normalised with ``norm_decision`` and only the values
    ``correct``, ``incorrect`` and ``skip`` are counted; any other value is
    ignored.

    Returns:
        A dict with the keys ``correct``, ``incorrect``, ``skip``,
        ``labeled`` (correct + incorrect), ``all_rows`` (labeled + skip),
        ``accuracy`` (correct / labeled) and ``accuracy_including_skip``
        (correct / all_rows). Either accuracy is 0.0 when its denominator
        is zero.
    """
    decisions = norm_decision(df["gold_decision"])
    n_correct, n_incorrect, n_skip = (
        decisions.eq(label).sum() for label in ("correct", "incorrect", "skip")
    )
    n_labeled = n_correct + n_incorrect
    n_total = n_labeled + n_skip

    # Guard both ratios against an empty denominator.
    if n_labeled:
        accuracy = n_correct / n_labeled
    else:
        accuracy = 0.0
    if n_total:
        accuracy_with_skip = n_correct / n_total
    else:
        accuracy_with_skip = 0.0

    return {
        "correct": int(n_correct),
        "incorrect": int(n_incorrect),
        "skip": int(n_skip),
        "labeled": int(n_labeled),
        "all_rows": int(n_total),
        "accuracy": accuracy,
        "accuracy_including_skip": accuracy_with_skip,
    }


In [4]:
# Build accuracy reports (per file, per file and source, overall, overall per
# source) from every CSV matched by PATTERN in IN_DIR, and save them as CSV.
files = sorted(IN_DIR.glob(PATTERN))
if not files:
    raise SystemExit("CSV file not found")


def _with_source_norm(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of df with a `source` column and a normalised `source_norm`."""
    if "source" not in df.columns:
        df = df.assign(source="")  # no source column: treat all rows as one blank group
    raw = df["source"]
    # astype(str) would turn missing values into the text "nan"; map them to ""
    # so they land in the same "(empty)" group as a missing column.
    norm = raw.astype(str).str.strip().str.lower().mask(raw.isna(), "")
    return df.assign(source_norm=norm)


def _counts_by_source(df: pd.DataFrame) -> list:
    """Return one stats dict per `source_norm` group (llm / rules / llm,rules / other)."""
    return [
        {"source_group": src_val or "(empty)", **compute_counts(g)}
        for src_val, g in df.groupby("source_norm", dropna=False)
    ]


def _sorted_frame(records: list, by) -> pd.DataFrame:
    """Build a DataFrame from records and sort it by `by`; tolerate no records."""
    frame = pd.DataFrame(records)
    return frame if frame.empty else frame.sort_values(by)


rows = []
per_source_rows = []
frames = []  # every usable file, parsed exactly once and reused for the totals

for f in files:
    df = read_csv_any(f)
    if "gold_decision" not in df.columns:
        print(f"[WARN] {f.name} has not gold_decision，Skipping now")
        continue
    df = _with_source_norm(df)
    frames.append(df)

    # File-level aggregation
    rows.append({"file": f.name, **compute_counts(df)})

    # Aggregation per source group within the file
    per_source_rows.extend(
        {"file": f.name, **stats} for stats in _counts_by_source(df)
    )

if not frames:
    # Without this guard the summary tables below would fail with a KeyError.
    raise SystemExit("No CSV file with a gold_decision column was found")

# Summary tables
by_file = _sorted_frame(rows, "file")
by_file_source = _sorted_frame(per_source_rows, ["file", "source_group"])

# Overall (all usable files combined); skipped files are excluded here too
all_df = pd.concat(frames, ignore_index=True)
overall = compute_counts(all_df)
overall_row = pd.DataFrame([{"file": "TOTAL", **overall}])

# Overall by source
overall_by_source = _sorted_frame(_counts_by_source(all_df), "source_group")

# Output
out_dir = Path("./Report/Group_by_source")
out_dir.mkdir(parents=True, exist_ok=True)

reports = {
    "accuracy_by_file.csv": by_file,
    "accuracy_by_file_source.csv": by_file_source,
    "accuracy_overall.csv": overall_row,
    "accuracy_overall_by_source.csv": overall_by_source,
}
for report_name, table in reports.items():
    # utf-8-sig writes a BOM at the start of each report file.
    table.to_csv(out_dir / report_name, index=False, encoding="utf-8-sig")

print("Saved:")
for report_name in reports:
    print(" -", out_dir / report_name)

Saved:
 - Report\Group_by_source\accuracy_by_file.csv
 - Report\Group_by_source\accuracy_by_file_source.csv
 - Report\Group_by_source\accuracy_overall.csv
 - Report\Group_by_source\accuracy_overall_by_source.csv
