In [5]:
import pandas as pd
from pathlib import Path
from charset_normalizer import from_path

# Directory that is searched for input files.
IN_DIR = Path(".")
# Glob pattern used to select the input files inside IN_DIR.
PATTERN = "*.csv"

def read_csv_any(p: Path) -> pd.DataFrame:
    """Read a CSV file whose text encoding is not known in advance.

    The encoding is guessed with ``charset_normalizer``; when no guess is
    available the file is read as UTF-8.

    Args:
        p: Path of the CSV file to load.

    Returns:
        The parsed frame, with column names converted to ``str`` and
        stripped of surrounding whitespace and any leading BOM character.
    """
    # best() returns None when no plausible encoding was detected, so the
    # fallback has to be applied before touching ``.encoding``.
    best = from_path(p).best()
    enc = best.encoding if best is not None and best.encoding else "utf-8"
    df = pd.read_csv(p, encoding=enc, engine="python")
    # A BOM that survived decoding sticks to the first header; strip again
    # after removing it so padding that followed the BOM is dropped too.
    df.columns = [str(c).strip().lstrip("\ufeff").strip() for c in df.columns]
    return df

# Build a per-file accuracy report from the "gold_decision" column of every
# CSV matching PATTERN in IN_DIR, append a TOTAL row, print it and save it.
files = sorted(IN_DIR.glob(PATTERN))
if not files:
    raise SystemExit("CSV file not found")

# One dict of label counts and accuracies per usable CSV file.
rows = []
for f in files:
    df = read_csv_any(f)
    if "gold_decision" not in df.columns:
        print(f"[WARN] {f.name} has not gold_decision，Skiping now")
        continue
    # Normalize the labels (string, trimmed, lower-case) before counting.
    s = df["gold_decision"].astype(str).str.strip().str.lower()
    c = (s == "correct").sum()
    ic = (s == "incorrect").sum()
    sk = (s == "skip").sum()
    # Rows with any other value (including missing ones) are not counted.
    tot_labeled = c + ic
    tot_all = c + ic + sk

    # Guard against division by zero when a file has no counted rows.
    acc = c / tot_labeled if tot_labeled else 0.0
    acc_with_skip = c / tot_all if tot_all else 0.0

    rows.append({
        "file": f.name,
        "correct": int(c),
        "incorrect": int(ic),
        "skip": int(sk),
        "labeled": int(tot_labeled),
        "all_rows": int(tot_all),
        "accuracy": acc,
        "accuracy_including_skip": acc_with_skip
    })

# Without this guard an empty ``rows`` list yields a frame with no columns
# and ``sort_values("file")`` fails with an opaque KeyError.
if not rows:
    raise SystemExit("No CSV file with a gold_decision column found")

# Summary
summary = pd.DataFrame(rows).sort_values("file")
total = {
    "file": "TOTAL",
    "correct": summary["correct"].sum(),
    "incorrect": summary["incorrect"].sum(),
    "skip": summary["skip"].sum(),
}
total["labeled"] = total["correct"] + total["incorrect"]
total["all_rows"] = total["labeled"] + total["skip"]
# The total accuracies are computed from the pooled counts, not as the mean
# of the per-file accuracies.
total["accuracy"] = (total["correct"] / total["labeled"]) if total["labeled"] else 0.0
total["accuracy_including_skip"] = (total["correct"] / total["all_rows"]) if total["all_rows"] else 0.0

summary = pd.concat([summary, pd.DataFrame([total])], ignore_index=True)

# Output to console and CSV
pd.options.display.float_format = "{:.4f}".format
print(summary)

OUT = "./Report/accuracy_report.csv"
# to_csv does not create missing directories, so make sure the target exists.
Path(OUT).parent.mkdir(parents=True, exist_ok=True)
summary.to_csv(OUT, index=False, encoding="utf-8-sig")
print("Saved:", OUT)

[WARN] accuracy_report.csv has not gold_decision，Skiping now
                                                file  correct  incorrect  \
0  2007_tshibubudze_themarkoyefault_2007_merged_f...       68          7   
1  2010_matsheka_irvinfinalthesis_merged_final_re...       55         18   
2  2011_woolfe_thestratigraphyandmetamorphicfacie...       29         15   
3  2012_simoko_petrology,geochemistryandstructure...       25          9   
4  2013_ramabulana_sadiolahillpetrology_merged_fi...       50         20   
5                                              TOTAL      227         69   

   skip  labeled  all_rows  accuracy  accuracy_including_skip  
0     0       75        75    0.9067                   0.9067  
1     0       73        73    0.7534                   0.7534  
2     0       44        44    0.6591                   0.6591  
3     0       34        34    0.7353                   0.7353  
4     0       70        70    0.7143                   0.7143  
5     0      296      