In [13]:
import os, json
from collections import Counter
from pathlib import Path
import pandas as pd

# ── configuration ──────────────────────────────────────────────────────────
PROMPTS  = ["regular", "signature"]
MODELS   = [
    "llama3.2:3b-instruct-fp16",
    "phi4:14b-fp16",
    "llama3.3:70b-instruct-fp16",
]
BASE_DIR = Path.cwd()                       # notebook’s working dir
SHOT     = 5                                # adjust if 1- or 10-shot

# Higher rank = better outcome.  The empty category "" is the one reported
# as "Pass" in the matrices and as "Final-Pass" in the summary.
FULL_RANK = {"": 4, "Tests": 3, "Semantic": 2, "Syntax": 1}

# master containers
summary_rows   = []                         # one row per model/prompt
matrix_tables  = {}                         # {(prompt,model): DataFrame}

# `display` only exists when running under IPython/Jupyter; fall back to
# plain printing so the cell can also be executed as a regular script.
try:
    show = display
except NameError:
    show = print


# ── helpers ────────────────────────────────────────────────────────────────
def add_percentages(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a new DataFrame whose cells look like  "42 (17.5%)".

    Percent is w.r.t. the *grand total* of the table.  An all-zero table
    yields "0 (0.0%)" in every cell instead of dividing by zero.
    """
    grand_total = df.values.sum()
    if grand_total == 0:
        pct = df * 0.0                      # float zeros, same shape/labels
    else:
        pct = (df / grand_total * 100).round(1)

    return df.astype(int).astype(str) + " (" + pct.astype(str) + "%)"


def tally_transitions(folder, rank):
    """
    Count error-category transitions over every ``*.json`` file in *folder*.

    Each file may hold one experiment dict or a list of them.  For every
    candidate under ``task_candidates`` the pair
    (``initial_error_category``, ``final_error_category``) is counted;
    candidates whose initial or final category is not a key of *rank*
    are skipped.

    Returns a tuple ``(initial_cnt, final_cnt, trans)`` of Counters keyed
    by initial category, final category and (initial, final) pair.
    """
    initial_cnt = Counter()
    final_cnt   = Counter()
    trans       = Counter()

    # sorted() keeps the processing order independent of the file system
    for file in sorted(Path(folder).glob("*.json")):
        # JSON is UTF-8 by specification; don't rely on the locale default
        with open(file, encoding="utf-8") as fh:
            data = json.load(fh)
        experiments = data if isinstance(data, list) else [data]

        for exp in experiments:
            for cands in exp.get("task_candidates", {}).values():
                for cand in cands:
                    init = cand["initial_error_category"]
                    fin  = cand["final_error_category"]
                    if init not in rank or fin not in rank:
                        continue
                    initial_cnt[init] += 1
                    final_cnt[fin]    += 1
                    trans[(init, fin)] += 1

    return initial_cnt, final_cnt, trans


def classify_shifts(trans, rank):
    """
    Split transition counts into ``(improved, worse, same)`` totals.

    A transition is "improved" when the final category ranks higher than
    the initial one in *rank*, "worse" when it ranks lower, else "same".
    """
    improved = worse = same = 0
    for (init, fin), n in trans.items():
        if rank[fin] > rank[init]:
            improved += n
        elif rank[fin] < rank[init]:
            worse += n
        else:
            same += n
    return improved, worse, same


# ── main loop ──────────────────────────────────────────────────────────────
for prompt in PROMPTS:
    # ranking depends on prompt
    RANK = dict(FULL_RANK)
    if prompt == "regular":       # “Tests” isn’t meaningful here
        RANK.pop("Tests")
    CLASSES = list(RANK)          # stable order
    labels  = [c or "Pass" for c in CLASSES]

    for model in MODELS:
        folder = (BASE_DIR / "RAG" / prompt / "3_fold" /
                  f"{prompt}_RAG_{SHOT}_shot_{model}")
        if not folder.exists():
            print("⚠️  Folder not found:", folder)
            continue

        # ── read every fold file ───────────────────────────────────────────
        initial_cnt, final_cnt, trans = tally_transitions(folder, RANK)
        improved, worse, same = classify_shifts(trans, RANK)
        total = improved + worse + same

        if total == 0:
            print(f"No data for {prompt} | {model}")
            continue

        # ── store per-model summary row ────────────────────────────────────
        summary_rows.append({
            "Prompt": prompt,
            "Model":  model,
            "Total":  total,

            "Improved (n)": improved,
            "Improved (%)": f"{100*improved/total:.1f}",

            "Worse (n)":    worse,
            "Worse (%)":    f"{100*worse/total:.1f}",

            "Same (n)":     same,
            "Same (%)":     f"{100*same/total:.1f}",

            "Final-Pass (n)": final_cnt[""],
            "Final-Pass (%)": f"{100*final_cnt['']/total:.1f}",
        })

        # ── build transition matrix dataframe ──────────────────────────────
        # rows = initial category, columns = final category
        mat = pd.DataFrame(
            [[trans[(r, c)] for c in CLASSES] for r in CLASSES],
            index=labels,
            columns=labels,
        )
        matrix_tables[(prompt, model)] = mat

        # quick console glance
        print(f"\n{prompt} | {model}  (n={total})")
        mat_with_pct = add_percentages(mat)
        show(mat_with_pct)           # notebook-friendly

        print(f"Improved {improved/total:.1%}, Worse {worse/total:.1%}, "
              f"Same {same/total:.1%}, Final-Pass {final_cnt['']/total:.1%}")

# # ── export LaTeX for thesis ────────────────────────────────────────────────
# summary_df = pd.DataFrame(summary_rows)
# latex_summary = summary_df.to_latex(
#     index=False,
#     caption="Overall error-category shifts after SynCode.",
#     label="tab:error_overview",
#     column_format="llrrrrr"
# )
# with open("summary_table.tex", "w") as f:
#     f.write(latex_summary)

# for (prompt, model), mat in matrix_tables.items():
#     fn = f"matrix_{prompt}_{model.replace(':','_')}.tex"
#     mat.to_latex(
#         fn,
#         caption=f"Transition matrix for {model} ({prompt}).",
#         label=f"tab:matrix_{prompt}_{model.replace(':','_')}",
#         column_format="l" + "r" * len(mat.columns)
#     )

# print("\n✅  LaTeX tables written: summary_table.tex and one matrix_*.tex per model.")



regular | llama3.2:3b-instruct-fp16  (n=1230)


Unnamed: 0,Pass,Semantic,Syntax
Pass,737 (59.9%),0 (0.0%),0 (0.0%)
Semantic,54 (4.4%),53 (4.3%),84 (6.8%)
Syntax,17 (1.4%),6 (0.5%),279 (22.7%)


Improved 6.3%, Worse 6.8%, Same 86.9%, Final-Pass 65.7%
⚠️  Folder not found: /root/Thesis_project/experiments/Refinement/fox/testing_runs/RAG/regular/3_fold/regular_RAG_5_shot_phi4:14b-fp16
⚠️  Folder not found: /root/Thesis_project/experiments/Refinement/fox/testing_runs/RAG/regular/3_fold/regular_RAG_5_shot_llama3.3:70b-instruct-fp16

signature | llama3.2:3b-instruct-fp16  (n=1230)


Unnamed: 0,Pass,Tests,Semantic,Syntax
Pass,308 (25.0%),0 (0.0%),0 (0.0%),0 (0.0%)
Tests,10 (0.8%),52 (4.2%),17 (1.4%),232 (18.9%)
Semantic,23 (1.9%),10 (0.8%),75 (6.1%),118 (9.6%)
Syntax,11 (0.9%),7 (0.6%),12 (1.0%),355 (28.9%)


Improved 5.9%, Worse 29.8%, Same 64.2%, Final-Pass 28.6%

signature | phi4:14b-fp16  (n=680)


Unnamed: 0,Pass,Tests,Semantic,Syntax
Pass,290 (42.6%),0 (0.0%),0 (0.0%),0 (0.0%)
Tests,34 (5.0%),40 (5.9%),4 (0.6%),11 (1.6%)
Semantic,7 (1.0%),11 (1.6%),65 (9.6%),29 (4.3%)
Syntax,0 (0.0%),0 (0.0%),11 (1.6%),178 (26.2%)


Improved 9.3%, Worse 6.5%, Same 84.3%, Final-Pass 48.7%
⚠️  Folder not found: /root/Thesis_project/experiments/Refinement/fox/testing_runs/RAG/signature/3_fold/signature_RAG_5_shot_llama3.3:70b-instruct-fp16
