In [None]:
import json
import pandas as pd

# -------- input locations --------
GT_FILE = "CodellamaGroundTruth2_5000.json"
STUDENT_FILE = "CodeQA_Part1_5000.json"

# Record fields whose "score" entries are extracted and compared below.
MEASURES = ["accuracy", "completeness", "relevance", "clarity"]

# -------- read both JSON documents --------
with open(GT_FILE, encoding="utf-8") as f:
    gt = json.loads(f.read())

with open(STUDENT_FILE, encoding="utf-8") as f:
    st = json.loads(f.read())

# One DataFrame per source; built only after both files parsed successfully.
df_gt, df_st = pd.DataFrame(gt), pd.DataFrame(st)

# -------- helper to extract score --------
def get_score(x):
    """Return the value stored under ``"score"`` in *x*.

    Yields ``None`` when *x* is not a dict, or when it is a dict without a
    ``"score"`` key.
    """
    return x.get("score") if isinstance(x, dict) else None

# -------- extract scores --------
# Column names that hold the extracted score for each measure, per source.
gt_cols = [f"{name}_gt" for name in MEASURES]
st_cols = [f"{name}_st" for name in MEASURES]

for measure, gt_col, st_col in zip(MEASURES, gt_cols, st_cols):
    df_gt[gt_col] = df_gt[measure].apply(get_score)
    df_st[st_col] = df_st[measure].apply(get_score)

# -------- merge on id --------
# Inner join: only ids present in both sources are kept.
df = df_gt[["id", *gt_cols]].merge(
    df_st[["id", *st_cols]],
    on="id",
    how="inner",
)

# -------- compute agreement accuracy --------
print("\nScore-agreement accuracy (exact match):\n")

for measure, gt_col, st_col in zip(MEASURES, gt_cols, st_cols):
    gt_scores = df[gt_col]
    st_scores = df[st_col]

    # A row counts only when both sides carry a score.
    both_present = ~(gt_scores.isna() | st_scores.isna())
    agree = both_present & (gt_scores == st_scores)

    n_compared = both_present.sum()
    n_matched = agree.sum()
    if n_compared:
        ratio = n_matched / n_compared
    else:
        # Nothing to compare: report 0 rather than dividing by zero.
        ratio = 0

    print(f"{measure.capitalize():12s} | Compared: {n_compared:4d} | Matched: {n_matched:4d} | Accuracy: {ratio:.4f}")
