In [3]:
import json
from collections import OrderedDict

# Weight applied to each evaluation criterion when the per-item score is
# computed; the six weights nominally add up to 1.
WEIGHTS = dict(
    factuality=0.30,
    coverage=0.20,
    logical_coherence=0.15,
    clarity=0.15,
    consistency=0.10,
    utility=0.10,
)

# Batch identifier interpolated into the input and output file names.
# Named ``batch_type`` rather than ``type`` so the builtin ``type()`` is not
# rebound for the rest of the module / notebook session.
batch_type = "EN"
infile = f"Histories-Batch-{batch_type}.json"
outfile = f"output-{batch_type}.json"

def compute_weighted_score(ev):
    """Return the weighted sum of the six evaluation criteria, rounded to 4 places.

    ``ev`` is indexed by each criterion name listed below and every value is
    multiplied by the entry of the same name in the module-level ``WEIGHTS``
    table. A criterion missing from ``ev`` raises ``KeyError``.
    """
    first, *remaining = (
        "factuality",
        "coverage",
        "logical_coherence",
        "clarity",
        "consistency",
        "utility",
    )

    # Accumulate strictly left to right in the listed order so the
    # floating-point result does not depend on summation order.
    total = ev[first] * WEIGHTS[first]
    for name in remaining:
        total = total + ev[name] * WEIGHTS[name]

    return round(total, 4)

def process_json(input_file, output_file):
    """Recompute item scores, derive a global improvement summary, and save.

    The input file is read as UTF-8 JSON (key order preserved) and its top
    level is iterated as a mapping. Only top-level values that are lists are
    processed, and within them only dict entries; anything else is passed
    through to the output untouched.

    Steps:
      1. Every entry with a truthy ``"evaluation"`` gets ``"average_score"``
         recomputed by ``compute_weighted_score`` and placed immediately
         after the ``"evaluation"`` key (any previous ``"average_score"`` is
         dropped from its old position).
      2. ``(round, average_score)`` pairs are collected from every entry that
         has both keys, pooled across all lists, and sorted by round.
      3. Each pair of consecutive rounds is classified as improved, unchanged
         or worse.
      4. A ``"__GLOBAL_SUMMARY__"`` object is stored on the top-level mapping.
      5. The result is written to ``output_file`` as indented UTF-8 JSON and a
         confirmation line is printed.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.

    Returns:
        None.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f, object_pairs_hook=OrderedDict)

    # Gather, in file order, every dict entry that lives inside a top-level
    # list. Non-list values and non-dict list elements are skipped here so
    # that both passes below apply the same filter (previously the first pass
    # called ``.get`` on non-dict elements and raised AttributeError).
    entries = []
    for items in data.values():
        if isinstance(items, list):
            entries.extend(obj for obj in items if isinstance(obj, dict))

    # ==========================================================
    # 1) Recompute average_score for all items
    # ==========================================================
    for obj in entries:
        ev = obj.get("evaluation")
        if not ev:
            continue

        new_score = compute_weighted_score(ev)

        # Rebuild the entry so "average_score" sits right after "evaluation"
        # while every other key keeps its original relative order.
        rebuilt = OrderedDict()
        for k, v in obj.items():
            if k == "average_score":
                continue
            rebuilt[k] = v
            if k == "evaluation":
                rebuilt["average_score"] = new_score

        # Mutate in place so the list inside ``data`` sees the new layout.
        obj.clear()
        obj.update(rebuilt)

    # ==========================================================
    # 2) Collect all rounds across the entire file
    # ==========================================================
    # NOTE(review): rounds from every list are pooled before sorting, so if
    # the file holds several independent histories, consecutive comparisons
    # may pair entries from different histories -- confirm this is intended.
    rounds = [
        (obj["round"], obj["average_score"])
        for obj in entries
        if "round" in obj and "average_score" in obj
    ]
    rounds.sort(key=lambda pair: pair[0])

    # ==========================================================
    # 3) Calculate improvement metrics globally
    # ==========================================================
    success = low = fail = 0

    # Compare each round with the one before it (the first has no predecessor).
    for (_, prev), (_, curr) in zip(rounds, rounds[1:]):
        if curr > prev:
            success += 1
        elif curr == prev:
            low += 1
        else:
            fail += 1

    total = success + low + fail
    ratio = round((success + low) / total, 4) if total > 0 else 0.0

    # ==========================================================
    # 4) Append one summary object at end of file
    # ==========================================================
    # Unchanged scores are reported together with improvements under
    # "critical_success"; only strict decreases count as failures.
    summary = OrderedDict([
        ("global_summary", OrderedDict([
            ("critical_success", success + low),
            ("critical_failed", fail),
            ("Total_result", total),
            ("Success_ratio", ratio),
        ])),
    ])

    data["__GLOBAL_SUMMARY__"] = summary

    # ==========================================================
    # 5) Write result
    # ==========================================================
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"✅ Done. Output saved to: {output_file}")

# Run the whole pipeline on the configured input/output file pair.
process_json(input_file=infile, output_file=outfile)


✅ Done. Output saved to: output-EN.json
