### Import Libraries

In [60]:
# library for cap_f1
from cap_f1 import *
from datetime import datetime
from multiprocessing import Pool
import math
import json
import glob
import os


# Auto-reload edited Python modules so the kernel does not need to be restarted
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [61]:
print("Load caption file...")

# Timestamp with minute resolution; it names this run's results folder and is
# passed on to the functions that build the output file names.
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d_%H-%M")

# Create the per-run results folder. exist_ok=True means two runs started in
# the same minute share (and may overwrite files in) the same folder.
folder_path = f"results/{timestamp}"
os.makedirs(folder_path, exist_ok=True)

# Load the original caption dataset.
# NOTE(review): read_json comes from the cap_f1 star import; the record schema
# is not visible here.
org_caption_dataset = read_json("evaluation_results_5429-images_2025-04-03_11_27_fixed.json")



Load caption file...


In [62]:
# Testing only: this cell REPLACES org_caption_dataset (loaded in the previous
# cell) with a subset read back from selected_data.json in the run folder.
# NOTE(review): select_data comes from cap_f1; judging by its arguments it
# filters the full dataset by the names in results/selected_filename.txt and
# writes the result to the third path — confirm.
select_data("results/selected_filename.txt", "evaluation_results_5429-images_2025-04-03_11_27_fixed.json", f"{folder_path}/selected_data.json")
org_caption_dataset = read_json(f"{folder_path}/selected_data.json")
print(len(org_caption_dataset))
# Debug aid (disabled): truncate both lists to a single item.
# org_caption_dataset = org_caption_dataset[:1]
# all_human_captions = all_human_captions[:1]

# Build one list of human caption strings per dataset item. Exactly one entry
# is appended per item, so all_human_captions stays index-aligned with
# org_caption_dataset (the batch code slices both with the same indices).
all_human_captions=[]
for item in org_caption_dataset:
    # Keep every human caption except the fixed "quality issues" sentence
    # (presumably an annotator placeholder for unusable images — confirm).
    human_captions = [
        hc["caption"]            
        for hc in item["human_captions"]
        if hc["caption"] != "Quality issues are too severe to recognize visual content."
    ]
    all_human_captions.append(human_captions)

24 data, JSON results/2025-04-08_21-45/selected_data.json created with 24 items.
24


### Run with Multiple Processes

In [63]:
def process_batch(start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id):
    """Run the three evaluation stages on one slice of the dataset.

    The slice ``[start_idx:end_idx]`` is taken from both ``org_caption_dataset``
    and ``all_human_captions``. Each stage saves its result through
    ``save_results_json`` to a file in ``folder_path`` whose name carries
    ``timestamp`` and ``chunk_id``; stages 2 and 3 pass the previous stage's
    file as ``update_existing`` (presumably so its contents are carried
    forward — confirm against ``save_results_json``).

    Args:
        start_idx: First index (inclusive) of the slice to process.
        end_idx: Last index (exclusive) of the slice to process.
        org_caption_dataset: Full list of dataset items.
        all_human_captions: Per-item human caption lists, index-aligned with
            ``org_caption_dataset``.
        folder_path: Directory the chunk files are written to.
        timestamp: Run identifier embedded in every output file name.
        chunk_id: Chunk number embedded in every output file name.

    Returns:
        None. Results are only written to disk.
    """
    chunk_items = org_caption_dataset[start_idx:end_idx]
    chunk_humans = all_human_captions[start_idx:end_idx]
    item_count = len(chunk_items)

    # One output file per stage.
    parsed_path = f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json"
    matching_path = f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json"
    final_path = f"{folder_path}/final_{timestamp}_chunk{chunk_id}.json"

    # Stage 1: break captions into atomic statements.
    T_atomics, g_atomics, parsed_T = generate_atomic_statement(chunk_items, limit=item_count)
    save_results_json(
        output_path=parsed_path,
        org_dataset=chunk_items,
        T_atomics=T_atomics,
        g_atomics=g_atomics,
        parsed_T=parsed_T,
        limit=item_count,
    )

    # Stage 2: match human statements against generated ones.
    metadata = evaluate_matching(chunk_humans, T_atomics, g_atomics)
    save_results_json(
        output_path=matching_path,
        update_existing=parsed_path,
        metadata=metadata,
        limit=item_count,
    )

    # Stage 3: compute Cap F1 from the matching metadata.
    evaluation = calculate_cap_f1(metadata)
    save_results_json(
        output_path=final_path,
        update_existing=matching_path,
        evaluations=evaluation,
        limit=item_count,
    )

def run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=32):
    """Split the dataset into contiguous chunks and evaluate them in parallel.

    The dataset is cut into chunks of ``ceil(total / num_workers)`` items and
    each non-empty chunk is handed to ``process_batch`` in a worker process.
    Chunk ids are assigned in dataset order starting from 0.

    Args:
        org_caption_dataset: Full list of dataset items.
        all_human_captions: Per-item human caption lists, index-aligned with
            ``org_caption_dataset``.
        folder_path: Directory the per-chunk output files are written to.
        timestamp: Run identifier embedded in the output file names.
        num_workers: Upper bound on the number of worker processes / chunks.

    Returns:
        None. Returns immediately when the dataset is empty.

    Raises:
        ValueError: If ``num_workers`` is smaller than 1.
        Exception: Any exception raised inside a worker is re-raised here by
            ``AsyncResult.get()``.
    """
    if num_workers < 1:
        raise ValueError(f"num_workers must be at least 1, got {num_workers}")

    total = len(org_caption_dataset)
    if total == 0:
        # Nothing to evaluate; do not start a pool just to run empty batches.
        return

    chunk_size = math.ceil(total / num_workers)

    # chunk_size is rounded up, so fewer than num_workers chunks may already
    # cover the data (e.g. 25 items / 24 workers -> 13 chunks of 2). Stepping
    # through range() yields only non-empty ranges, so no worker is asked to
    # process an empty slice. Ids of the non-empty chunks are unchanged.
    chunk_bounds = [
        (start_idx, min(start_idx + chunk_size, total))
        for start_idx in range(0, total, chunk_size)
    ]

    # len(chunk_bounds) never exceeds num_workers, so this only trims idle workers.
    with Pool(processes=len(chunk_bounds)) as pool:
        jobs = [
            pool.apply_async(
                process_batch,
                (start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id),
            )
            for chunk_id, (start_idx, end_idx) in enumerate(chunk_bounds)
        ]

        # Block until every chunk is done; get() surfaces worker exceptions.
        for job in jobs:
            job.get()

In [64]:


# Run the evaluation over the currently loaded dataset, split across up to 24 worker processes.
run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=24)

100%|██████████| 1/1 [00:08<00:00,  8.69s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk5.json


100%|██████████| 1/1 [00:08<00:00,  8.75s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk22.json


100%|██████████| 1/1 [00:09<00:00,  9.05s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk14.json


100%|██████████| 1/1 [00:09<00:00,  9.32s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk12.json


100%|██████████| 1/1 [00:09<00:00,  9.60s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk9.json


100%|██████████| 1/1 [00:10<00:00, 10.04s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk3.json


100%|██████████| 1/1 [00:10<00:00, 10.23s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk23.json


100%|██████████| 1/1 [00:10<00:00, 10.50s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk15.json


100%|██████████| 1/1 [00:10<00:00, 10.52s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk13.json


100%|██████████| 1/1 [00:10<00:00, 10.55s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk2.json


100%|██████████| 1/1 [00:10<00:00, 10.95s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk7.json


100%|██████████| 1/1 [00:11<00:00, 11.15s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk20.json


100%|██████████| 1/1 [00:11<00:00, 11.53s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk6.json


100%|██████████| 1/1 [00:11<00:00, 11.76s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk1.json


100%|██████████| 1/1 [00:11<00:00, 11.89s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk16.json


100%|██████████| 1/1 [00:12<00:00, 12.01s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk0.json


100%|██████████| 1/1 [00:12<00:00, 12.23s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk19.json


100%|██████████| 1/1 [00:12<00:00, 12.59s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk8.json

100%|██████████| 1/1 [00:12<00:00, 12.59s/it]




  0%|          | 0/1 [00:00<?, ?it/s].59s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk18.json


100%|██████████| 1/1 [00:12<00:00, 12.89s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk4.json


100%|██████████| 1/1 [00:12<00:00, 12.98s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk11.json


100%|██████████| 1/1 [00:13<00:00, 13.11s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk17.json


100%|██████████| 1/1 [00:14<00:00, 14.80s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk21.json


100%|██████████| 1/1 [00:16<00:00, 16.66s/it]


Saved JSON to: results/2025-04-08_21-45/parsed_caption_2025-04-08_21-45_chunk10.json


100%|██████████| 1/1 [00:08<00:00,  8.32s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk5.json


100%|██████████| 1/1 [00:00<00:00, 19878.22it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk5.json
Error: Precision mismatch for model [gpt-4o-2024-08-06]
length 7 vs 7
G atomics:
['There is a can.', 'The can is on a wooden surface.', 'The can has visible text.', "The text reads 'WT. 15 OZ. (425g)'.", "The text includes part of the word 'TIONAL'.", 'The label on the can is yellow.', 'The label on the can is red.']
Precision TPs:
['There is a can.', 'The can is on a wooden surface.', "The text reads 'WT. 15 OZ. (425g)'", "The text includes part of the word 'TIONAL'.", 'The label on the can is yellow.', 'The label on the can is red.']
Precision FPs:
['The can has visible text.']


100%|██████████| 1/1 [00:08<00:00,  8.84s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk3.json


100%|██████████| 1/1 [00:00<00:00, 12192.74it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk3.json


100%|██████████| 1/1 [00:08<00:00,  8.72s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk13.json


100%|██████████| 1/1 [00:00<00:00, 12483.05it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk13.json
Error: Precision mismatch for model [Molmo-7B-O-0924]

100%|██████████| 1/1 [00:10<00:00, 10.74s/it]


length 7 vs 7

100%|██████████| 1/1 [00:10<00:00, 10.75s/it]







G atomics:
['There is a box of tea.', 'The box is on a table.', 'The table is red.', 'The box is yellow.', 'The box has a red label.', "The text 'Tea' is visible on the box.", "The text 'Tea' is at the bottom of the box."]
Precision TPs:
['The box is on a table']
Precision FPs:
['There is a box of tea', 'The table is red', 'The box is yellow', 'The box has a red label', "The text 'Tea' is visible on the box", "The text 'Tea' is at the bottom of the box"]
Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk22.json

100%|██████████| 1/1 [00:10<00:00, 10.45s/it]




  0%|          | 0/1 [00:00<?, ?it/s].45s/it]
100%|██████████| 1/1 [00:00<00:00, 25890.77it/s]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk14.jsonSaved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk22.json



100%|██████████| 1/1 [00:00<00:00, 12671.61it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk14.json


100%|██████████| 1/1 [00:10<00:00, 10.37s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk12.json


100%|██████████| 1/1 [00:00<00:00, 12557.80it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk12.json
Error: Recall mismatch for model [Llama-3.2-11B-Vision-Instruct]
length 13 vs 12
T atomics:
['There is a person.', 'The person is holding a can.', 'There is a wall in the background.', 'There is a heater in the background.', 'The tin is consumable.', 'Nutritional facts are written on the tin.', 'The can is near a large wall heater.', 'There is a hand.', 'The hand is holding the can.', 'The can shows nutrition facts.', 'There is a toy.', 'The toy is sitting on the carpet.', 'The carpet is gray.']
Recall TPs:
['The tin is consumable.', 'Nutritional facts are written on the tin.', 'The hand is holding the can.', 'The can shows nutrition facts.']
Recall FNs:
['There is a person.', 'The person is holding a can.', 'There is a wall in the background.', 'There is a heater in the background.', 'The can is near a large wall heater.', 'There is a toy.', 'The toy is sitting on the carpet.', 'The carpet is gray.']


100%|██████████| 1/1 [00:11<00:00, 11.35s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk23.json


100%|██████████| 1/1 [00:00<00:00, 12300.01it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk23.json


100%|██████████| 1/1 [00:11<00:00, 11.13s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk15.json


100%|██████████| 1/1 [00:00<00:00, 12446.01it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk15.json


100%|██████████| 1/1 [00:12<00:00, 12.32s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk2.json


100%|██████████| 1/1 [00:00<00:00, 12336.19it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk2.json
Error: Recall mismatch for model [Llama-3.2-11B-Vision-Instruct]
length 19 vs 18
T atomics:
Recall TPs:
Recall FNs:
['The label is on the back of a dressing container.', 'There is a label on a small bottle.', 'The label shows contents.', 'The container is clear.', 'The label is orange.', 'The label is blue.', 'The bottle contains cleaner.', 'The cleaner is for hard surfaces.']


100%|██████████| 1/1 [00:11<00:00, 11.80s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk20.json


100%|██████████| 1/1 [00:00<00:00, 14074.85it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk20.json


100%|██████████| 1/1 [00:10<00:00, 10.87s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk19.json


100%|██████████| 1/1 [00:00<00:00, 12595.51it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk19.json


100%|██████████| 1/1 [00:13<00:00, 13.48s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk7.json


100%|██████████| 1/1 [00:00<00:00, 22075.28it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk7.json


100%|██████████| 1/1 [00:11<00:00, 11.98s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk8.json


100%|██████████| 1/1 [00:00<00:00, 12748.64it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk8.json


100%|██████████| 1/1 [00:12<00:00, 12.34s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk4.json


100%|██████████| 1/1 [00:00<00:00, 12483.05it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk4.json


100%|██████████| 1/1 [00:13<00:00, 13.68s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk16.json


100%|██████████| 1/1 [00:00<00:00, 12787.51it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk16.json


100%|██████████| 1/1 [00:09<00:00,  9.58s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk10.json


100%|██████████| 1/1 [00:00<00:00, 12595.51it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk10.json


100%|██████████| 1/1 [00:14<00:00, 14.81s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk1.json


100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk1.json


100%|██████████| 1/1 [00:14<00:00, 14.36s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk11.json


100%|██████████| 1/1 [00:00<00:00, 11275.01it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk11.json


100%|██████████| 1/1 [00:18<00:00, 18.32s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk9.json


100%|██████████| 1/1 [00:00<00:00, 12446.01it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk9.json


100%|██████████| 1/1 [00:13<00:00, 13.39s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk21.json


100%|██████████| 1/1 [00:00<00:00, 12372.58it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk21.json


100%|██████████| 1/1 [00:15<00:00, 15.66s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk17.json


100%|██████████| 1/1 [00:00<00:00, 12264.05it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk17.json


100%|██████████| 1/1 [00:16<00:00, 16.21s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk18.json


100%|██████████| 1/1 [00:00<00:00, 11214.72it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk18.json


100%|██████████| 1/1 [00:16<00:00, 16.85s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk0.json


100%|██████████| 1/1 [00:00<00:00, 21290.88it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk0.json


100%|██████████| 1/1 [00:43<00:00, 43.52s/it]


Saved JSON to: results/2025-04-08_21-45/recall_precision_2025-04-08_21-45_chunk6.json


100%|██████████| 1/1 [00:00<00:00, 28532.68it/s]


Saved JSON to: results/2025-04-08_21-45/final_2025-04-08_21-45_chunk6.json


In [65]:
def _chunk_sort_key(path):
    """Sort key that orders ``...chunk<N>.json`` files by the integer N.

    Files whose name does not end in ``chunk<digits>`` sort after all numbered
    chunks, in plain path order.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    _, marker, suffix = stem.rpartition("chunk")
    if marker and suffix.isdecimal():
        return (0, int(suffix), path)
    return (1, 0, path)


def merge_json_chunks(output_file, file_pattern):
    """Merge every JSON file matching ``file_pattern`` into one JSON list.

    Files are merged in numeric chunk order (chunk2 before chunk10), so the
    merged entries follow the order in which the chunks were cut. A plain
    lexicographic sort would place chunk10 before chunk2.

    Args:
        output_file: Path of the merged JSON file to write.
        file_pattern: Glob pattern selecting the chunk files.

    Returns:
        None. The merged list is written to ``output_file`` and a summary
        line is printed.

    Notes:
        A file that cannot be parsed is reported and skipped (best effort).
        A file whose top-level value is neither a list nor an object is also
        reported and skipped.
    """
    output_abs = os.path.abspath(output_file)
    merged_data = []

    for filename in sorted(glob.glob(file_pattern), key=_chunk_sort_key):
        # Never feed a previous merge result back into itself.
        if os.path.abspath(filename) == output_abs:
            continue

        with open(filename, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                print(f"Failed to read {filename}: {e}")
                continue

        if isinstance(data, list):
            merged_data.extend(data)
        elif isinstance(data, dict):
            merged_data.append(data)
        else:
            print(f"Skipping {filename}: expected a JSON list or object, got {type(data).__name__}")

    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=2, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")

# Combine this run's per-chunk "final_*" files into a single merged JSON file.
# The "__" prefix keeps the merged file from matching the chunk pattern itself.
merge_json_chunks(
    output_file=f"{folder_path}/__final_{timestamp}_merged.json",
    file_pattern=f"{folder_path}/final_{timestamp}_chunk*.json"
)


Merged 24 entries into results/2025-04-08_21-45/__final_2025-04-08_21-45_merged.json


In [66]:
import json
import csv

def format_matches(match_list):
    """Render match records as text, one ``left : right`` pair per line.

    A record holding both ``T_atomic`` and ``g_atomic`` is shown as
    ``T_atomic : g_atomic``. A record holding ``g_atomic`` and ``T_org`` is
    shown as ``g_atomic : T_org``. Anything else falls back to ``str()``.

    Args:
        match_list: Iterable of match records (dicts).

    Returns:
        The rendered lines joined with newlines; an empty string for no records.
    """
    def _render(m):
        has_generated = "g_atomic" in m
        if has_generated and "T_atomic" in m:
            return f'{m["T_atomic"]} : {m["g_atomic"]}'
        if has_generated and "T_org" in m:
            return f'{m["g_atomic"]} : {m["T_org"]}'
        # Unexpected record shape: show it verbatim rather than dropping it.
        return str(m)

    return "\n".join(_render(m) for m in match_list)


# Input: the merged JSON written by the merge step. Output: a CSV next to it.
json_path = f"{folder_path}/__final_{timestamp}_merged.json"
csv_path = f"{folder_path}/__final_{timestamp}_merged.csv"

# `data` is iterated record by record when the CSV rows are written.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)


# CSV columns, in output order (passed to csv.DictWriter as `fieldnames`).
# Layout: image identifiers and human-caption atomics first, then one group of
# caption/atomics/recall/precision detail columns per model (gpt, molmo,
# llama), then the numeric recall / precision / Cap F1 scores per model.
fieldnames = [
    "image",
    "link",
    "parsed_T",
    "T_atomics",
    "gpt_caption",
    "gpt_g_atomics",
    "gpt_recall_TPs",
    "gpt_recall_Matches",
    "gpt_recall_FNs",
    "gpt_precision_TPs",
    "gpt_precision_Matches",
    "gpt_precision_FPs",
    "molmo_caption",
    "molmo_g_atomics",
    "molmo_recall_TPs",
    "molmo_recall_Matches",
    "molmo_recall_FNs",
    "molmo_precision_TPs",
    "molmo_precision_Matches",
    "molmo_precision_FPs",
    "llama_caption",
    "llama_g_atomics",
    "llama_recall_TPs",
    "llama_recall_Matches",
    "llama_recall_FNs",
    "llama_precision_TPs", 
    "llama_precision_Matches",
    "llama_precision_FPs",
    "gpt_recall",
    "gpt_precision",
    "gpt_capf1",
    "molmo_recall",
    "molmo_precision",
    "molmo_capf1",
    "llama_recall",
    "llama_precision",
    "llama_capf1",
]

# Short column prefix used in the CSV -> model name used as a key in the
# evaluation JSON. Loop-invariant, so it is built once instead of once per row.
model_keys = {
    "gpt": "gpt-4o-2024-08-06",
    "molmo": "Molmo-7B-O-0924",
    "llama": "Llama-3.2-11B-Vision-Instruct",
}

# Position of each model's entry inside item["model_captions"].
# NOTE(review): this positional mapping (gpt=0, llama=1, molmo=2) is kept
# exactly as it was; the dataset schema is not visible here — confirm the order.
caption_index = {"gpt": 0, "molmo": 2, "llama": 1}

# Write one CSV row per merged record. Every key placed in `row` must appear in
# `fieldnames`, otherwise csv.DictWriter raises ValueError.
with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        file_name = item.get("file_name", "")
        cap_f1 = item.get("evaluation", {}).get("cap_f1", {})
        scores = cap_f1.get("scores", {})
        metadata = cap_f1.get("metadata", {})
        t_atomics = cap_f1.get("T_atomics", [])
        parsed_T = cap_f1.get("parsed_atomics", [])

        # Columns that do not depend on the model.
        row = {
            "image": file_name,
            # Spreadsheet formula so the cell renders as a clickable image link.
            "link": f'=HYPERLINK("https://vizwiz.cs.colorado.edu/VizWiz_visualization_img/{file_name}", "{file_name}")',
            "parsed_T": "\n".join(parsed_T),
            "T_atomics": "\n".join(t_atomics),
        }

        # Per-model columns: caption, scores, atomics and recall/precision details.
        for short_name, model_key in model_keys.items():
            row[f"{short_name}_caption"] = item["model_captions"][caption_index[short_name]]["caption"]

            # Missing scores stay None, which DictWriter writes as an empty cell.
            model_scores = scores.get(model_key, {})
            row[f"{short_name}_recall"] = model_scores.get("recall")
            row[f"{short_name}_precision"] = model_scores.get("precision")
            row[f"{short_name}_capf1"] = model_scores.get("cap_f1")

            g_atomics_list = cap_f1.get("g_atomics", {}).get(model_key, [])
            row[f"{short_name}_g_atomics"] = "\n".join(g_atomics_list)

            recall = metadata.get(model_key, {}).get("recall", {})
            row[f"{short_name}_recall_TPs"] = "\n".join(recall.get("TPs", []))
            row[f"{short_name}_recall_FNs"] = "\n".join(recall.get("FNs", []))
            row[f"{short_name}_recall_Matches"] = format_matches(recall.get("Match", []))

            precision = metadata.get(model_key, {}).get("precision", {})
            row[f"{short_name}_precision_TPs"] = "\n".join(precision.get("TPs", []))
            row[f"{short_name}_precision_FPs"] = "\n".join(precision.get("FPs", []))
            row[f"{short_name}_precision_Matches"] = format_matches(precision.get("Match", []))

        writer.writerow(row)

print(f"CSV file saved to: {csv_path}")


CSV file saved to: results/2025-04-08_21-45/__final_2025-04-08_21-45_merged.csv
