### Import Libraries

In [35]:
# library for cap_f1
from cap_f1 import *  # kept first so the explicit imports below win any name clash

import csv
import glob
import json
import math
import os
import re
from datetime import datetime
from multiprocessing import Pool


# Auto-reload edited Python modules so the kernel does not need to be restarted
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [36]:
# Placeholder caption text that is filtered out of the human captions below
UNRECOGNIZABLE_CAPTION = "Quality issues are too severe to recognize visual content."

print("Load caption file...")

# Original evaluation dataset; every record is expected to hold a "human_captions" list
org_caption_dataset = read_json("evaluation_results_5432-images_2025-04-03_11_27_fixed.json")

# One list of usable human caption strings per record, in dataset order
all_human_captions = [
    [
        entry["caption"]
        for entry in record["human_captions"]
        if entry["caption"] != UNRECOGNIZABLE_CAPTION
    ]
    for record in org_caption_dataset
]



Load caption file...


In [37]:
# Testing only: report the full dataset size, then keep just the first 32
# records together with their matching human-caption lists.
print(len(org_caption_dataset))
debug_window = slice(32)
org_caption_dataset, all_human_captions = (
    org_caption_dataset[debug_window],
    all_human_captions[debug_window],
)

5432


### Run with Multiple Processes

In [38]:
def process_batch(start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id):
    """Run the three pipeline stages on one slice of the dataset and save each stage.

    The slice ``[start_idx:end_idx]`` is taken from both ``org_caption_dataset``
    and ``all_human_captions``. Every stage writes a JSON file named
    ``{folder_path}/<stage>_{timestamp}_chunk{chunk_id}.json``; stages two and
    three pass the previous stage's file as ``update_existing``.

    Args:
        start_idx: First index of the slice (inclusive).
        end_idx: End index of the slice (exclusive).
        org_caption_dataset: Full dataset; only the slice is processed.
        all_human_captions: Human caption lists, indexed like the dataset.
        folder_path: Directory that receives the output files.
        timestamp: Run identifier embedded in every file name.
        chunk_id: Chunk number embedded in every file name.

    Returns:
        None. Results are only written through ``save_results_json``.
    """
    records = org_caption_dataset[start_idx:end_idx]
    human_refs = all_human_captions[start_idx:end_idx]
    record_count = len(records)

    def stage_path(stage):
        # All three stage files share one naming scheme and differ only in the prefix.
        return f"{folder_path}/{stage}_{timestamp}_chunk{chunk_id}.json"

    parsed_path = stage_path("parsed_caption")
    matched_path = stage_path("recall_precision")
    final_path = stage_path("final")

    # Stage 1: parse captions into atomic statements
    T_atomics, g_atomics = generate_atomic_statement(records, limit=record_count)
    save_results_json(
        output_path=parsed_path,
        org_dataset=records,
        T_atomics=T_atomics,
        g_atomics=g_atomics,
        limit=record_count,
    )

    # Stage 2: match human and generated statements
    metadata = evaluate_matching(human_refs, T_atomics, g_atomics)
    save_results_json(
        output_path=matched_path,
        update_existing=parsed_path,
        metadata=metadata,
        limit=record_count,
    )

    # Stage 3: compute Cap F1 from the matching metadata
    evaluation = calculate_cap_f1(metadata)
    save_results_json(
        output_path=final_path,
        update_existing=matched_path,
        evaluations=evaluation,
        limit=record_count,
    )

def run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=32):
    """Split the dataset into contiguous chunks and run ``process_batch`` on each in parallel.

    The dataset is cut into at most ``num_workers`` chunks of
    ``ceil(total / num_workers)`` records. Only non-empty chunks are submitted,
    so no job is started for an index range past the end of the data (this
    happened before whenever the rounded-up chunk size covered the dataset in
    fewer than ``num_workers`` chunks). Chunk ids are consecutive from 0.

    Args:
        org_caption_dataset: Full dataset, passed through to ``process_batch``.
        all_human_captions: Human caption lists, indexed like the dataset.
        folder_path: Output directory passed through to ``process_batch``.
        timestamp: Run identifier passed through to ``process_batch``.
        num_workers: Upper bound on the number of chunks and pool processes.

    Returns:
        None. The value returned by each job is discarded.

    Raises:
        ValueError: If ``num_workers`` is smaller than 1.
        Exception: Whatever a worker raised, re-raised by ``job.get()``.
    """
    if num_workers < 1:
        raise ValueError(f"num_workers must be at least 1, got {num_workers}")

    total = len(org_caption_dataset)
    if total == 0:
        # Nothing to process: avoid a zero chunk size and a pointless pool.
        return

    chunk_size = math.ceil(total / num_workers)
    # Start offsets of the non-empty chunks only; there are never more than num_workers.
    chunk_starts = range(0, total, chunk_size)

    # Do not spawn more processes than there are chunks to hand out.
    with Pool(processes=min(num_workers, len(chunk_starts))) as pool:
        jobs = [
            pool.apply_async(
                process_batch,
                (
                    start_idx,
                    min(start_idx + chunk_size, total),
                    org_caption_dataset,
                    all_human_captions,
                    folder_path,
                    timestamp,
                    chunk_id,
                ),
            )
            for chunk_id, start_idx in enumerate(chunk_starts)
        ]

        # Wait for every job; get() re-raises an exception raised inside a worker.
        for job in jobs:
            job.get()

In [39]:
# Timestamp with minute resolution, used in the folder name and every file name
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")

# Each run writes its results into results/<timestamp>
folder_path = f"results/{timestamp}"
os.makedirs(folder_path, exist_ok=True)

run_parallel_processing(
    org_caption_dataset,
    all_human_captions,
    folder_path,
    timestamp,
    num_workers=32,
)

100%|██████████| 1/1 [00:08<00:00,  8.69s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk12.json


100%|██████████| 1/1 [00:08<00:00,  8.85s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk22.json


100%|██████████| 1/1 [00:09<00:00,  9.14s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk25.json


100%|██████████| 1/1 [00:09<00:00,  9.90s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk15.json


100%|██████████| 1/1 [00:10<00:00, 10.04s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk19.json


100%|██████████| 1/1 [00:10<00:00, 10.30s/it]
100%|██████████| 1/1 [00:10<00:00, 10.29s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk0.json


  0%|          | 0/1 [00:00<?, ?it/s]

Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk11.json


100%|██████████| 1/1 [00:10<00:00, 10.31s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk14.json


100%|██████████| 1/1 [00:10<00:00, 10.52s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk6.json


100%|██████████| 1/1 [00:10<00:00, 10.90s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk3.json


100%|██████████| 1/1 [00:10<00:00, 11.00s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk24.json


100%|██████████| 1/1 [00:11<00:00, 11.08s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk1.json


100%|██████████| 1/1 [00:11<00:00, 11.14s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk21.json


100%|██████████| 1/1 [00:11<00:00, 11.28s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk5.json


100%|██████████| 1/1 [00:11<00:00, 11.62s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk23.json

100%|██████████| 1/1 [00:11<00:00, 11.65s/it]




  0%|          | 0/1 [00:00<?, ?it/s].65s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk2.json


100%|██████████| 1/1 [00:11<00:00, 11.71s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk16.json


100%|██████████| 1/1 [00:11<00:00, 11.80s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk31.json


100%|██████████| 1/1 [00:11<00:00, 11.86s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk4.json


100%|██████████| 1/1 [00:12<00:00, 12.26s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk29.json


100%|██████████| 1/1 [00:12<00:00, 12.35s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk26.json


100%|██████████| 1/1 [00:12<00:00, 12.78s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk18.json


100%|██████████| 1/1 [00:13<00:00, 13.10s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk13.json


100%|██████████| 1/1 [00:13<00:00, 13.59s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk27.json


100%|██████████| 1/1 [00:13<00:00, 13.64s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk30.json


100%|██████████| 1/1 [00:14<00:00, 14.00s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk20.json


100%|██████████| 1/1 [00:14<00:00, 14.65s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk17.json


100%|██████████| 1/1 [00:14<00:00, 14.98s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk28.json


100%|██████████| 1/1 [00:15<00:00, 15.34s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk10.json


100%|██████████| 1/1 [00:15<00:00, 15.36s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk9.json


100%|██████████| 1/1 [00:15<00:00, 15.79s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk8.json


100%|██████████| 1/1 [00:18<00:00, 18.70s/it]


Saved JSON to: results/2025-04-03_23-02/parsed_caption_2025-04-03_23-02_chunk7.json


100%|██████████| 1/1 [00:10<00:00, 10.61s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk25.json


100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk25.json


100%|██████████| 1/1 [00:11<00:00, 11.42s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk12.json


100%|██████████| 1/1 [00:00<00:00, 12826.62it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk12.json


100%|██████████| 1/1 [00:10<00:00, 10.04s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk5.json


100%|██████████| 1/1 [00:00<00:00, 11814.94it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk5.json


100%|██████████| 1/1 [00:12<00:00, 12.89s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk22.json


100%|██████████| 1/1 [00:00<00:00, 13315.25it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk22.json


100%|██████████| 1/1 [00:11<00:00, 11.51s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk0.json


100%|██████████| 1/1 [00:00<00:00, 12985.46it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk0.json


100%|██████████| 1/1 [00:11<00:00, 11.85s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk11.json


100%|██████████| 1/1 [00:00<00:00, 12483.05it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk11.json


100%|██████████| 1/1 [00:10<00:00, 10.43s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk4.json


100%|██████████| 1/1 [00:00<00:00, 11586.48it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk4.json


100%|██████████| 1/1 [00:12<00:00, 12.27s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk19.json


100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk19.json


100%|██████████| 1/1 [00:10<00:00, 10.68s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk16.json


100%|██████████| 1/1 [00:00<00:00, 12710.01it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk16.json


100%|██████████| 1/1 [00:10<00:00, 10.28s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk29.json


100%|██████████| 1/1 [00:00<00:00, 11781.75it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk29.json


100%|██████████| 1/1 [00:12<00:00, 12.37s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk6.json


100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk6.json
Error: Precision mismatch for model [Llama-3.2-11B-Vision-Instruct]
length 11 vs 10
G atomics:
['There is a glass bottle.', 'The bottle is small.', 'The bottle is dark green.', 'The bottle has a cap.', 'The cap is green.', 'The bottle has a label.', 'The label is white.', 'The label has a green band at the top.', "The label has the word 'Oxycodone' in black text.", 'The label includes additional text.', 'The additional text is blurry.']
Precision TPs:
['The cap is green.', 'The bottle has a label.']
Precision FPs:
['There is a glass bottle.', 'The bottle is small.', 'The bottle is dark green.', 'The label is white.', 'The label has a green band at the top.', "The label has the word 'Oxycodone' in black text.", 'The label includes additional text.', 'The additional text is blurry.']


100%|██████████| 1/1 [00:12<00:00, 12.19s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk1.json


100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk1.json


100%|██████████| 1/1 [00:11<00:00, 11.75s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk23.json


100%|██████████| 1/1 [00:00<00:00, 24385.49it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk23.json


100%|██████████| 1/1 [00:11<00:00, 11.01s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk18.json


100%|██████████| 1/1 [00:00<00:00, 28339.89it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk18.json
Error: Recall mismatch for model [Llama-3.2-11B-Vision-Instruct]
length 16 vs 16
T atomics:
['There is a can.', 'The can is red.', 'The can is of wild cherry Pepsi.', "The can has the word 'wild' on it.", 'The can is blue.', 'The can is white.', 'The can contains wild cherry Pepsi.', 'There is a logo on the can.', 'The logo has red stripes.', 'The logo has white stripes.', 'The logo has blue stripes.', 'The logo is circular.', 'There are nutritional facts on the can.', 'There is a liquid.', 'The liquid is in a container.', 'The image shows the side of the can.']
Recall TPs:
['There is a can.', 'The can is red.', 'There is a logo on the can.', 'The logo is blue.']
Recall FNs:
['The can is of wild cherry Pepsi.', "The can has the word 'wild' on it.", 'The can is blue.', 'The can is white.', 'The can contains wild cherry Pepsi.', 'The logo has red stripes.', 'The logo has white stripes.', 'The logo is circular.', 

100%|██████████| 1/1 [00:13<00:00, 14.00s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk15.json


100%|██████████| 1/1 [00:00<00:00, 22429.43it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk15.json


100%|██████████| 1/1 [00:11<00:00, 11.71s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk26.json


100%|██████████| 1/1 [00:00<00:00, 13357.66it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk26.json


100%|██████████| 1/1 [00:11<00:00, 11.18s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk27.json


100%|██████████| 1/1 [00:00<00:00, 13107.20it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk27.json


100%|██████████| 1/1 [00:11<00:00, 11.36s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk20.json


100%|██████████| 1/1 [00:00<00:00, 22429.43it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk20.json


100%|██████████| 1/1 [00:13<00:00, 13.87s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk31.json


100%|██████████| 1/1 [00:00<00:00, 9822.73it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk31.json


100%|██████████| 1/1 [00:10<00:00, 10.76s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk9.json


100%|██████████| 1/1 [00:00<00:00, 11781.75it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk9.json


100%|██████████| 1/1 [00:12<00:00, 12.65s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk30.json


100%|██████████| 1/1 [00:00<00:00, 29959.31it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk30.json


100%|██████████| 1/1 [00:15<00:00, 15.33s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk24.json


100%|██████████| 1/1 [00:00<00:00, 22429.43it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk24.json


100%|██████████| 1/1 [00:15<00:00, 15.53s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk2.json


100%|██████████| 1/1 [00:00<00:00, 13315.25it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk2.json


100%|██████████| 1/1 [00:16<00:00, 16.20s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk21.json


100%|██████████| 1/1 [00:00<00:00, 11715.93it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk21.json


100%|██████████| 1/1 [00:12<00:00, 12.75s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk28.json


100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk28.json


100%|██████████| 1/1 [00:17<00:00, 17.33s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk3.json


100%|██████████| 1/1 [00:00<00:00, 12520.31it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk3.json


100%|██████████| 1/1 [00:13<00:00, 13.60s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk17.json


100%|██████████| 1/1 [00:00<00:00, 22310.13it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk17.json


100%|██████████| 1/1 [00:12<00:00, 12.92s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk8.json


100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk8.json


100%|██████████| 1/1 [00:13<00:00, 13.46s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk10.json


100%|██████████| 1/1 [00:00<00:00, 24818.37it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk10.json


100%|██████████| 1/1 [00:15<00:00, 15.82s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk13.json


100%|██████████| 1/1 [00:00<00:00, 11915.64it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk13.json


100%|██████████| 1/1 [00:19<00:00, 19.34s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk14.json


100%|██████████| 1/1 [00:00<00:00, 11650.84it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk14.json


100%|██████████| 1/1 [00:12<00:00, 12.90s/it]


Saved JSON to: results/2025-04-03_23-02/recall_precision_2025-04-03_23-02_chunk7.json


100%|██████████| 1/1 [00:00<00:00, 29746.84it/s]


Saved JSON to: results/2025-04-03_23-02/final_2025-04-03_23-02_chunk7.json


In [26]:

def merge_json_chunks(output_file, file_pattern):
    """Merge every JSON file matching ``file_pattern`` into one JSON list.

    Files are visited in natural order, so ``chunk2`` comes before ``chunk10``.
    A plain lexicographic sort would put ``chunk10`` first and scramble the
    order of the merged entries. A file holding a list contributes all of its
    elements, a file holding an object contributes that object, and any other
    top-level JSON value is ignored. A file that cannot be read or parsed is
    reported and skipped.

    Args:
        output_file: Path of the merged JSON file to write.
        file_pattern: Glob pattern selecting the chunk files.

    Returns:
        None. The merged list is written to ``output_file``.
    """

    def natural_key(path):
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always the digit runs; compare those as ints.
        return [
            int(part) if index % 2 else part
            for index, part in enumerate(re.split(r"(\d+)", path))
        ]

    merged_data = []

    for filename in sorted(glob.glob(file_pattern), key=natural_key):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                print(f"Failed to read {filename}: {e}")
                continue

        if isinstance(data, list):
            merged_data.extend(data)
        elif isinstance(data, dict):
            merged_data.append(data)

    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=2, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")

# Combine the per-chunk "final" files of this run into a single merged JSON file
merged_json_path = f"{folder_path}/__final_{timestamp}_merged.json"
final_chunk_pattern = f"{folder_path}/final_{timestamp}_chunk*.json"
merge_json_chunks(output_file=merged_json_path, file_pattern=final_chunk_pattern)


Merged 32 entries into results/2025-04-03_22-44/__final_2025-04-03_22-44_merged.json


In [27]:
# Export the merged evaluation JSON as a flat CSV: one row per merged entry.
json_path = f"{folder_path}/__final_{timestamp}_merged.json"
csv_path = f"{folder_path}/__final_{timestamp}_merged.csv"

# CSV column prefix -> model identifier used as key inside the evaluation JSON.
# Dict order also fixes the order of the per-model column groups.
MODEL_KEYS = {
    "gpt": "gpt-4o-2024-08-06",
    "molmo": "Molmo-7B-O-0924",
    "llama": "Llama-3.2-11B-Vision-Instruct",
}

# Position of each model's entry inside item["model_captions"].
# NOTE(review): this relies on the list order in the dataset — confirm it is stable.
CAPTION_INDEX = {"gpt": 0, "molmo": 2, "llama": 1}

# Per-model text columns, in output order.
TEXT_SUFFIXES = (
    "caption",
    "g_atomics",
    "recall_TPs",
    "recall_FNs",
    "precision_TPs",
    "precision_FPs",
)

# CSV column suffix -> key inside the per-model "scores" dict, in output order.
SCORE_FIELDS = {"recall": "recall", "precision": "precision", "capf1": "cap_f1"}

# image, T_atomics, then the text columns per model, then the score columns per model.
fieldnames = (
    ["image", "T_atomics"]
    + [f"{short_name}_{suffix}" for short_name in MODEL_KEYS for suffix in TEXT_SUFFIXES]
    + [f"{short_name}_{suffix}" for short_name in MODEL_KEYS for suffix in SCORE_FIELDS]
)


def build_csv_row(item):
    """Flatten one merged evaluation entry into a dict keyed by ``fieldnames``.

    List-valued fields are joined with newlines. Evaluation data that is
    missing falls back to an empty string (lists) or ``None`` (scores), which
    ``csv.DictWriter`` writes as an empty cell.

    Raises:
        KeyError, IndexError: If ``item["model_captions"]`` is missing or has
            fewer entries than ``CAPTION_INDEX`` expects.
    """
    cap_f1 = item.get("evaluation", {}).get("cap_f1", {})
    scores = cap_f1.get("scores", {})
    metadata = cap_f1.get("metadata", {})
    g_atomics = cap_f1.get("g_atomics", {})

    row = {
        "image": item.get("file_name", ""),
        "T_atomics": "\n".join(cap_f1.get("T_atomics", [])),
    }

    for short_name, model_key in MODEL_KEYS.items():
        model_meta = metadata.get(model_key, {})
        recall_meta = model_meta.get("recall", {})
        precision_meta = model_meta.get("precision", {})
        model_scores = scores.get(model_key, {})

        row[f"{short_name}_caption"] = item["model_captions"][CAPTION_INDEX[short_name]]["caption"]
        row[f"{short_name}_g_atomics"] = "\n".join(g_atomics.get(model_key, []))
        row[f"{short_name}_recall_TPs"] = "\n".join(recall_meta.get("TPs", []))
        row[f"{short_name}_recall_FNs"] = "\n".join(recall_meta.get("FNs", []))
        row[f"{short_name}_precision_TPs"] = "\n".join(precision_meta.get("TPs", []))
        row[f"{short_name}_precision_FPs"] = "\n".join(precision_meta.get("FPs", []))

        for column_suffix, score_key in SCORE_FIELDS.items():
            row[f"{short_name}_{column_suffix}"] = model_scores.get(score_key)

    return row


with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        writer.writerow(build_csv_row(item))

print(f"CSV file saved to: {csv_path}")

CSV file saved to: results/2025-04-03_22-44/__final_2025-04-03_22-44_merged.csv
