### Include Library

In [8]:
# library for cap_f1
from cap_f1 import *
from datetime import datetime
from multiprocessing import Pool
import math
import json
import glob
import os


# code for no need for restarting the kernel when python file is updated
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [9]:
print("Load caption file...")

# for filename
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d_%H-%M")

#create folder to save the results
folder_path = f"results/{timestamp}"
os.makedirs(folder_path, exist_ok=True)

# features that we need to extract from the original dataset
org_caption_dataset = read_json("evaluation_results_5432-images_2025-04-03_11_27_fixed.json")



Load caption file...


In [10]:
# This is for testing
select_data("results/selected_filename.txt", "evaluation_results_5432-images_2025-04-03_11_27_fixed.json", f"{folder_path}/selected_data.json")
org_caption_dataset = read_json(f"{folder_path}/selected_data.json")
print(len(org_caption_dataset))
# org_caption_dataset = org_caption_dataset[:1]
# all_human_captions = all_human_captions[:1]

all_human_captions=[]
for item in org_caption_dataset:
    # Filter out human captions
    human_captions = [
        hc["caption"]            
        for hc in item["human_captions"]
        if hc["caption"] != "Quality issues are too severe to recognize visual content."
    ]
    all_human_captions.append(human_captions)

24 data, JSON results/2025-04-09_17-07/selected_data.json created with 24 items.
24


### Run Multi Processors

In [11]:
def process_batch(start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id):
    subset = org_caption_dataset[start_idx:end_idx]
    LIMIT = len(subset)
    human_subset = all_human_captions[start_idx:end_idx]
    print(human_subset)

    # Step 1: Parse atomics
    T_atomics, g_atomics, parsed_T = generate_atomic_statement(subset, limit=LIMIT)
    save_results_json(output_path=f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json",
                      org_dataset=subset, T_atomics=T_atomics, g_atomics=g_atomics, parsed_T= parsed_T, T_org=human_subset, limit=LIMIT)

    # Step 2: Match human & generated
    metadata = evaluate_matching(human_subset, T_atomics, g_atomics)
    save_results_json(output_path=f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json",
                      update_existing=f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json",
                      metadata=metadata, limit=LIMIT)

    # Step 3: Cap F1
    evaluation = calculate_cap_f1(metadata)
    save_results_json(output_path=f"{folder_path}/final_{timestamp}_chunk{chunk_id}.json",
                      update_existing=f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json",
                      evaluations=evaluation, limit=LIMIT)

def run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=32):
    total = len(org_caption_dataset)
    chunk_size = math.ceil(total / num_workers)

    with Pool(processes=num_workers) as pool:
        jobs = []
        for i in range(num_workers):
            start_idx = i * chunk_size
            end_idx = min((i + 1) * chunk_size, total)
            jobs.append(pool.apply_async(process_batch, (start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, i)))
        
        for job in jobs:
            job.get()

In [12]:
1

run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=24)

[['A can of Coca Cola on a counter is shown for when one can use a nice, cold drink.', 'A black can of Coca Cola Zero calorie soda is on the counter near the coffee maker.', 'A kitchen counter the various items on top including a can of Coca-Cola, metal containers, and a teapot.', 'a black tin of Coca Cola placed on a black surface', 'Black counter with canisters, kettle and can of soda.']]
[['A certificate showing a flaming torch insignia, with the text "TOAD Training"', 'A certificate of Toad Training from a university that is not able to be read.', 'A piece of paper with a torch on it and a name tag.', 'A sheet of paper that has a logo on it and the words Toad Training.', 'A flyer for Toad Training sponsored by a university']][['A copy of Grand theft auto vice city for PlayStation 2.', 'Most of grand theft auto vice, a video game, is shown with the cover of the video game showing a motorcycle in the upper right corner, and an African American man with a gun in the lower right corner

  0%|          | 0/1 [00:00<?, ?it/s]

[['An open package of Anti-diarrheal medication sitting on a counter top.', 'A box of over the counter anti-diarrheal medication from Kroger', 'a small package of Kroger anti-diarrheal medication tablets.', 'a paper pack of Anti Diarrheal to control the symptoms of Diarrhea', 'Kroger brand anti diarrheal medicine on a counter top.']]






  0%|          | 0/1 [00:00<?, ?it/s]

[['a black TV inside a wood TV cabinet', 'appears to be a picture of a TV screen', 'Brown cabinet with a black TV in it which is showing the news with two men and a woman', 'A TV is showing the current news via Fox', 'A television showing news playing with three spokespersons.']][['A cover of a book that has a picture of an older man and the title of the book and author.', 'Book with a picture of a oval with a man in the center.', 'A picture of a man is printed on the book cover.', 'a picture of Charles Dickens a Christmas carol.', 'A portion of the cover of the book "A Christmas Carol"']]


  0%|          | 0/1 [00:00<?, ?it/s]

[['A person is holding a package of Gerber baby food.', 'A hand holding a Gerber baby food package with meat and apples pictures on it.', 'a package of gerber apples and chicken baby food', 'A package of individual servings of baby food.', 'Someone is holding a pack of 2 Gerber baby food with a table and chairs in the background.']]
[['Appears to  be a picture of a flyer', 'Package from a bamboo stick style scent diffuser showing part of the directions for use.', 'A hand holding a box that looks like a vase with sticks sticking out of it.', 'A hand holding a box with another white paper next to it describing a reed diffuser.', 'A person holding a box with a gold colored round object on it']]


  0%|          | 0/1 [00:00<?, ?it/s]




  0%|          | 0/1 [00:00<?, ?it/s]

[['PSP UMD disc of a game called Fate featuring two anime characters', 'A disc with a cartoon image drawn on top.', 'A close up view of a DVD in a container.', 'A blurry photo of a round item that could be a CD or DVD, but it is difficult to tell.']]
[['A package of cooked lunch meat, either turkey or ham.', 'A close up of a plastic container with a sandwich on the cover.', 'Packaging for sliced deli meat in a red plastic container.', 'A close up image of a package depicting a sandwich with lunch meat, lettuce, and bread.', 'Close up of a container of deli turkey meat in a plastic tub.']][['A brown box of aftershave product on a round table.', 'A box of aftershave is sitting on the table.', "A brown and yellow box of a men's grooming product sitting on an orange table."]][['A package made of paper material with a label that says "From" and "To".', 'Packaging from something containing fresh organic fruit and looks like is maybe for gifting, as it has "to" and "from" written on it', 'A g

  0%|          | 0/1 [00:00<?, ?it/s]

[['Someone holding a can with the background of a wall and heater.', 'a person is holding a tin of consumable with nutritional facts written on it', 'Part of a nutrition label on a can held up near a large wall heater.', 'A hand is holding a can showing the nutrition facts.', 'A hand holding an aluminum can and a toy sitting on the gray carpet.']]


  0%|          | 0/1 [00:00<?, ?it/s]





  0%|          | 0/1 [00:00<?, ?it/s]




  0%|          | 0/1 [00:00<?, ?it/s]


[['Hand holding a drinking cup with red lettering from a restaurant.', 'A plastic soft drink cup with sponsorships written on it in red letters.', 'a person is holding a white cup with ESPN on it', 'A person holding a large sized drinking cup.']]

  0%|          | 0/1 [00:00<?, ?it/s]

[['A lunch juice box from Dawn on top of a countertop.', 'a box of dawn brand smooth and juicy drink', 'A container of orange juice laying down on a orange colored surface.', 'A box containing smooth juicy is placed on a   brown surface.', 'A smooth N juicy orange juice by the brand Dawn']]

  0%|          | 0/1 [00:00<?, ?it/s]


[['Side of a white bag with food label showing in cigarettes in the background', 'The side of a bag of sugar and an ashtray.', 'an opened package of sugar on a counter top with an ashtray full of cigarette butts, and a pot', 'A large bag of chocolates is placed in front of an ashtray for cigarettes.', 'A large, open bag of granulated sugar on a kitchen countertop, next to an ashtray that is half-full of cigarette butts.']]

  0%|          | 0/1 [00:00<?, ?it/s]

[['imagine how you would describe this image on the phone to a friend.', 'A box of sandwiches and a bag of food are on a fabric', 'An orange box of some sort of food on the label and a business card to the left of it.', 'Two products of food placed on a couch.', 'a bunch of couches and pillows with bags on it']][['A piece of paper with three columns and typed text interspersed throughout but which is not legible due to the quality of photo.', 'A large white paper with rectangles and writing on it.', 'A worksheet is shown and it is upside down.']]

  0%|          | 0/1 [00:00<?, ?it/s]







100%|██████████| 1/1 [00:07<00:00,  7.03s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk22.json


100%|██████████| 1/1 [00:07<00:00,  7.79s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk15.json


100%|██████████| 1/1 [00:08<00:00,  8.47s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk6.json


100%|██████████| 1/1 [00:09<00:00,  9.99s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk19.json


100%|██████████| 1/1 [00:10<00:00, 10.00s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk16.json


100%|██████████| 1/1 [00:10<00:00, 10.06s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk9.json


100%|██████████| 1/1 [00:10<00:00, 10.24s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk4.json


100%|██████████| 1/1 [00:10<00:00, 10.30s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk5.json


  0%|          | 0/1 [00:00<?, ?it/s]

Error: Recall mismatch for model [gpt-4o-2024-08-06]
length 7 vs 7
T atomics:
['There is a piece of paper.', 'The paper has three columns.', 'The text is not legible.', 'The paper is white.', 'There are rectangles on the paper.', 'A worksheet is shown.', 'The worksheet is upside down.']
Recall TPs:
['The text is not legible']
Recall FNs:
['There is a piece of paper.', 'The paper has three columns.', 'The paper is white.', 'There are rectangles on the paper.', 'A worksheet is shown.', 'The worksheet is upside down.']


100%|██████████| 1/1 [00:10<00:00, 10.50s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk0.json


100%|██████████| 1/1 [00:10<00:00, 10.53s/it]
100%|██████████| 1/1 [00:10<00:00, 10.53s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk12.json
Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk10.json

  0%|          | 0/1 [00:00<?, ?it/s]




100%|██████████| 1/1 [00:10<00:00, 10.72s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk7.json


100%|██████████| 1/1 [00:10<00:00, 10.94s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk8.json


100%|██████████| 1/1 [00:11<00:00, 11.17s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk11.json


100%|██████████| 1/1 [00:11<00:00, 11.43s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk13.json


100%|██████████| 1/1 [00:11<00:00, 11.57s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk14.json


100%|██████████| 1/1 [00:11<00:00, 11.86s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk3.json


100%|██████████| 1/1 [00:11<00:00, 11.94s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk23.json


100%|██████████| 1/1 [00:12<00:00, 12.01s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk21.json


100%|██████████| 1/1 [00:12<00:00, 12.40s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk2.json


100%|██████████| 1/1 [00:12<00:00, 12.45s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk1.json


100%|██████████| 1/1 [00:12<00:00, 13.00s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk20.json


100%|██████████| 1/1 [00:13<00:00, 13.50s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk18.json


100%|██████████| 1/1 [00:13<00:00, 13.64s/it]


Saved JSON to: results/2025-04-09_17-07/parsed_caption_2025-04-09_17-07_chunk17.json


  0%|          | 0/1 [00:00<?, ?it/s]

Error: Recall mismatch for model [Llama-3.2-11B-Vision-Instruct]
length 13 vs 14
T atomics:
['There is a white bag.', 'The bag is on a wooden surface.', 'The bag is next to a paneled wall.', 'There is a person.', 'The person is holding a beverage.', 'The beverage is in an aluminum can.', 'The aluminum can is white.', 'The aluminum can is red.', 'There is a canister shaped object.', 'The canister shaped object is near a wall.', 'The wall is made of wood.', 'There is a liquid.', 'The liquid is in a container.']
Recall TPs:
['There is a paneled wall.']
Recall FNs:
['There is a white bag.', 'The bag is on a wooden surface.', 'The bag is next to a paneled wall.', 'There is a person.', 'The person is holding a beverage.', 'The beverage is in an aluminum can.', 'The aluminum can is white.', 'The aluminum can is red.', 'There is a canister shaped object.', 'The canister shaped object is near a wall.', 'The wall is made of wood.', 'There is a liquid.', 'The liquid is in a container.']
Error: Pr

100%|██████████| 1/1 [00:09<00:00,  9.42s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk15.json


100%|██████████| 1/1 [00:00<00:00, 13400.33it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk15.json


100%|██████████| 1/1 [00:10<00:00, 10.01s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk6.json


100%|██████████| 1/1 [00:00<00:00, 29537.35it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk6.json


100%|██████████| 1/1 [00:11<00:00, 11.57s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk22.json


100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk22.json


100%|██████████| 1/1 [00:08<00:00,  8.51s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk4.json


100%|██████████| 1/1 [00:00<00:00, 12787.51it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk4.json


100%|██████████| 1/1 [00:08<00:00,  8.92s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk13.json


100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk13.json


100%|██████████| 1/1 [00:09<00:00,  9.39s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk14.json


100%|██████████| 1/1 [00:00<00:00, 13066.37it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk14.json


100%|██████████| 1/1 [00:10<00:00, 10.85s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk7.json


100%|██████████| 1/1 [00:00<00:00, 13107.20it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk7.json


100%|██████████| 1/1 [00:12<00:00, 12.64s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk12.json


100%|██████████| 1/1 [00:00<00:00, 30174.85it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk12.json


100%|██████████| 1/1 [00:12<00:00, 12.46s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk11.json


100%|██████████| 1/1 [00:00<00:00, 13357.66it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk11.json


100%|██████████| 1/1 [00:14<00:00, 14.40s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk9.json


100%|██████████| 1/1 [00:00<00:00, 25115.59it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk9.json


100%|██████████| 1/1 [00:14<00:00, 14.23s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk5.json


100%|██████████| 1/1 [00:00<00:00, 12157.40it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk5.json


100%|██████████| 1/1 [00:14<00:00, 14.46s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk10.json


100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk10.json


100%|██████████| 1/1 [00:12<00:00, 12.72s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk2.json


100%|██████████| 1/1 [00:00<00:00, 30615.36it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk2.json


100%|██████████| 1/1 [00:15<00:00, 15.77s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk19.json


100%|██████████| 1/1 [00:00<00:00, 13231.24it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk19.json


100%|██████████| 1/1 [00:15<00:00, 15.38s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk0.json


100%|██████████| 1/1 [00:00<00:00, 13273.11it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk0.json


100%|██████████| 1/1 [00:14<00:00, 14.66s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk23.json


100%|██████████| 1/1 [00:00<00:00, 27594.11it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk23.json


100%|██████████| 1/1 [00:14<00:00, 14.79s/it]
100%|██████████| 1/1 [00:14<00:00, 14.79s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk16.jsonSaved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk3.json



100%|██████████| 1/1 [00:00<00:00, 20560.31it/s]



Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk3.json
Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk16.json


100%|██████████| 1/1 [00:14<00:00, 14.05s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk17.json


100%|██████████| 1/1 [00:00<00:00, 13273.11it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk17.json


100%|██████████| 1/1 [00:17<00:00, 17.16s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk8.json


100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk8.json


100%|██████████| 1/1 [00:15<00:00, 15.14s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk20.json


100%|██████████| 1/1 [00:00<00:00, 11650.84it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk20.json


100%|██████████| 1/1 [00:18<00:00, 18.55s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk18.json


100%|██████████| 1/1 [00:00<00:00, 13273.11it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk18.json


100%|██████████| 1/1 [00:20<00:00, 20.02s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk1.json


100%|██████████| 1/1 [00:00<00:00, 25115.59it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk1.json


100%|██████████| 1/1 [00:21<00:00, 21.06s/it]


Saved JSON to: results/2025-04-09_17-07/recall_precision_2025-04-09_17-07_chunk21.json


100%|██████████| 1/1 [00:00<00:00, 10155.70it/s]


Saved JSON to: results/2025-04-09_17-07/final_2025-04-09_17-07_chunk21.json


In [13]:
def merge_json_chunks(output_file, file_pattern):
    merged_data = []

    for filename in sorted(glob.glob(file_pattern)):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                if isinstance(data, list):
                    merged_data.extend(data)
                elif isinstance(data, dict):
                    merged_data.append(data)
            except Exception as e:
                print(f"Failed to read {filename}: {e}")

    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=2, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")

merge_json_chunks(
    output_file=f"{folder_path}/__final_{timestamp}_merged.json",
    file_pattern=f"{folder_path}/final_{timestamp}_chunk*.json"
)


Merged 24 entries into results/2025-04-09_17-07/__final_2025-04-09_17-07_merged.json


In [14]:
import json
import csv

def format_matches(match_list):
    lines = []
    for m in match_list:
        if "T_atomic" in m and "g_atomic" in m:
            lines.append(f'{m["T_atomic"]} : {m["g_atomic"]}')
        elif "g_atomic" in m and "T_org" in m:
            lines.append(f'{m["g_atomic"]} : {m["T_org"]}')
        else:
            lines.append(str(m))  # fallback for unexpected format
    return "\n".join(lines)


json_path = f"{folder_path}/__final_{timestamp}_merged.json"
csv_path = f"{folder_path}/__final_{timestamp}_merged.csv"

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)


fieldnames = [
    "image",
    "link",
    "T_org",
    "parsed_T",
    "T_atomics",
    "gpt_caption",
    "gpt_g_atomics",
    "gpt_recall_TPs",
    "gpt_recall_Matches",
    "gpt_recall_FNs",
    "gpt_precision_TPs",
    "gpt_precision_Matches",
    "gpt_precision_FPs",
    "molmo_caption",
    "molmo_g_atomics",
    "molmo_recall_TPs",
    "molmo_recall_Matches",
    "molmo_recall_FNs",
    "molmo_precision_TPs",
    "molmo_precision_Matches",
    "molmo_precision_FPs",
    "llama_caption",
    "llama_g_atomics",
    "llama_recall_TPs",
    "llama_recall_Matches",
    "llama_recall_FNs",
    "llama_precision_TPs", 
    "llama_precision_Matches",
    "llama_precision_FPs",
    "gpt_recall",
    "gpt_precision",
    "gpt_capf1",
    "molmo_recall",
    "molmo_precision",
    "molmo_capf1",
    "llama_recall",
    "llama_precision",
    "llama_capf1",
]

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        file_name = item.get("file_name", "")
        cap_f1 = item.get("evaluation", {}).get("cap_f1", {})
        scores = cap_f1.get("scores", {})
        metadata = cap_f1.get("metadata", {})
        t_atomics = cap_f1.get("T_atomics", [])
        parsed_T = cap_f1.get("parsed_atomics", [])
        T_org = cap_f1.get("T_org", [])

        model_keys = {
            "gpt": "gpt-4o-2024-08-06",
            "molmo": "Molmo-7B-O-0924",
            "llama": "Llama-3.2-11B-Vision-Instruct",
        }

        row = {
            "image": file_name,
            "link": f'=HYPERLINK("https://vizwiz.cs.colorado.edu/VizWiz_visualization_img/{file_name}", "{file_name}")',
            "T_org": "\n".join(T_org), 
            "parsed_T": "\n".join(parsed_T), 
            "T_atomics": "\n".join(t_atomics),
            "gpt_caption": item["model_captions"][0]["caption"],
            "molmo_caption": item["model_captions"][2]["caption"],
            "llama_caption": item["model_captions"][1]["caption"],
            "gpt_g_atomics": "", "molmo_g_atomics": "", "llama_g_atomics": "",
            "gpt_recall_TPs": "", "molmo_recall_TPs": "", "llama_recall_TPs": "",
            "gpt_recall_Matches": "", "molmo_recall_Matches": "", "llama_recall_Matches": "",
            "gpt_recall_FNs": "", "molmo_recall_FNs": "", "llama_recall_FNs": "",
            "gpt_precision_TPs": "", "molmo_precision_TPs": "", "llama_precision_TPs": "",
            "gpt_precision_Matches": "", "molmo_precision_Matches": "", "llama_precision_Matches": "",
            "gpt_precision_FPs": "", "molmo_precision_FPs": "", "llama_precision_FPs": "",
            "gpt_recall": scores.get(model_keys["gpt"], {}).get("recall"),
            "gpt_precision": scores.get(model_keys["gpt"], {}).get("precision"),
            "gpt_capf1": scores.get(model_keys["gpt"], {}).get("cap_f1"),
            "molmo_recall": scores.get(model_keys["molmo"], {}).get("recall"),
            "molmo_precision": scores.get(model_keys["molmo"], {}).get("precision"),
            "molmo_capf1": scores.get(model_keys["molmo"], {}).get("cap_f1"),
            "llama_recall": scores.get(model_keys["llama"], {}).get("recall"),
            "llama_precision": scores.get(model_keys["llama"], {}).get("precision"),
            "llama_capf1": scores.get(model_keys["llama"], {}).get("cap_f1"),
        }

        for short_name, model_key in model_keys.items():
            g_atomics_list = cap_f1.get("g_atomics", {}).get(model_key, [])
            row[f"{short_name}_g_atomics"] = "\n".join(g_atomics_list)

            recall = metadata.get(model_key, {}).get("recall", {})
            row[f"{short_name}_recall_TPs"] = "\n".join(recall.get("TPs", []))
            row[f"{short_name}_recall_FNs"] = "\n".join(recall.get("FNs", []))
            row[f"{short_name}_recall_Matches"] = format_matches(recall.get("Match", []))

            precision = metadata.get(model_key, {}).get("precision", {})
            row[f"{short_name}_precision_TPs"] = "\n".join(precision.get("TPs", []))
            row[f"{short_name}_precision_FPs"] = "\n".join(precision.get("FPs", []))
            row[f"{short_name}_precision_Matches"] = format_matches(precision.get("Match", []))

        writer.writerow(row)

print(f"CSV file saved to: {csv_path}")


CSV file saved to: results/2025-04-09_17-07/__final_2025-04-09_17-07_merged.csv
