### Import Libraries

In [41]:
# library for cap_f1
from cap_f1 import *

# Standard library. Kept after the wildcard import so these names take
# precedence if cap_f1 happens to export identically named objects.
import csv
import glob
import json
import math
import os
import re
from datetime import datetime
from multiprocessing import Pool


# code for no need for restarting the kernel when python file is updated
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [42]:
print("Load caption file...")

# Caption dataset file handed to read_json.
CAPTION_FILE = "evaluation_results_5432-images_2025-04-03_11_27_fixed.json"
# Human captions equal to this exact text are excluded below.
UNRECOGNIZABLE_CAPTION = "Quality issues are too severe to recognize visual content."

org_caption_dataset = read_json(CAPTION_FILE)

# One list of remaining human captions per dataset item, in dataset order.
all_human_captions = [
    [
        entry["caption"]
        for entry in record["human_captions"]
        if entry["caption"] != UNRECOGNIZABLE_CAPTION
    ]
    for record in org_caption_dataset
]



Load caption file...


In [43]:
# Testing only: show the full dataset size, then keep just the first
# SAMPLE_SIZE entries of both parallel lists for a quick run.
SAMPLE_SIZE = 128
print(len(org_caption_dataset))
org_caption_dataset, all_human_captions = (
    org_caption_dataset[:SAMPLE_SIZE],
    all_human_captions[:SAMPLE_SIZE],
)

5432


### Run Multiprocessing

In [44]:
def process_batch(start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id):
    """Run the three evaluation stages on one slice of the dataset and save each stage.

    Args:
        start_idx: Start of the slice (inclusive).
        end_idx: End of the slice (exclusive).
        org_caption_dataset: Sequence of dataset items; only ``[start_idx:end_idx]`` is used.
        all_human_captions: Sequence sliced with the same bounds as the dataset.
        folder_path: Directory that receives the three JSON files.
        timestamp: Text embedded in every output file name.
        chunk_id: Identifier embedded in every output file name.

    Returns:
        None. Every stage is persisted through ``save_results_json``.
    """
    dataset_chunk = org_caption_dataset[start_idx:end_idx]
    human_chunk = all_human_captions[start_idx:end_idx]
    chunk_len = len(dataset_chunk)

    # Each stage writes its own file; stages 2 and 3 pass the previous
    # stage's file as ``update_existing``.
    parsed_path = f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json"
    recall_precision_path = f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json"
    final_path = f"{folder_path}/final_{timestamp}_chunk{chunk_id}.json"

    # Stage 1: parse atomic statements.
    T_atomics, g_atomics = generate_atomic_statement(dataset_chunk, limit=chunk_len)
    save_results_json(
        output_path=parsed_path,
        org_dataset=dataset_chunk,
        T_atomics=T_atomics,
        g_atomics=g_atomics,
        limit=chunk_len,
    )

    # Stage 2: match human and generated atomics.
    metadata = evaluate_matching(human_chunk, T_atomics, g_atomics)
    save_results_json(
        output_path=recall_precision_path,
        update_existing=parsed_path,
        metadata=metadata,
        limit=chunk_len,
    )

    # Stage 3: compute Cap F1 from the matching metadata.
    evaluation = calculate_cap_f1(metadata)
    save_results_json(
        output_path=final_path,
        update_existing=recall_precision_path,
        evaluations=evaluation,
        limit=chunk_len,
    )

def run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=32):
    """Split the dataset into contiguous chunks and run ``process_batch`` on each in a process pool.

    The chunk size is ``ceil(len(org_caption_dataset) / num_workers)``. Only
    chunks that contain at least one item are submitted, so fewer than
    ``num_workers`` tasks may run. Chunk ids are numbered from 0 in dataset order.

    Args:
        org_caption_dataset: Sliceable sequence of dataset items.
        all_human_captions: Sliceable sequence aligned index-by-index with the dataset.
        folder_path: Output directory forwarded to ``process_batch``.
        timestamp: Text forwarded to ``process_batch`` for the output file names.
        num_workers: Maximum number of worker processes (and of chunks).

    Returns:
        None. Returns immediately when the dataset is empty.

    Raises:
        ValueError: If ``num_workers`` is smaller than 1.
        Exception: Whatever a worker raised, re-raised by ``job.get()``.
    """
    if num_workers < 1:
        raise ValueError(f"num_workers must be at least 1, got {num_workers}")

    total = len(org_caption_dataset)
    if total == 0:
        # Nothing to evaluate; do not start a pool just to process empty slices.
        return

    chunk_size = math.ceil(total / num_workers)
    # Start offsets of the non-empty chunks only. When total is not a multiple
    # of chunk_size (or total < num_workers) the trailing workers would
    # otherwise receive empty slices.
    chunk_starts = range(0, total, chunk_size)

    with Pool(processes=min(num_workers, len(chunk_starts))) as pool:
        jobs = []
        for chunk_id, start_idx in enumerate(chunk_starts):
            end_idx = min(start_idx + chunk_size, total)
            # Task arguments are pickled and sent to the worker, so hand each
            # task only its own slice (re-based to 0) instead of both full lists.
            dataset_chunk = org_caption_dataset[start_idx:end_idx]
            human_chunk = all_human_captions[start_idx:end_idx]
            jobs.append(pool.apply_async(
                process_batch,
                (0, len(dataset_chunk), dataset_chunk, human_chunk, folder_path, timestamp, chunk_id),
            ))

        # Wait for every chunk; get() re-raises any exception from the worker.
        for job in jobs:
            job.get()

In [45]:
# Minute-resolution timestamp used in the results folder name and in every output file name.
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")

# Make sure the per-run results folder exists before anything is written to it.
folder_path = f"results/{timestamp}"
os.makedirs(folder_path, exist_ok=True)

run_parallel_processing(
    org_caption_dataset,
    all_human_captions,
    folder_path,
    timestamp,
    num_workers=32,
)

100%|██████████| 4/4 [00:36<00:00,  9.13s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk30.json


100%|██████████| 4/4 [00:39<00:00,  9.80s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk29.json


100%|██████████| 4/4 [00:39<00:00,  9.83s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk3.json


100%|██████████| 4/4 [00:39<00:00,  9.88s/it]
100%|██████████| 4/4 [00:39<00:00,  9.87s/it]

Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk6.json







  0%|          | 0/4 [00:00<?, ?it/s]

Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk20.json


100%|██████████| 4/4 [00:40<00:00, 10.06s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk22.json


100%|██████████| 4/4 [00:40<00:00, 10.08s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk11.json


100%|██████████| 4/4 [00:41<00:00, 10.30s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk16.json


100%|██████████| 4/4 [00:41<00:00, 10.31s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk25.json


100%|██████████| 4/4 [00:41<00:00, 10.47s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk14.json


100%|██████████| 4/4 [00:41<00:00, 10.49s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk7.json


100%|██████████| 4/4 [00:42<00:00, 10.52s/it]
100%|██████████| 4/4 [00:42<00:00, 10.51s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk18.json


  0%|          | 0/4 [00:00<?, ?it/s]

Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk28.json


100%|██████████| 4/4 [00:42<00:00, 10.56s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk12.json


100%|██████████| 4/4 [00:43<00:00, 10.91s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk13.json


100%|██████████| 4/4 [00:44<00:00, 11.00s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk26.json


100%|██████████| 4/4 [00:44<00:00, 11.11s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk31.json


100%|██████████| 4/4 [00:44<00:00, 11.15s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk24.json


100%|██████████| 4/4 [00:44<00:00, 11.20s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk15.json


100%|██████████| 4/4 [00:44<00:00, 11.24s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk9.json


100%|██████████| 4/4 [00:45<00:00, 11.32s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk5.json


100%|██████████| 4/4 [00:45<00:00, 11.50s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk4.json


100%|██████████| 4/4 [00:46<00:00, 11.68s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk8.json


100%|██████████| 4/4 [00:46<00:00, 11.74s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk10.json


100%|██████████| 4/4 [00:47<00:00, 11.92s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk17.json


100%|██████████| 4/4 [00:48<00:00, 12.07s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk19.json


100%|██████████| 4/4 [00:49<00:00, 12.42s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk1.json


100%|██████████| 4/4 [00:50<00:00, 12.69s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk23.json


100%|██████████| 4/4 [00:51<00:00, 12.92s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk2.json


100%|██████████| 4/4 [00:52<00:00, 13.01s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk0.json


 25%|██▌       | 1/4 [00:14<00:44, 14.96s/it]

Error: Precision mismatch for model [Llama-3.2-11B-Vision-Instruct]
length 11 vs 10
G atomics:
['There is a bottle.', 'The bottle is made of plastic.', 'The bottle is white.', 'The bottle has a label.', 'The label is yellow and orange.', 'The label has text.', "The text on the label says 'Folic 800'.", "The text 'Folic 800' is in black font.", 'The label has a barcode.', 'There is additional text on the label.', 'The additional text is not legible.']
Precision TPs:
['There is a bottle.', 'The bottle is white.', 'The bottle has a label.', "The text on the label says 'Folic 800'.", 'The label has a barcode.']
Precision FPs:
['The bottle is made of plastic.', 'The label is yellow and orange.', "The text 'Folic 800' is in black font.", 'There is additional text on the label.', 'The additional text is not legible.']


100%|██████████| 4/4 [00:54<00:00, 13.68s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk21.json


 50%|█████     | 2/4 [00:21<00:21, 10.79s/it]

Error: Recall mismatch for model [gpt-4o-2024-08-06]
length 14 vs 13
T atomics:
['There is a carton of eggs.', 'The carton of eggs is on a table.', 'The table is dark brown.', 'There is a newspaper.', 'The newspaper is above the carton of eggs.', 'There is a box of unsalted butter.', 'There are some receipts.', 'The box of unsalted butter is on a counter top.', 'The receipts are next to the box of unsalted butter.', 'There is a package.', 'The package is blue.', 'The package contains unsalted butter.', 'There is a picture on the package.', 'The picture is of a cow.']
Recall TPs:
['There is a box of unsalted butter.', 'The package is blue.', 'The package contains unsalted butter.', 'There is a picture on the package.', 'The picture is of a cow.']
Recall FNs:
['There is a carton of eggs.', 'The carton of eggs is on a table.', 'The table is dark brown.', 'There is a newspaper.', 'The newspaper is above the carton of eggs.', 'There are some receipts.', 'The box of unsalted butter is on a c

100%|██████████| 4/4 [01:01<00:00, 15.49s/it]


Saved JSON to: results/2025-04-03_23-04/parsed_caption_2025-04-03_23-04_chunk27.json


 50%|█████     | 2/4 [00:21<00:21, 10.63s/it]

Error: Recall mismatch for model [gpt-4o-2024-08-06]
length 12 vs 11
T atomics:
['The bottle is grey.', 'The bottle contains pills.', 'The bottle is of the GNC brand.', 'The bottle is on a surface.', 'The surface is solid.', 'The bottle contains a dietary supplement.', 'The dietary supplement is Biotin.', 'The image is a close-up of the bottle.', 'The GNC Biotin dietary supplement exists.', 'The GNC Biotin dietary supplement is lying on its side.', 'The top two thirds of the bottle are visible.', 'The bottle is on a table.']
Recall TPs:
['The bottle is grey.', 'The bottle contains a dietary supplement.', 'The dietary supplement is Biotin.', 'The GNC Biotin dietary supplement exists.', 'The bottle is of the GNC brand.']
Recall FNs:
['The bottle contains pills.', 'The bottle is on a surface.', 'The surface is solid.', 'The GNC Biotin dietary supplement is lying on its side.', 'The top two thirds of the bottle are visible.', 'The bottle is on a table.']


 75%|███████▌  | 3/4 [00:31<00:10, 10.52s/it]

Error: Recall mismatch for model [Molmo-7B-O-0924]
length 11 vs 12
T atomics:
['There is a bottle of tomato sauce.', 'The bottle is on a tablecloth.', 'The tablecloth is red.', 'The tablecloth has a checkered pattern.', 'There is a jar of pasta sauce.', 'The jar of pasta sauce is lying on a tablecloth.', 'The bottle has a front label.', 'The bottle contains a red sauce.', 'There is a can.', 'The can contains tomatoes.', 'The tomatoes are labeled as Cisco Favorite.']
Recall TPs:
['The bottle is on a tablecloth.', 'The tablecloth is red.', 'The tablecloth has a checkered pattern.']
Recall FNs:
['There is a bottle of tomato sauce.', 'The tablecloth has a checkered pattern.', 'There is a jar of pasta sauce.', 'The jar of pasta sauce is lying on a tablecloth.', 'The bottle has a front label.', 'The bottle contains a red sauce.', 'There is a can.', 'The can contains tomatoes.', 'The tomatoes are labeled as Cisco Favorite.']


100%|██████████| 4/4 [00:43<00:00, 10.84s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk20.json


100%|██████████| 4/4 [00:00<00:00, 41527.76it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk20.json


100%|██████████| 4/4 [00:42<00:00, 10.62s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk28.json


100%|██████████| 4/4 [00:00<00:00, 40820.48it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk28.json


100%|██████████| 4/4 [00:46<00:00, 11.65s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk3.json


100%|██████████| 4/4 [00:00<00:00, 70197.56it/s]
100%|██████████| 4/4 [00:43<00:00, 11.00s/it]

Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk3.json





Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk14.json


100%|██████████| 4/4 [00:00<00:00, 37365.74it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk14.json


100%|██████████| 4/4 [00:47<00:00, 11.87s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk29.json


100%|██████████| 4/4 [00:00<00:00, 41527.76it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk29.json


100%|██████████| 4/4 [00:43<00:00, 10.86s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk13.json


100%|██████████| 4/4 [00:00<00:00, 41425.22it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk13.json


100%|██████████| 4/4 [00:51<00:00, 12.85s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk30.json


100%|██████████| 4/4 [00:00<00:00, 41943.04it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk30.json


100%|██████████| 4/4 [00:46<00:00, 11.55s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk12.json


100%|██████████| 4/4 [00:00<00:00, 77672.30it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk12.json


100%|██████████| 4/4 [00:43<00:00, 10.91s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk5.json


100%|██████████| 4/4 [00:00<00:00, 39756.44it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk5.json


100%|██████████| 4/4 [00:42<00:00, 10.62s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk8.json


100%|██████████| 4/4 [00:00<00:00, 40820.48it/s]
 50%|█████     | 2/4 [00:27<00:27, 13.82s/it]

Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk8.json


100%|██████████| 4/4 [00:49<00:00, 12.29s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk11.json


100%|██████████| 4/4 [00:00<00:00, 41527.76it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk11.json


100%|██████████| 4/4 [00:43<00:00, 10.92s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk4.json


100%|██████████| 4/4 [00:00<00:00, 40524.68it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk4.json


100%|██████████| 4/4 [00:42<00:00, 10.71s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk10.json


100%|██████████| 4/4 [00:00<00:00, 40041.09it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk10.json


100%|██████████| 4/4 [00:48<00:00, 12.23s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk16.json


100%|██████████| 4/4 [00:00<00:00, 39945.75it/s]


Error: Recall mismatch for model [Molmo-7B-O-0924]
Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk16.jsonlength 16 vs 17

T atomics:
['There is an image.', 'There is a box of sandwiches.', 'There is a bag of food.', 'The box of sandwiches is on a fabric.', 'The bag of food is on a fabric.', 'There is an orange box.', 'The box has a label.', 'The label indicates some sort of food.', 'There is a business card.', 'The business card is to the left of the box.', 'There are two products.', 'The products are food.', 'The products are placed on a couch.', 'There are couches.', 'There are pillows on the couches.', 'There are bags on the couches.']
Recall TPs:
['There is a box of sandwiches.', 'The box of sandwiches is on a fabric.', 'There is a box.', 'The box has a label.']
Recall FNs:
['There is an image.', 'There is a bag of food.', 'The bag of food is on a fabric.', 'There is an orange box.', 'The label indicates some sort of food.', 'There is a business card.', 'The bu

100%|██████████| 4/4 [00:45<00:00, 11.42s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk31.json


100%|██████████| 4/4 [00:00<00:00, 86037.01it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk31.json


100%|██████████| 4/4 [00:50<00:00, 12.54s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk22.json


100%|██████████| 4/4 [00:00<00:00, 41527.76it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk22.json


100%|██████████| 4/4 [00:51<00:00, 12.90s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk6.json


100%|██████████| 4/4 [00:00<00:00, 41425.22it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk6.json


100%|██████████| 4/4 [00:46<00:00, 11.69s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk24.json


100%|██████████| 4/4 [00:00<00:00, 41221.66it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk24.json


100%|██████████| 4/4 [00:50<00:00, 12.74s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk25.json


100%|██████████| 4/4 [00:48<00:00, 12.05s/it]/s]



Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk26.jsonSaved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk25.json



100%|██████████| 4/4 [00:00<00:00, 65536.00it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk26.json


100%|██████████| 4/4 [00:47<00:00, 11.94s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk9.json


100%|██████████| 4/4 [00:00<00:00, 39662.45it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk9.json


100%|██████████| 4/4 [00:51<00:00, 12.77s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk18.json


100%|██████████| 4/4 [00:00<00:00, 41527.76it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk18.json


100%|██████████| 4/4 [00:53<00:00, 13.31s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk7.json


100%|██████████| 4/4 [00:00<00:00, 41527.76it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk7.json


100%|██████████| 4/4 [00:48<00:00, 12.19s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk1.json


100%|██████████| 4/4 [00:00<00:00, 41221.66it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk1.json


100%|██████████| 4/4 [00:50<00:00, 12.68s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk19.json


100%|██████████| 4/4 [00:00<00:00, 36954.22it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk19.json


100%|██████████| 4/4 [00:47<00:00, 11.99s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk2.json


100%|██████████| 4/4 [00:00<00:00, 41425.22it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk2.json


100%|██████████| 4/4 [00:49<00:00, 12.41s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk23.json


100%|██████████| 4/4 [00:55<00:00, 14.87s/it]/s]
100%|██████████| 4/4 [00:55<00:00, 13.91s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk15.json
Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk23.json

  0%|          | 0/4 [00:00<?, ?it/s]




100%|██████████| 4/4 [00:00<00:00, 36157.79it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk15.json


100%|██████████| 4/4 [00:49<00:00, 12.30s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk21.json


100%|██████████| 4/4 [00:00<00:00, 40329.85it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk21.json


100%|██████████| 4/4 [00:53<00:00, 13.32s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk0.json


100%|██████████| 4/4 [00:00<00:00, 86480.49it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk0.json


100%|██████████| 4/4 [01:01<00:00, 15.44s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk17.json


100%|██████████| 4/4 [00:00<00:00, 40329.85it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk17.json


100%|██████████| 4/4 [00:52<00:00, 13.02s/it]


Saved JSON to: results/2025-04-03_23-04/recall_precision_2025-04-03_23-04_chunk27.json


100%|██████████| 4/4 [00:00<00:00, 41020.09it/s]


Saved JSON to: results/2025-04-03_23-04/final_2025-04-03_23-04_chunk27.json


In [46]:

def merge_json_chunks(output_file, file_pattern):
    """Merge every JSON file matching ``file_pattern`` into one JSON list at ``output_file``.

    Files are visited in natural order: runs of digits in the path are
    compared as numbers, so ``chunk2`` is merged before ``chunk10``. A plain
    lexicographic sort would interleave them (``chunk1, chunk10, chunk11, ...,
    chunk2``) and scramble the order of the merged entries.

    A file whose top-level value is a list contributes all of its elements;
    a top-level dict is appended as a single entry. Files that cannot be
    parsed, or whose top-level value is neither, are reported and skipped.

    Args:
        output_file: Path of the merged JSON file to write.
        file_pattern: ``glob`` pattern selecting the chunk files.

    Returns:
        None. The merged list is written to ``output_file`` and a summary is printed.
    """
    def natural_key(path):
        # re.split with a capturing group alternates text and digit-run
        # tokens, so odd positions are always digit runs. Keys of two paths
        # therefore compare str with str and int with int.
        return [
            int(token) if position % 2 else token
            for position, token in enumerate(re.split(r"(\d+)", path))
        ]

    merged_data = []

    for filename in sorted(glob.glob(file_pattern), key=natural_key):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except Exception as e:
                # Best effort: one unreadable chunk must not abort the merge.
                print(f"Failed to read {filename}: {e}")
                continue

        if isinstance(data, list):
            merged_data.extend(data)
        elif isinstance(data, dict):
            merged_data.append(data)
        else:
            # Previously dropped without any trace; make the skip visible.
            print(f"Skipped {filename}: unsupported top-level JSON type {type(data).__name__}")

    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=2, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")

# Combine the per-chunk "final" files of this run into a single JSON file.
# The merged file name starts with "__final_", so it does not match the chunk pattern.
merged_json_path = f"{folder_path}/__final_{timestamp}_merged.json"
chunk_file_pattern = f"{folder_path}/final_{timestamp}_chunk*.json"
merge_json_chunks(output_file=merged_json_path, file_pattern=chunk_file_pattern)


Merged 128 entries into results/2025-04-03_23-04/__final_2025-04-03_23-04_merged.json


In [49]:
# Flatten the merged JSON results into a CSV file with one row per item.
json_path = f"{folder_path}/__final_{timestamp}_merged.json"
csv_path = f"{folder_path}/__final_{timestamp}_merged.csv"

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Column prefix -> model name used as key inside the evaluation results.
# The insertion order (gpt, molmo, llama) also fixes the column order.
model_keys = {
    "gpt": "gpt-4o-2024-08-06",
    "molmo": "Molmo-7B-O-0924",
    "llama": "Llama-3.2-11B-Vision-Instruct",
}

# Column prefix -> position of that model's entry in item["model_captions"].
# NOTE(review): assumes every item lists its model captions in this fixed
# order; confirm against the dataset (a lookup by model name would be safer).
caption_index = {"gpt": 0, "molmo": 2, "llama": 1}

# (metadata section, list name) pairs exported as "<prefix>_<section>_<list>" columns.
detail_columns = [
    ("recall", "TPs"),
    ("recall", "FNs"),
    ("precision", "TPs"),
    ("precision", "FPs"),
]

# Column suffix -> key inside scores[model]; note "capf1" vs "cap_f1".
score_columns = {"recall": "recall", "precision": "precision", "capf1": "cap_f1"}

# Column order: shared columns, then the text columns of each model,
# then the numeric scores of each model.
fieldnames = ["image", "link", "T_atomics"]
for short_name in model_keys:
    fieldnames.append(f"{short_name}_caption")
    fieldnames.append(f"{short_name}_g_atomics")
    fieldnames.extend(f"{short_name}_{section}_{bucket}" for section, bucket in detail_columns)
for short_name in model_keys:
    fieldnames.extend(f"{short_name}_{suffix}" for suffix in score_columns)

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        file_name = item.get("file_name", "")
        cap_f1 = item.get("evaluation", {}).get("cap_f1", {})
        scores = cap_f1.get("scores", {})
        metadata = cap_f1.get("metadata", {})
        g_atomics = cap_f1.get("g_atomics", {})

        # A double quote inside a spreadsheet formula string is written as two
        # double quotes; without this a quote in the name would break the formula.
        formula_name = str(file_name).replace('"', '""')

        row = {
            "image": file_name,
            "link": f'=HYPERLINK("https://vizwiz.cs.colorado.edu/VizWiz_visualization_img/{formula_name}", "{formula_name}")',
            "T_atomics": "\n".join(cap_f1.get("T_atomics", [])),
        }

        for short_name, model_key in model_keys.items():
            model_metadata = metadata.get(model_key, {})
            model_scores = scores.get(model_key, {})

            row[f"{short_name}_caption"] = item["model_captions"][caption_index[short_name]]["caption"]
            row[f"{short_name}_g_atomics"] = "\n".join(g_atomics.get(model_key, []))

            # Multi-line cells: one statement per line.
            for section, bucket in detail_columns:
                statements = model_metadata.get(section, {}).get(bucket, [])
                row[f"{short_name}_{section}_{bucket}"] = "\n".join(statements)

            # A missing score stays None, which csv writes as an empty cell.
            for suffix, score_key in score_columns.items():
                row[f"{short_name}_{suffix}"] = model_scores.get(score_key)

        writer.writerow(row)

print(f"CSV file saved to: {csv_path}")

CSV file saved to: results/2025-04-03_23-04/__final_2025-04-03_23-04_merged.csv
