### Include Library

In [1]:
# library for cap_f1
from cap_f1 import *
from datetime import datetime
from multiprocessing import Pool
import math
import json
import glob
import os


# code for no need for restarting the kernel when python file is updated
%load_ext autoreload
%autoreload 2

### Load Data

In [2]:
print("Load caption file...")

# features that we need to extract from the original dataset
org_caption_dataset = read_json("evaluation_results_5429-images_2025-04-03_11_27_fixed.json")

all_human_captions=[]
for item in org_caption_dataset:
    # Filter out human captions
    human_captions = [
        hc["caption"]            
        for hc in item["human_captions"]
        if hc["caption"] != "Quality issues are too severe to recognize visual content."
    ]
    all_human_captions.append(human_captions)

Load caption file...


In [4]:
# This is for testing
print(len(org_caption_dataset))
org_caption_dataset = org_caption_dataset[:32]
all_human_captions = all_human_captions[:32]

32


### Run Multi Processors

In [5]:
def process_batch(start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id):
    subset = org_caption_dataset[start_idx:end_idx]
    LIMIT = len(subset)
    human_subset = all_human_captions[start_idx:end_idx]

    # Step 1: Parse atomics
    T_atomics, g_atomics = generate_atomic_statement(subset, limit=LIMIT)
    save_results_json(output_path=f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json",
                      org_dataset=subset, T_atomics=T_atomics, g_atomics=g_atomics, limit=LIMIT)

    # Step 2: Match human & generated
    metadata = evaluate_matching(human_subset, T_atomics, g_atomics)
    save_results_json(output_path=f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json",
                      update_existing=f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json",
                      metadata=metadata, limit=LIMIT)

    # Step 3: Cap F1
    evaluation = calculate_cap_f1(metadata)
    save_results_json(output_path=f"{folder_path}/final_{timestamp}_chunk{chunk_id}.json",
                      update_existing=f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json",
                      evaluations=evaluation, limit=LIMIT)

def run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=32):
    total = len(org_caption_dataset)
    chunk_size = math.ceil(total / num_workers)

    with Pool(processes=num_workers) as pool:
        jobs = []
        for i in range(num_workers):
            start_idx = i * chunk_size
            end_idx = min((i + 1) * chunk_size, total)
            jobs.append(pool.apply_async(process_batch, (start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, i)))
        
        for job in jobs:
            job.get()

In [6]:
# for filename
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d_%H-%M")

#create folder to save the results
folder_path = f"results/{timestamp}"
os.makedirs(folder_path, exist_ok=True)

run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=9)

0it [00:00, ?it/s]/4 [00:00<?, ?it/s]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk8.json


0it [00:00, ?it/s]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk8.json


0it [00:00, ?it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk8.json


100%|██████████| 4/4 [00:35<00:00,  8.95s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk6.json


100%|██████████| 4/4 [00:38<00:00,  9.57s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk2.json


100%|██████████| 4/4 [00:38<00:00,  9.75s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk7.json


100%|██████████| 4/4 [00:39<00:00,  9.98s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk5.json


100%|██████████| 4/4 [00:40<00:00, 10.17s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk1.json


100%|██████████| 4/4 [00:41<00:00, 10.33s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk0.json


100%|██████████| 4/4 [00:42<00:00, 10.66s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk4.json


100%|██████████| 4/4 [00:45<00:00, 11.30s/it]


Saved JSON to: results/2025-04-07_22-25/parsed_caption_2025-04-07_22-25_chunk3.json


 25%|██▌       | 1/4 [00:08<00:25,  8.46s/it]

Error: Recall mismatch for model [gpt-4o-2024-08-06]
length 13 vs 13
T atomics:
['There is a bottle of vitamins.', 'The bottle of vitamins is on white sheets.', 'There is an arm next to the bottle of vitamins.', 'The bottle has a green cap.', 'The bottle has leaves printed on the label.', 'The image describes the medicinal uses of the product.', 'The bottle is a supplement bottle.', 'The supplement is natural.', 'The supplement is goldenseal.', 'The bottle is turned on its side.', 'The object is soft.', 'The object is pillow-like.', "The letters on the bottle are 'denseal'."]
Recall TPs:
['There is a bottle of vitamins.', 'The bottle has a green cap.', 'The bottle is on white sheets.']
Recall FNs:
['There is an arm next to the bottle of vitamins.', 'The bottle has leaves printed on the label.', 'The image describes the medicinal uses of the product.', 'The bottle is a supplement bottle.', 'The supplement is natural.', 'The supplement is goldenseal.', 'The bottle is turned on its side.'

100%|██████████| 4/4 [00:36<00:00,  9.08s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk7.json


100%|██████████| 4/4 [00:00<00:00, 86037.01it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk7.json


100%|██████████| 4/4 [00:37<00:00,  9.40s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk2.json


100%|██████████| 4/4 [00:00<00:00, 39475.80it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk2.json


100%|██████████| 4/4 [00:40<00:00, 10.03s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk6.json


100%|██████████| 4/4 [00:00<00:00, 86480.49it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk6.json


100%|██████████| 4/4 [00:39<00:00,  9.99s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk4.json


100%|██████████| 4/4 [00:00<00:00, 40329.85it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk4.json


100%|██████████| 4/4 [00:46<00:00, 11.67s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk5.json


100%|██████████| 4/4 [00:00<00:00, 40920.04it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk5.json


100%|██████████| 4/4 [00:46<00:00, 11.57s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk1.json


100%|██████████| 4/4 [00:00<00:00, 67108.86it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk1.json


100%|██████████| 4/4 [00:43<00:00, 10.93s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk3.json


100%|██████████| 4/4 [00:00<00:00, 42153.81it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk3.json


100%|██████████| 4/4 [00:49<00:00, 12.28s/it]


Saved JSON to: results/2025-04-07_22-25/recall_precision_2025-04-07_22-25_chunk0.json


100%|██████████| 4/4 [00:00<00:00, 88768.34it/s]


Saved JSON to: results/2025-04-07_22-25/final_2025-04-07_22-25_chunk0.json


In [7]:
def merge_json_chunks(output_file, file_pattern):
    merged_data = []

    for filename in sorted(glob.glob(file_pattern)):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                if isinstance(data, list):
                    merged_data.extend(data)
                elif isinstance(data, dict):
                    merged_data.append(data)
            except Exception as e:
                print(f"Failed to read {filename}: {e}")

    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=2, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")

merge_json_chunks(
    output_file=f"{folder_path}/__final_{timestamp}_merged.json",
    file_pattern=f"{folder_path}/final_{timestamp}_chunk*.json"
)


Merged 32 entries into results/2025-04-07_22-25/__final_2025-04-07_22-25_merged.json


In [22]:
import json
import csv

json_path = f"{folder_path}/__final_{timestamp}_merged.json"
csv_path = f"{folder_path}/__final_{timestamp}_merged.csv"

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

fieldnames = [
    "image",
    "link",
    "T_atomics",
    "gpt_caption",
    "gpt_g_atomics",
    "gpt_recall_TPs",
    "gpt_recall_FNs",
    "gpt_precision_TPs",
    "gpt_precision_FPs",
    "molmo_caption",
    "molmo_g_atomics",
    "molmo_recall_TPs",
    "molmo_recall_FNs",
    "molmo_precision_TPs",
    "molmo_precision_FPs",
    "llama_caption",
    "llama_g_atomics",
    "llama_recall_TPs",
    "llama_recall_FNs",
    "llama_precision_TPs",
    "llama_precision_FPs",
    "gpt_recall",
    "gpt_precision",
    "gpt_capf1",
    "molmo_recall",
    "molmo_precision",
    "molmo_capf1",
    "llama_recall",
    "llama_precision",
    "llama_capf1",
]

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        file_name = item.get("file_name", "")
        cap_f1 = item.get("evaluation", {}).get("cap_f1", {})
        scores = cap_f1.get("scores", {})
        metadata = cap_f1.get("metadata", {})
        t_atomics = cap_f1.get("T_atomics", [])

        model_keys = {
            "gpt": "gpt-4o-2024-08-06",
            "molmo": "Molmo-7B-O-0924",
            "llama": "Llama-3.2-11B-Vision-Instruct",
        }

        row = {
            "image": file_name,
            "link": f'=HYPERLINK("https://vizwiz.cs.colorado.edu/VizWiz_visualization_img/{file_name}", "{file_name}")',
            "T_atomics": "\n".join(t_atomics),
            "gpt_caption": item["model_captions"][0]["caption"],
            "gpt_g_atomics": "",
            "gpt_recall_TPs": "",
            "gpt_precision_TPs": "",
            "molmo_caption": item["model_captions"][2]["caption"],
            "molmo_g_atomics": "",
            "molmo_recall_TPs": "",
            "molmo_precision_TPs": "",
            "llama_caption": item["model_captions"][1]["caption"],
            "llama_g_atomics": "",
            "llama_recall_TPs": "",
            "llama_precision_TPs": "",
            "gpt_recall": scores.get(model_keys["gpt"], {}).get("recall"),
            "gpt_precision": scores.get(model_keys["gpt"], {}).get("precision"),
            "gpt_capf1": scores.get(model_keys["gpt"], {}).get("cap_f1"),
            "molmo_recall": scores.get(model_keys["molmo"], {}).get("recall"),
            "molmo_precision": scores.get(model_keys["molmo"], {}).get("precision"),
            "molmo_capf1": scores.get(model_keys["molmo"], {}).get("cap_f1"),
            "llama_recall": scores.get(model_keys["llama"], {}).get("recall"),
            "llama_precision": scores.get(model_keys["llama"], {}).get("precision"),
            "llama_capf1": scores.get(model_keys["llama"], {}).get("cap_f1"),
        }

        for short_name, model_key in model_keys.items():
            # g_atomics
            g_atomics_list = cap_f1.get("g_atomics", {}).get(model_key, [])
            row[f"{short_name}_g_atomics"] = "\n".join(g_atomics_list)

            # recall TPs
            recall_tps = metadata.get(model_key, {}).get("recall", {}).get("TPs", [])
            row[f"{short_name}_recall_TPs"] = "\n".join(recall_tps)

            # recall FNs
            recall_fns = metadata.get(model_key, {}).get("recall", {}).get("FNs", [])
            row[f"{short_name}_recall_FNs"] = "\n".join(recall_fns)

            # precision TPs
            precision_tps = (
                metadata.get(model_key, {}).get("precision", {}).get("TPs", [])
            )
            row[f"{short_name}_precision_TPs"] = "\n".join(precision_tps)

            # precision FPs
            precision_fps = (
                metadata.get(model_key, {}).get("precision", {}).get("FPs", [])
            )
            row[f"{short_name}_precision_FPs"] = "\n".join(precision_fps)

        writer.writerow(row)

print(f"CSV file saved to: {csv_path}")

CSV file saved to: results/2025-04-05_16-36/__final_2025-04-05_16-36_merged.csv
