### Import Libraries

In [1]:
# library for cap_f1
from cap_f1 import *
from datetime import datetime
from multiprocessing import Pool
import math
import json
import glob
import os


# Auto-reload edited Python modules so the kernel does not need to be restarted
%load_ext autoreload
%autoreload 2

### Load Data

In [9]:
print("Load caption file...")

# Minute-resolution timestamp; it names the results folder and the files
# written into it by the later cells.
now = datetime.now()
timestamp = f"{now:%Y-%m-%d_%H-%M}"

# Each run writes into its own timestamped folder under results/.
folder_path = f"results/{timestamp}"
os.makedirs(folder_path, exist_ok=True)

# Evaluated caption dataset that the rest of the notebook works from.
org_caption_dataset = read_json(
    "../../data/study-2-output/final-evaluated-captions/low-quality_evaluation_5432-images_2025-04-10_15:29.json"
)


Load caption file...


In [10]:
# Testing only: replace the full dataset with the subset named in
# results/selected_filename.txt. Judging by its logged output, select_data
# creates the JSON file at the path given as its third argument.
selected_data_path = f"{folder_path}/selected_data.json"
select_data(
    "results/selected_filename.txt",
    "evaluation_results_5432-images_2025-04-03_11_27_fixed.json",
    selected_data_path,
)
org_caption_dataset = read_json(selected_data_path)
print(len(org_caption_dataset))
# To debug on a single item, slice org_caption_dataset here (e.g. [:1]).

24 data, JSON results/2025-04-09_17-07/selected_data.json created with 24 items.
24


In [12]:
# Placeholder caption annotators give when the image cannot be described;
# it carries no visual content, so it is excluded from the references.
UNRECOGNIZABLE_CAPTION = "Quality issues are too severe to recognize visual content."

# One list of usable human captions per dataset item, in dataset order.
all_human_captions = [
    [
        entry["caption"]
        for entry in record["human_captions"]
        if entry["caption"] != UNRECOGNIZABLE_CAPTION
    ]
    for record in org_caption_dataset
]
all_human_captions[0]

['A can of Coca Cola on a counter is shown for when one can use a nice, cold drink.',
 'A black can of Coca Cola Zero calorie soda is on the counter near the coffee maker.',
 'A kitchen counter the various items on top including a can of Coca-Cola, metal containers, and a teapot.',
 'a black tin of Coca Cola placed on a black surface',
 'Black counter with canisters, kettle and can of soda.']

### Run with Multiple Processes

In [11]:
def process_batch(start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id):
    """Run the three pipeline stages on one slice of the dataset.

    Each stage saves a JSON file named ``<stage>_<timestamp>_chunk<chunk_id>.json``
    inside ``folder_path``; stages 2 and 3 pass the previous stage's file as
    ``update_existing``.

    Args:
        start_idx: Inclusive start index of the slice.
        end_idx: Exclusive end index of the slice.
        org_caption_dataset: Full dataset; only ``[start_idx:end_idx]`` is used.
        all_human_captions: Human captions, sliced with the same index range.
        folder_path: Directory the chunk files are written to.
        timestamp: Run identifier embedded in the file names.
        chunk_id: Chunk number embedded in the file names.
    """
    batch_items = org_caption_dataset[start_idx:end_idx]
    batch_human = all_human_captions[start_idx:end_idx]
    batch_size = len(batch_items)
    print(batch_human)

    # One output file per stage for this chunk.
    parsed_path = f"{folder_path}/parsed_caption_{timestamp}_chunk{chunk_id}.json"
    matching_path = f"{folder_path}/recall_precision_{timestamp}_chunk{chunk_id}.json"
    final_path = f"{folder_path}/final_{timestamp}_chunk{chunk_id}.json"

    # Stage 1: break captions into atomic statements.
    T_atomics, g_atomics, parsed_T = generate_atomic_statement(batch_items, limit=batch_size)
    save_results_json(
        output_path=parsed_path,
        org_dataset=batch_items,
        T_atomics=T_atomics,
        g_atomics=g_atomics,
        parsed_T=parsed_T,
        T_org=batch_human,
        limit=batch_size,
    )

    # Stage 2: match human statements against generated ones.
    metadata = evaluate_matching(batch_human, T_atomics, g_atomics)
    save_results_json(
        output_path=matching_path,
        update_existing=parsed_path,
        metadata=metadata,
        limit=batch_size,
    )

    # Stage 3: compute Cap F1 from the matching metadata.
    evaluation = calculate_cap_f1(metadata)
    save_results_json(
        output_path=final_path,
        update_existing=matching_path,
        evaluations=evaluation,
        limit=batch_size,
    )

def run_parallel_processing(org_caption_dataset, all_human_captions, folder_path, timestamp, num_workers=32):
    """Split the dataset into contiguous chunks and run ``process_batch`` on each in a process pool.

    The chunk size is ``ceil(total / num_workers)``. Only non-empty chunks are
    submitted, so asking for more workers than there are items no longer
    schedules empty batches or starts idle processes. Chunk ids are assigned
    in dataset order starting at 0.

    Args:
        org_caption_dataset: Sequence of dataset items.
        all_human_captions: Sequence sliced with the same index ranges as
            ``org_caption_dataset``.
        folder_path: Passed through to ``process_batch``.
        timestamp: Passed through to ``process_batch``.
        num_workers: Upper bound on the number of chunks and worker processes.

    Raises:
        ValueError: If ``num_workers`` is less than 1.

    Any exception raised inside a worker is re-raised here by ``job.get()``.

    NOTE(review): the recorded notebook output shows spawn-started workers
    failing while unpickling their task. Presumably this is because
    ``process_batch`` is defined in the notebook rather than in an importable
    module; confirm, and consider moving it into a ``.py`` file.
    """
    if num_workers < 1:
        raise ValueError(f"num_workers must be at least 1, got {num_workers}")

    total = len(org_caption_dataset)
    if total == 0:
        # Nothing to process; avoid starting a pool for no work.
        return

    chunk_size = math.ceil(total / num_workers)
    # Stepping by chunk_size yields only ranges with start < total, i.e. non-empty ones.
    bounds = [
        (start_idx, min(start_idx + chunk_size, total))
        for start_idx in range(0, total, chunk_size)
    ]

    # len(bounds) never exceeds num_workers, so the pool is never larger than requested.
    with Pool(processes=len(bounds)) as pool:
        jobs = [
            pool.apply_async(
                process_batch,
                (start_idx, end_idx, org_caption_dataset, all_human_captions, folder_path, timestamp, chunk_id),
            )
            for chunk_id, (start_idx, end_idx) in enumerate(bounds)
        ]

        # Wait for every chunk; get() surfaces worker exceptions.
        for job in jobs:
            job.get()

In [None]:
# Trial run on the first 12 items only.
run_parallel_processing(
    org_caption_dataset[:12],
    all_human_captions[:12],
    folder_path,
    timestamp,
    num_workers=24,
)

Process SpawnPoolWorker-1:
Process SpawnPoolWorker-2:
Traceback (most recent call last):
Traceback (most recent call last):
Process SpawnPoolWorker-3:
Traceback (most recent call last):
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-5:
Traceback (most recent call last):
Process SpawnPoolWorker-6:
Traceback (most recent call last):
Traceback (most recent call last):
Process SpawnPoolWorker-7:
  File "/Users/kgarg/.pyenv/versions/3.11.11/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/kgarg/.pyenv/versions/3.11.11/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/kgarg/.pyenv/versions/3.11.11/lib/python3.11/multiprocessing/pool.py", line 114, in worker
    task = get()
           ^^^^^
  File "/Users/kgarg/.pyenv/versions/3.11.11/lib/python3.11/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^


In [13]:
def merge_json_chunks(output_file, file_pattern):
    """Merge every JSON file matching ``file_pattern`` into one JSON array.

    Files are visited in natural order, so ``chunk2`` comes before ``chunk10``
    and the merged entries keep the order of the chunk numbers. A file whose
    top-level value is a list contributes its elements; a dict contributes
    itself as one entry. Files that cannot be parsed, or that hold any other
    top-level type, are reported and skipped.

    Args:
        output_file: Path of the merged JSON file to write.
        file_pattern: Glob pattern selecting the chunk files.
    """
    import re  # local import: only needed for the natural-sort key

    def _natural_key(path):
        # re.split with a capturing group alternates text and digit runs, so
        # odd positions are always digit runs and compare numerically.
        return [
            int(part) if position % 2 else part
            for position, part in enumerate(re.split(r"(\d+)", path))
        ]

    merged_data = []

    for filename in sorted(glob.glob(file_pattern), key=_natural_key):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except Exception as e:
                # Best effort: one unreadable chunk must not abort the merge.
                print(f"Failed to read {filename}: {e}")
                continue

        if isinstance(data, list):
            merged_data.extend(data)
        elif isinstance(data, dict):
            merged_data.append(data)
        else:
            print(f"Skipped {filename}: unsupported top-level JSON type {type(data).__name__}")

    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(merged_data, out_f, indent=2, ensure_ascii=False)

    print(f"Merged {len(merged_data)} entries into {output_file}")

# Combine this run's per-chunk "final" files into a single JSON file.
merge_json_chunks(
    file_pattern=f"{folder_path}/final_{timestamp}_chunk*.json",
    output_file=f"{folder_path}/__final_{timestamp}_merged.json",
)


Merged 24 entries into results/2025-04-09_17-07/__final_2025-04-09_17-07_merged.json


In [14]:
import json
import csv

def format_matches(match_list):
    """Render match records as newline-separated ``left : right`` lines.

    A record with ``T_atomic`` and ``g_atomic`` becomes ``T_atomic : g_atomic``.
    Otherwise a record with ``g_atomic`` and ``T_org`` becomes
    ``g_atomic : T_org``. Any other record falls back to ``str(record)``.
    """

    def _render(record):
        if "g_atomic" in record:
            if "T_atomic" in record:
                return f'{record["T_atomic"]} : {record["g_atomic"]}'
            if "T_org" in record:
                return f'{record["g_atomic"]} : {record["T_org"]}'
        return str(record)  # unexpected shape: show it verbatim

    return "\n".join(_render(record) for record in match_list)


# Input: the merged JSON of this run. Output: a CSV next to it.
json_path = f"{folder_path}/__final_{timestamp}_merged.json"
csv_path = f"{folder_path}/__final_{timestamp}_merged.csv"

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Column prefix -> model identifier used as a key inside the evaluation JSON.
model_keys = {
    "gpt": "gpt-4o-2024-08-06",
    "molmo": "Molmo-7B-O-0924",
    "llama": "Llama-3.2-11B-Vision-Instruct",
}

# Index of each model's entry inside item["model_captions"].
# NOTE(review): assumes every item lists its captions in this order; confirm against the data.
caption_positions = {"gpt": 0, "molmo": 2, "llama": 1}

# Column suffix -> key inside a model's "scores" entry.
score_columns = {"recall": "recall", "precision": "precision", "capf1": "cap_f1"}

# Per-model detail columns, in the order they appear in the CSV.
detail_suffixes = (
    "caption",
    "g_atomics",
    "recall_TPs",
    "recall_Matches",
    "recall_FNs",
    "precision_TPs",
    "precision_Matches",
    "precision_FPs",
)

# Layout: shared columns, then the detail columns per model, then the score columns per model.
fieldnames = (
    ["image", "link", "T_org", "parsed_T", "T_atomics"]
    + [f"{prefix}_{suffix}" for prefix in model_keys for suffix in detail_suffixes]
    + [f"{prefix}_{suffix}" for prefix in model_keys for suffix in score_columns]
)

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        file_name = item.get("file_name", "")
        cap_f1 = item.get("evaluation", {}).get("cap_f1", {})
        scores = cap_f1.get("scores", {})
        metadata = cap_f1.get("metadata", {})
        t_atomics = cap_f1.get("T_atomics", [])
        parsed_T = cap_f1.get("parsed_atomics", [])
        T_org = cap_f1.get("T_org", [])

        # Columns shared by all models; list values become one multi-line cell.
        row = {
            "image": file_name,
            # Spreadsheet formula so the cell links to the image.
            "link": f'=HYPERLINK("https://vizwiz.cs.colorado.edu/VizWiz_visualization_img/{file_name}", "{file_name}")',
            "T_org": "\n".join(T_org),
            "parsed_T": "\n".join(parsed_T),
            "T_atomics": "\n".join(t_atomics),
        }

        # Raw caption of each model, looked up by position.
        for short_name, position in caption_positions.items():
            row[f"{short_name}_caption"] = item["model_captions"][position]["caption"]

        # Scalar scores; a missing score is written as an empty cell.
        for short_name, model_key in model_keys.items():
            model_scores = scores.get(model_key, {})
            for column_suffix, score_name in score_columns.items():
                row[f"{short_name}_{column_suffix}"] = model_scores.get(score_name)

        # Atomic statements and recall / precision breakdown per model.
        for short_name, model_key in model_keys.items():
            g_atomics_list = cap_f1.get("g_atomics", {}).get(model_key, [])
            row[f"{short_name}_g_atomics"] = "\n".join(g_atomics_list)

            model_metadata = metadata.get(model_key, {})

            recall = model_metadata.get("recall", {})
            row[f"{short_name}_recall_TPs"] = "\n".join(recall.get("TPs", []))
            row[f"{short_name}_recall_FNs"] = "\n".join(recall.get("FNs", []))
            row[f"{short_name}_recall_Matches"] = format_matches(recall.get("Match", []))

            precision = model_metadata.get("precision", {})
            row[f"{short_name}_precision_TPs"] = "\n".join(precision.get("TPs", []))
            row[f"{short_name}_precision_FPs"] = "\n".join(precision.get("FPs", []))
            row[f"{short_name}_precision_Matches"] = format_matches(precision.get("Match", []))

        writer.writerow(row)

print(f"CSV file saved to: {csv_path}")


CSV file saved to: results/2025-04-09_17-07/__final_2025-04-09_17-07_merged.csv
