### Include Library

In [46]:
# library for cap_f1
from cap_f1 import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [47]:
print("Load caption file...")

# Fields to keep from each record of the original dataset.
keys = ["file_name", "human_captions", "model_captions"]
org_caption_dataset = read_json("combined-caption-output_7304-images2025-03-29_21_40_00.json", keys)

# Build one list of human caption strings per dataset record, in dataset order.
# Captions equal to the "quality issues" placeholder sentence are left out.
all_human_captions = []
for item in org_caption_dataset:
    human_captions = []
    for annotation in item["human_captions"]:
        caption_text = annotation["caption"]
        if caption_text == "Quality issues are too severe to recognize visual content.":
            continue
        human_captions.append(caption_text)
    all_human_captions.append(human_captions)

Load caption file...


### Parse Caption into Atomic Statements

In [48]:
print("Generating atomic statements using gpt-4o...")

# Value handed to generate_atomic_statement as `limit`; the save cells further down
# pass the same constant to save_results_json.
LIMIT = 5
T_atomics, g_atomics = generate_atomic_statement(
    org_caption_dataset,
    limit=LIMIT,
)

Generating atomic statements using gpt-4o...


100%|██████████| 20/20 [05:24<00:00, 16.23s/it]


In [49]:
# Write the parsing results (both sets of atomic statements) to parsed_caption.json.
save_results_json(
    output_path="parsed_caption.json",
    org_dataset=org_caption_dataset,
    T_atomics=T_atomics,
    g_atomics=g_atomics,
    limit=LIMIT,
)

# Reload the saved file for cells that work from the parsed dataset; this time the
# "evaluation" field is requested in addition to the original three.
keys = ["file_name", "human_captions", "model_captions", "evaluation"]
parsed_dataset = read_json("parsed_caption.json", keys)

Saved JSON to: parsed_caption.json


### Evaluation

In [50]:
# Run the matching step on the in-memory human captions and atomic statements.
# The result is kept in `metadata`, which the following cells save to JSON and
# pass to calculate_cap_f1.
metadata = evaluate_matching(all_human_captions, T_atomics, g_atomics)
# Alternative entry point that takes the reloaded `parsed_dataset` instead:
# metadata = evaluate_matching_file(parsed_dataset)

100%|██████████| 20/20 [04:41<00:00, 14.05s/it]


In [51]:
# Store the matching metadata in a new file, starting from the contents of
# parsed_caption.json.
save_results_json(
    output_path="data_recall_precision.json",
    update_existing="parsed_caption.json",
    metadata=metadata,
    limit=LIMIT,
)

Saved JSON to: data_recall_precision.json


In [52]:
# Hand the matching results to calculate_cap_f1; the returned value is written out
# by the next cell through save_results_json(evaluations=...).
evaluation = calculate_cap_f1(metadata)

100%|██████████| 20/20 [00:00<00:00, 72377.98it/s]


In [53]:
# Store the evaluation results in a new file, starting from the contents of
# data_recall_precision.json.
save_results_json(
    output_path="final_with_evaluation.json",
    update_existing="data_recall_precision.json",
    evaluations=evaluation,
    limit=LIMIT,
)

Saved JSON to: final_with_evaluation.json


In [44]:
import json
import csv

json_path = "final_with_evaluation.json"  # change this to your own file path
csv_path = "final_with_evaluation.csv"

# For each model key found under evaluation["cap_f1"]["scores"]:
# CSV column name -> metric key inside that model's score entry.
# Dict order defines the CSV column order.
model_columns = {
    "gpt-4o-2024-08-06": {
        "gpt_recall": "recall",
        "gpt_precision": "precision",
        "gpt_capf1": "cap_f1",
    },
    "Molmo-7B-O-0924": {
        "molmo_recall": "recall",
        "molmo_precision": "precision",
        "molmo_capf1": "cap_f1",
    },
    "Llama-3.2-11B-Vision-Instruct": {
        "llama_recall": "recall",
        "llama_precision": "precision",
        "llama_capf1": "cap_f1",
    },
}

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# "image" first, then the three metric columns of each model in turn.
fieldnames = ["image"] + [
    column
    for columns in model_columns.values()
    for column in columns
]

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for item in data:
        scores = item.get("evaluation", {}).get("cap_f1", {}).get("scores", {})

        # A missing model or metric yields None, which the csv writer emits as an
        # empty cell.
        row = {"image": item.get("file_name", "")}
        for model_name, columns in model_columns.items():
            model_scores = scores.get(model_name, {})
            for column, metric in columns.items():
                row[column] = model_scores.get(metric)

        writer.writerow(row)

print(f"CSV file saved to: {csv_path}")


CSV file saved to: final_with_evaluation.csv


## Other Evaluation 
### BLEU, METEOR, ROUGE

In [18]:
print("Load caption file...")

# Fields to keep from each record of the original dataset.
keys = ["file_name", "human_captions", "model_captions"]
org_caption_dataset = read_json("combined-caption-output_7304-images2025-03-29_21_40_00.json", keys)

# Filter out the "quality issues" placeholder from each record's human captions.
# NOTE(review): `human_captions` is rebound on every pass and nothing is accumulated,
# so once the loop ends it holds only the filtered captions of the LAST record.
# Confirm that this is what the code consuming `human_captions` expects.
for item in org_caption_dataset:
    human_captions = []
    for annotation in item["human_captions"]:
        caption_text = annotation["caption"]
        if caption_text == "Quality issues are too severe to recognize visual content.":
            continue
        human_captions.append(caption_text)

Load caption file...


In [16]:
# NOTE(review): the loading loop rebinds `human_captions` per record, so the value passed
# here is the caption list of a single record (the last one iterated), not one list
# per record — confirm this is the input get_others expects.
evall = get_others(org_caption_dataset, human_captions)
# Uncomment to inspect the result:
# print(json.dumps(evall, indent=4, ensure_ascii=False))

[nltk_data] Downloading package wordnet to /home/heoj4/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/heoj4/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/heoj4/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [17]:
# Write the results returned by get_others to others_evaluation.json under the
# metric name "others".
save_results_json(
    output_path="others_evaluation.json",
    metric_name="others",
    evaluations=evall,
)

Saved JSON to: others_evaluation.json
