# BLIP NoCaps Evaluation (Kaggle)

- Enable Internet in the Kaggle notebook, and add the dataset containing `nocaps_val_4500_captions.json` and `nocaps_test_image_info.json` to the *Data* tab.
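- The notebook uses `run_nocaps_eval.py` to download images in parallel; the `--workers` option is set automatically from `cpu_count()` (clamped to the range 4–32). Set `WORKERS_OVERRIDE` in the setup cell to force a specific value (on a typical ~4-core machine, 8 workers is safe).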
- If you already have the images, set `SKIP_DOWNLOAD = True` in the setup cell to skip the download step; the following steps remain unchanged.

In [None]:
from pathlib import Path
import json
import os

# Root of the writable Kaggle working area; all generated artifacts live below it.
BASE_DIR = Path("/kaggle/working")

# NoCaps JSON files (attach the dataset via the Data tab)
DATASET_ROOT = Path("/kaggle/input/nocaps")
VAL_JSON = DATASET_ROOT / "nocaps_val_4500_captions.json"
TEST_JSON = DATASET_ROOT / "nocaps_test_image_info.json"

# COCO 2014 images (re-used from a locally attached Kaggle dataset)
COCO_IMG_TRAIN = Path("/kaggle/input/coco2014/train2014/train2014")
COCO_IMG_VAL = Path("/kaggle/input/coco2014/val2014/val2014")

# Candidate mount points for an Open Images dataset attached via the Data tab;
# the first one that exists is used, otherwise OPENIM_ROOT stays None.
CANDIDATE_OPENIM_ROOTS = [
    Path("/kaggle/input/openimages"),
    Path("/kaggle/input/open-images-v6"),
    Path("/kaggle/input/open-images"),
    Path("/kaggle/input/openimagesv6"),
]
OPENIM_ROOT = None
for candidate_root in CANDIDATE_OPENIM_ROOTS:
    if candidate_root.exists():
        OPENIM_ROOT = candidate_root
        break

# Repo/script location: prefer a local tools/ copy, otherwise use the cloned repo
REPO_URL = "https://github.com/TMG301-Skubidu/Image-Captioning-Vietnamese-BLIP.git"
REPO_DIR = BASE_DIR / "BLIP"
PREFER_LOCAL_CLI = Path("tools/run_nocaps_eval.py")
if PREFER_LOCAL_CLI.exists():
    CLI_SCRIPT = PREFER_LOCAL_CLI
else:
    CLI_SCRIPT = REPO_DIR / "tools/run_nocaps_eval.py"

# Output locations for downloaded images, converted annotations and results
IMAGE_ROOT = BASE_DIR / "nocaps_images"
ANN_ROOT = BASE_DIR / "nocaps_annotations"
OUTPUT_DIR = BASE_DIR / "NoCaps"

# Controls
SKIP_DOWNLOAD = False  # True if all images are already present, or when running offline
EVAL_WITH_SCRIPT = False  # True: run evaluation inside CLI_SCRIPT (no separate eval_nocaps.py run)
USE_OPENIM_CLI = True  # True to download Open Images through the CLI (requires Internet)
PIP_INSTALL_OPENIM = True  # True to allow pip-installing the openimages CLI
WORKERS_OVERRIDE = None  # integer to force the worker count, None = automatic

# Make sure every output directory exists before later cells write into it.
for out_dir in (IMAGE_ROOT / "val", IMAGE_ROOT / "test", ANN_ROOT, OUTPUT_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Report which of the expected inputs are actually attached.
for label, location in (
    ("VAL_JSON", VAL_JSON),
    ("TEST_JSON", TEST_JSON),
    ("COCO train", COCO_IMG_TRAIN),
    ("COCO val", COCO_IMG_VAL),
):
    print(f"{label} exists: {location.exists()} -> {location}")
print("OpenImages root:", OPENIM_ROOT if OPENIM_ROOT else "None")
print("CLI_SCRIPT (preferred):", CLI_SCRIPT)


In [None]:
import subprocess
import sys

# Best-effort repository setup: clone if needed, report the checked-out commit
# and whether the files later cells rely on are present. Any failure is only
# reported, so the notebook can still proceed with a local tools/ script.
try:
    if REPO_DIR.exists():
        print(f"Repository already available at {REPO_DIR}")
    else:
        subprocess.run(["git", "clone", REPO_URL, str(REPO_DIR)], check=True)

    head_output = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=REPO_DIR)
    commit = head_output.decode().strip()
    print("Repository commit:", commit)
    for rel_path in ("eval_nocaps.py", "data/nocaps_dataset.py", "configs/nocaps.yaml"):
        status = "OK" if (REPO_DIR / rel_path).exists() else "MISSING"
        print(f"{rel_path}: {status}")
except Exception as e:
    print("Repo clone/check skipped or failed:", e)
finally:
    # Re-resolve the CLI script now that the clone may have happened
    # (a local tools/ copy still takes precedence).
    CLI_SCRIPT = (
        PREFER_LOCAL_CLI
        if PREFER_LOCAL_CLI.exists()
        else REPO_DIR / "tools/run_nocaps_eval.py"
    )
    print("CLI script resolved to:", CLI_SCRIPT)


In [None]:
import subprocess
import sys

# Install the repository's Python requirements when they are available.
# Installation errors are reported but do not stop the notebook.
requirements_path = REPO_DIR / "requirements.txt"
if not (REPO_DIR.exists() and requirements_path.exists()):
    print("Skipping requirements install: REPO_DIR or requirements.txt missing")
else:
    pip_cmd = [sys.executable, "-m", "pip", "install", "-r", str(requirements_path)]
    try:
        subprocess.run(pip_cmd, check=True)
    except Exception as e:
        print("Skipping requirements install (error):", e)

In [None]:
import os
from pathlib import Path
import torch
import timm

# Switch into the repository so that relative paths used by later cells
# (e.g. eval_nocaps.py, configs/nocaps.yaml) resolve, then report versions.
if not REPO_DIR.exists():
    print("REPO_DIR not found (using current working directory):", Path.cwd())
else:
    os.chdir(REPO_DIR)
    print("Working directory:", Path.cwd())
print(f"Torch: {torch.__version__} CUDA available: {torch.cuda.is_available()}")
print(f"Timm: {timm.__version__}")

In [None]:
def summarize_image(entry):
    """Return the identifying fields that are present in an image record.

    Only the keys ``file_name``, ``id``, ``domain``, ``coco_url`` and
    ``open_images_id`` are considered; keys missing from *entry* are omitted
    and the result keeps the order listed here.
    """
    summary = {}
    for field in ("file_name", "id", "domain", "coco_url", "open_images_id"):
        if field in entry:
            summary[field] = entry.get(field)
    return summary

# Load both NoCaps JSON files and print a quick structural overview.
val_raw = json.loads(VAL_JSON.read_text())
test_raw = json.loads(TEST_JSON.read_text())

# Validation split: images plus (optionally) caption annotations.
val_caps = val_raw.get("annotations", [])
print("Val keys:", list(val_raw))
val_images = val_raw["images"]
print("Val images:", len(val_images))
print("Val annotations:", len(val_caps))
print("Val sample image:", summarize_image(val_images[0]))
sample_captions = [ann["caption"] for ann in val_caps[:3]]
print("Val sample captions:", sample_captions)

# Test split: report whether an "annotations" key is present at all.
print("Test keys:", list(test_raw))
test_images = test_raw["images"]
print("Test images:", len(test_images))
print("Test has annotations:", "annotations" in test_raw)
print("Test sample image:", summarize_image(test_images[0]))

In [None]:
import json
import multiprocessing
import subprocess
import sys

# Download/convert step: build the CLI_SCRIPT command line from the notebook
# settings, run it, then summarize any recorded download failures.
workers_detected = multiprocessing.cpu_count() or 1
# An explicit override wins; otherwise clamp the detected core count to [4, 32].
workers = WORKERS_OVERRIDE or min(32, max(4, workers_detected))
print(f"Detected CPU cores: {workers_detected} -> using {workers} workers")

from pathlib import Path

# Fail fast with an actionable message before trying to read or run the script.
if not Path(CLI_SCRIPT).exists():
    raise FileNotFoundError(f"Missing CLI script {CLI_SCRIPT}. Clone the repo or attach it as a Kaggle dataset.")

cmd = [
    sys.executable,
    str(CLI_SCRIPT),
    "--dataset-root", str(DATASET_ROOT),
    "--image-root", str(IMAGE_ROOT),
    "--ann-root", str(ANN_ROOT),
    "--output-dir", str(OUTPUT_DIR),
    "--batch-size", "4",
    "--workers", str(workers),
]

# Detect supported flags by scanning the script text. This is best effort:
# if the file cannot be read, no optional flag is considered supported.
# read_text() opens and closes the file, so no handle is leaked.
try:
    script_text = Path(CLI_SCRIPT).read_text(encoding="utf-8", errors="ignore")
except OSError:
    script_text = ""


def supports(flag: str) -> bool:
    """Return True if *flag* occurs anywhere in the CLI script's source text."""
    return flag in script_text


if supports('coco-train-root'):
    cmd += ["--coco-train-root", str(COCO_IMG_TRAIN)]
if supports('coco-val-root'):
    cmd += ["--coco-val-root", str(COCO_IMG_VAL)]
if OPENIM_ROOT and supports('openimages-root'):
    cmd += ["--openimages-root", str(OPENIM_ROOT)]
if SKIP_DOWNLOAD:
    cmd.append("--no-download")
else:
    if USE_OPENIM_CLI and supports('use-openimages-cli'):
        cmd.append("--use-openimages-cli")
        if PIP_INSTALL_OPENIM and supports('pip-install-openimages'):
            cmd.append("--pip-install-openimages")

# The skip-eval flag must be added BEFORE the command runs; appending it
# afterwards has no effect and lets the script evaluate even though a separate
# eval step follows.
if not EVAL_WITH_SCRIPT:
    if supports('skip-eval'):
        cmd.append("--skip-eval")
    else:
        print("NOTE: CLI script does not mention --skip-eval; it may run evaluation itself.")

print("Running:", " ".join(cmd))
subprocess.run(cmd, check=True)

# Summarize download failures if the log file is present in OUTPUT_DIR.
failures_log = OUTPUT_DIR / "download_failures.json"
if failures_log.exists():
    failures = json.loads(failures_log.read_text())
    print("Download failures - val:", len(failures.get("val", [])), "| test:", len(failures.get("test", [])))
else:
    print("No download_failures.json found yet.")

In [None]:
import json

# Inspect the files the download cell is expected to produce: converted
# annotations for both splits and the optional download-failure log.
val_out = ANN_ROOT / "nocaps_val.json"
test_out = ANN_ROOT / "nocaps_test.json"
failures_log = OUTPUT_DIR / "download_failures.json"

if not val_out.exists():
    print("Val annotations not ready yet (rerun the data download cell).")
else:
    with val_out.open() as val_fh:
        val_entries = json.load(val_fh)
    print("Val entries:", len(val_entries), "->", val_out)
    if val_entries:
        print("Sample val entry:", val_entries[0])

if not test_out.exists():
    print("Test annotations not ready yet (rerun the data download cell).")
else:
    with test_out.open() as test_fh:
        test_entries = json.load(test_fh)
    print("Test entries:", len(test_entries), "->", test_out)
    if test_entries:
        print("Sample test entry:", test_entries[0])

if not failures_log.exists():
    print("No download_failures.json found (no errors or download not finished yet).")
else:
    with failures_log.open() as failures_fh:
        failures = json.load(failures_fh)
    # Show at most three failed items per split.
    print("Download failure samples (val):", failures.get("val", [])[:3])
    print("Download failure samples (test):", failures.get("test", [])[:3])

In [None]:
import json
from pathlib import Path

# Bind `yaml` to whichever YAML package is importable, trying three import
# names in order.
try:
    import ruamel_yaml as yaml  # type: ignore[import]
except ModuleNotFoundError:
    try:
        from ruamel import yaml  # type: ignore
    except ModuleNotFoundError:
        import yaml  # type: ignore

# Use the config in OUTPUT_DIR when present; otherwise derive one from the
# repository template (path is relative to the current working directory).
# NOTE(review): yaml.load with the full Loader should only be used on trusted
# files; both inputs here are local config files.
expected_config = OUTPUT_DIR / "nocaps_eval.yaml"
if not expected_config.exists():
    print("WARNING: not found", expected_config, "-> falling back to template.")
    config_path = Path("configs/nocaps.yaml")
    config = yaml.load(config_path.read_text(), Loader=yaml.Loader)
    # Point the template at this notebook's directories, with forward slashes.
    for key, root in (("image_root", IMAGE_ROOT), ("ann_root", ANN_ROOT)):
        config[key] = str(root).replace("\\", "/")
    # Cap the batch size at 4 (template default assumed to be 32 if unset).
    config["batch_size"] = min(4, config.get("batch_size", 32))
    expected_config.write_text(yaml.dump(config))
    print("Wrote new config to:", expected_config)
else:
    config_path = expected_config
    config = yaml.load(config_path.read_text(), Loader=yaml.Loader)

print("Config path:", expected_config)
print(json.dumps(config, indent=2))

In [None]:
import subprocess, sys
from pathlib import Path

# Run the standalone evaluation unless the CLI script already evaluated.
if EVAL_WITH_SCRIPT:
    print("Skipping separate eval; already done inside CLI script.")
else:
    # Use the same config location that the config-preparation cell reads and
    # writes (OUTPUT_DIR / "nocaps_eval.yaml"). The previously hard-coded
    # "/kaggle/working/nocaps_eval.yaml" points one directory too high.
    eval_config = OUTPUT_DIR / "nocaps_eval.yaml"
    if not eval_config.exists():
        raise FileNotFoundError(
            f"Missing eval config {eval_config}. Run the config preparation cell first."
        )
    # NOTE(review): if eval_nocaps.py parses --distributed with argparse
    # `type=bool`, the string "False" is truthy -- confirm against the script.
    subprocess.run(
        [
            sys.executable,
            "eval_nocaps.py",
            "--config", str(eval_config),
            "--output_dir", str(OUTPUT_DIR),
            "--device", "cuda",
            "--distributed", "False",
        ],
        check=True,
    )

In [None]:
# Load the generated caption files for both splits and print a short summary.
result_dir = OUTPUT_DIR / "result"
val_result_path = result_dir / "val.json"
test_result_path = result_dir / "test.json"

val_result = json.loads(val_result_path.read_text())
test_result = json.loads(test_result_path.read_text())

print("Val captions:", len(val_result))
print("Test captions:", len(test_result))
print("Sample val captions:")
# Show at most the first five validation results.
for caption_row in val_result[:5]:
    print(caption_row)

print("Artifacts saved under:", OUTPUT_DIR)

## Next steps
- If `download_failures.json` is not empty, consider manually downloading those images and rerunning the data download cell.
- Compress `nocaps_images`, `nocaps_annotations`, and `NoCaps/result/test.json` into a separate Kaggle dataset so future notebook runs are faster.
- Upload `NoCaps/result/test.json` to the NoCaps server to receive the official score.