In [1]:
# =========================================================
# CELL 1 - Install
# =========================================================
!pip -q install -U "huggingface_hub<1.0,>=0.34.0" accelerate safetensors
!pip -q install -U "pillow<12" diffusers transformers


In [None]:
# =========================================================
# CELL 2 - Mount + Paths  (UPDATED)
# =========================================================
from google.colab import drive
drive.mount("/content/drive")

from pathlib import Path
import random, numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from datetime import datetime
import json
import math
from PIL import ImageFilter


ROOT = Path("/content/drive/Othercomputers/ה-Mac שלי/Autonomous_Project")

RUN_ID = "last"


IMAGES_DIR = ROOT / "stage_2" / "images"
MASKS_DIR  = ROOT / "stage_2" / "masks_sam3_road_all_dataset"

OUT_DIR = ROOT / "stage_2" / f"run_{RUN_ID}"
OUT_DIR.mkdir(parents=True, exist_ok=True)

OUT_IMG_DIR  = OUT_DIR / "images"
OUT_LBL_DIR  = OUT_DIR / "labels"
OUT_META_DIR = OUT_DIR / "meta"

OUT_IMG_DIR.mkdir(parents=True, exist_ok=True)
OUT_LBL_DIR.mkdir(parents=True, exist_ok=True)
OUT_META_DIR.mkdir(parents=True, exist_ok=True)


YOLO_NAMES = ["animal"]
YOLO_CLASS_ID = 0

RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

objects = [
    "small dog", "medium-size dog", "large dog",
    "cat",
    "deer", "fawn",
    "fox", "wolf", "jackal",
    "wild boar", "boar piglet",
    "goat", "sheep", "cow", "calf", "horse", "donkey",
    "rabbit", "hare",
    "hedgehog",
    "raccoon", "skunk",
    "badger",
    "squirrel",
]

angles = [
    "left side view",
    "right side view",
    "three-quarter side view",
]


print("OUT_DIR:", OUT_DIR)
print("IMAGES_DIR exists:", IMAGES_DIR.exists())
print("MASKS_DIR exists:", MASKS_DIR.exists())


Mounted at /content/drive
OUT_DIR: /content/drive/Othercomputers/ה-Mac שלי/Autonomous_Project/stage_2/run_last
IMAGES_DIR exists: True
MASKS_DIR exists: True


In [3]:

# =========================================================
# CELL 3 - Helpers (depth-aware spot size) + YOLO label + metadata
# =========================================================

def find_matching_mask(img_path: Path, masks_dir: Path) -> Path:
    # Try to find a mask file that matches the image stem (preferred: "<stem>_mask.*", fallback: "<stem>*mask*.*")
    stem_mask = list(masks_dir.glob(img_path.stem + "_mask.*"))
    if stem_mask:
        return stem_mask[0]
    any_mask = list(masks_dir.glob(img_path.stem + "*mask*.*"))
    if any_mask:
        return any_mask[0]
    raise FileNotFoundError(f"No matching mask for {img_path.name}")

def normalize01(a: np.ndarray) -> np.ndarray:
    # Robust normalize to [0,1] using 2-98 percentiles to reduce outlier sensitivity
    a = a.astype(np.float32)
    lo, hi = np.percentile(a, 2), np.percentile(a, 98)
    if hi - lo < 1e-6:
        return np.zeros_like(a, dtype=np.float32)
    a = np.clip((a - lo) / (hi - lo), 0.0, 1.0)
    return a

def compute_depth_small(depth_estimator, img_rgb: Image.Image, long_edge=768):
    # Run depth on a resized image for speed; return depth map + scale factors (orig -> small)
    W, H = img_rgb.size
    if max(W, H) > long_edge:
        if W >= H:
            Ws = long_edge
            Hs = int(round(H * (long_edge / W)))
        else:
            Hs = long_edge
            Ws = int(round(W * (long_edge / H)))
        img_small = img_rgb.resize((Ws, Hs))
    else:
        img_small = img_rgb
        Ws, Hs = W, H

    out = depth_estimator(img_small)
    depth_pil = out["depth"]
    depth_np = np.array(depth_pil).astype(np.float32)
    depth01 = normalize01(depth_np)

    sx = Ws / W
    sy = Hs / H
    return depth01, (sx, sy)

# Approximate real-world object heights (meters) used to scale the inpaint mask based on depth
OBJ_HEIGHT_M = {
    "rabbit": 0.35, "hare": 0.5, "hedgehog": 0.25,
    "cat": 0.35,
    "small dog": 0.5, "medium-size dog": 0.8, "large dog": 1.0,
    "fox": 0.5, "wolf": 1.0, "jackal": 0.55,
    "deer": 1.1, "fawn": 0.7,
    "wild boar": 0.9, "boar piglet": 0.55,
    "goat": 0.9, "sheep": 0.9,
    "cow": 1.5, "calf": 1.0,
    "horse": 1.6, "donkey": 1.2,
    "raccoon": 0.35, "skunk": 0.40, "badger": 0.25, "squirrel": 0.40,
}

# Approximate width/height ratios to shape the inpaint region per animal type
OBJ_ASPECT = {
    "cat": 1.2,
    "small dog": 1.3, "medium-size dog": 1.35, "large dog": 1.4,
    "fox": 1.35, "wolf": 1.5, "jackal": 1.4,
    "deer": 1.4, "fawn": 1.3,
    "wild boar": 1.6, "boar piglet": 1.4,
    "goat": 1.35, "sheep": 1.35,
    "cow": 1.8, "calf": 1.6,
    "horse": 1.8, "donkey": 1.7,
}

# Camera + depth-to-distance calibration (tuned for KITTI-like setup)
CAM_HFOV_DEG = 70.0
Z_NEAR_M = 7.0
Z_FAR_M  = 55.0
MASK_MARGIN = 1.35

# Global tuning knobs: increase mask size and allow larger spots (helps reduce "no animal" outcomes)
GLOBAL_SCALE = 1.60
MIN_SPOT = (240, 180)
MAX_SPOT = (640, 520)

def fpx_from_hfov(img_w, hfov_deg):
    # Convert horizontal FOV to focal length in pixels (pinhole camera model)
    return (img_w / 2.0) / math.tan(math.radians(hfov_deg) / 2.0)

def depth01_to_Zm(d, z_near, z_far, invert=False):
    # Map normalized depth [0,1] to metric distance using an exponential curve
    if invert:
        d = 1.0 - d
    ratio = z_far / z_near
    return z_near * (ratio ** d)

def spot_mask_from_road_depth(
    road_mask_L, image_size,
    depth01_small, scale_xy,
    obj_name,
    y_min_frac=0.60, y_max_frac=0.95,
    DEPTH_INVERT=False,
):
    # Sample a point on the road mask (bottom part of the image), then size the inpaint spot by depth
    mask_np = np.array(road_mask_L)
    H, W = mask_np.shape
    road = mask_np > 128
    ys, xs = np.where(road)

    y_min, y_max = int(y_min_frac * H), int(y_max_frac * H)
    ok = (ys >= y_min) & (ys <= y_max)
    ys2, xs2 = ys[ok], xs[ok]

    if len(xs2) == 0:
        cx = np.random.randint(W)
        cy = np.random.randint(H)
    else:
        i = np.random.randint(len(xs2))
        cx, cy = int(xs2[i]), int(ys2[i])

    # Map sampled pixel to the resized depth map coordinates
    sx, sy = scale_xy
    Ws = depth01_small.shape[1]
    Hs = depth01_small.shape[0]
    dx = int(np.clip(round(cx * sx), 0, Ws - 1))
    dy = int(np.clip(round(cy * sy), 0, Hs - 1))
    d01 = float(depth01_small[dy, dx])

    # Convert depth to distance (meters)
    Zm = depth01_to_Zm(d01, Z_NEAR_M, Z_FAR_M, invert=DEPTH_INVERT)

    # Pinhole projection: object height in pixels ~= f_px * (H_obj / Z)
    img_w, img_h = image_size
    f_px = fpx_from_hfov(img_w, CAM_HFOV_DEG)

    H_obj = OBJ_HEIGHT_M.get(obj_name, 0.8)
    aspect = OBJ_ASPECT.get(obj_name, 1.4)

    h_px = f_px * (H_obj / max(Zm, 1e-6))
    spot_h = int(np.clip(h_px * MASK_MARGIN * GLOBAL_SCALE, MIN_SPOT[1], MAX_SPOT[1]))
    spot_w = int(np.clip(spot_h * aspect,               MIN_SPOT[0], MAX_SPOT[0]))

    # Build an ellipse mask for inpainting (acts as the "replace region")
    x0, y0 = max(0, cx - spot_w // 2), max(0, cy - spot_h // 2)
    x1, y1 = min(img_w, cx + spot_w // 2), min(img_h, cy + spot_h // 2)

    spot_mask = Image.new("L", (img_w, img_h), 0)
    ImageDraw.Draw(spot_mask).ellipse([x0, y0, x1, y1], fill=255)

    bbox = (x0, y0, x1, y1)
    return spot_mask, (cx, cy), bbox, d01, (spot_w, spot_h)

def inpaint_on_crop(
    pipe, image, spot_mask, bbox, prompt, negative,
    crop_pad=180, crop_res=768, guidance=14.0, steps=30, strength=0.88,
    feather=6
):
    # Crop around the bbox to run SDXL on a fixed resolution region (faster + more stable)
    W, H = image.size
    x0, y0, x1, y1 = bbox

    x0p = max(0, x0 - crop_pad); y0p = max(0, y0 - crop_pad)
    x1p = min(W, x1 + crop_pad); y1p = min(H, y1 + crop_pad)

    img_crop  = image.crop((x0p, y0p, x1p, y1p))
    mask_crop = spot_mask.crop((x0p, y0p, x1p, y1p)).convert("L")

    # Use a binary mask for diffusion, and a soft (blurred) mask for compositing back
    mask_bin  = mask_crop.point(lambda p: 255 if p > 128 else 0)
    mask_soft = mask_bin.filter(ImageFilter.GaussianBlur(feather)) if (feather and feather > 0) else mask_bin

    img_r  = img_crop.resize((crop_res, crop_res))
    mask_r = mask_bin.resize((crop_res, crop_res))

    out_r = pipe(
        prompt=prompt,
        negative_prompt=negative,
        image=img_r,
        mask_image=mask_r,
        guidance_scale=guidance,
        num_inference_steps=steps,
        strength=strength,
        width=crop_res,
        height=crop_res,
        num_images_per_prompt=1,
    ).images[0]

    out_crop = out_r.resize(img_crop.size)
    merged_crop = Image.composite(out_crop, img_crop, mask_soft)

    merged = image.copy()
    merged.paste(merged_crop, (x0p, y0p))
    return merged

def write_yolo_label(txt_path: Path, bbox, img_w: int, img_h: int, class_id: int = 0):
    # Save a single YOLO-format bounding box: class xc yc w h (all normalized to [0,1])
    x0, y0, x1, y1 = bbox
    bw = max(1, x1 - x0)
    bh = max(1, y1 - y0)
    xc = x0 + bw / 2.0
    yc = y0 + bh / 2.0

    xc_n = xc / img_w
    yc_n = yc / img_h
    bw_n = bw / img_w
    bh_n = bh / img_h

    line = f"{class_id} {xc_n:.6f} {yc_n:.6f} {bw_n:.6f} {bh_n:.6f}\n"
    txt_path.write_text(line, encoding="utf-8")



In [4]:
# =========================================================
# CELL 4 - SDXL Inpainting Pipeline
# =========================================================
from diffusers import AutoPipelineForInpainting

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = AutoPipelineForInpainting.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    variant="fp16" if device == "cuda" else None,
)

if device == "cuda":
    pipe.enable_model_cpu_offload()

pipe.enable_attention_slicing()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()

try:
    pipe.enable_xformers_memory_efficient_attention()
except Exception:
    pass


Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.
Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

Fetching 18 files:   0%|          | 0/18 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

scheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

text_encoder_2/model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

text_encoder/model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.fp16.safete(…):   0%|          | 0.00/5.14G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.fp16.safeten(…):   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


In [5]:
# =========================================================
# CELL 5 - Depth Estimator (NO CLIP, FIXED device)
# =========================================================
from transformers import pipeline as hf_pipeline

DEPTH_MODEL = "Intel/dpt-hybrid-midas"
depth_estimator = hf_pipeline(
    task="depth-estimation",
    model=DEPTH_MODEL,
    device=0 if torch.cuda.is_available() else -1,  # 0=GPU, -1=CPU
)

DEPTH_INVERT = True
print("Depth device:", 0 if torch.cuda.is_available() else -1)
print("DEPTH_MODEL:", DEPTH_MODEL)


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/490M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/382 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


Depth device: 0
DEPTH_MODEL: Intel/dpt-hybrid-midas


In [6]:
# =========================================================
# CELL 6 - Pick images that have masks
# =========================================================
img_files = [p for p in IMAGES_DIR.iterdir() if p.is_file()]

valid_imgs = []
for p in img_files:
    try:
        _ = find_matching_mask(p, MASKS_DIR)
        valid_imgs.append(p)
    except FileNotFoundError:
        pass

print("total images:", len(img_files))
print("images with masks:", len(valid_imgs))

chosen_imgs = valid_imgs
print("chosen (first 10):", [p.name for p in chosen_imgs[:10]])


model.safetensors:   0%|          | 0.00/490M [00:00<?, ?B/s]

total images: 358
images with masks: 336
chosen (first 10): ['000080.png', '000103.png', '000105.png', '000015.png', '000148.png', '000004.png', '000138.png', '000128.png', '000245.png', '000225.png']


In [8]:
# =========================================================
# CELL 7 - Main loop (NO CLIP, save optional, show optional)
# =========================================================
import json
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# -------------------------
# Run parameters
# -------------------------
NUM_TO_CREATE = 336       # How many images to process (upper bound)
SAVE_OUTPUTS  = True      # Save images + labels + metadata (True/False)
NUM_TO_SHOW   = 10        # How many examples to display in the notebook

# QC parameters
QC_DIFF_THR    = 12.0     # Minimum mean pixel change inside the mask to accept the result
MIN_ROAD_RATIO = 0.01     # Skip samples where the road mask is almost empty

# Inpainting parameters (runtime drivers)
MAX_TRIES   = 6           # Max re-samples per image if QC fails (can be expensive)
INFER_STEPS = 40          # Diffusion steps (higher = slower, often higher quality)
GUIDANCE    = 12.0        # CFG guidance (too high can harm realism)
STRENGTH    = 0.99        # How strongly to overwrite the crop
CROP_RES    = 768         # Inpaint crop resolution (bigger = slower, more detail)

def mean_abs_diff_in_mask(img_a: Image.Image, img_b: Image.Image, mask_L: Image.Image) -> float:
    # Compute mean absolute RGB difference only inside the (binary) mask
    a = np.array(img_a).astype(np.float32)
    b = np.array(img_b).astype(np.float32)
    m = (np.array(mask_L) > 128)
    if m.sum() < 10:
        return 0.0
    diff = np.abs(a - b).mean(axis=2)
    return float(diff[m].mean())

results = []
qc_fails = 0
skipped_bad_mask = 0
exceptions = 0
bad_images = []

to_process = chosen_imgs[:min(NUM_TO_CREATE, len(chosen_imgs))]

for idx, img_path in enumerate(to_process, start=1):
    try:
        mask_path = find_matching_mask(img_path, MASKS_DIR)

        init_image = Image.open(img_path).convert("RGB")
        road_mask  = Image.open(mask_path).convert("L").resize(init_image.size)

        # Quick sanity check: if the road area is tiny, don't waste inpainting calls
        road_ratio = float((np.array(road_mask) > 128).mean())
        if road_ratio < MIN_ROAD_RATIO:
            skipped_bad_mask += 1
            bad_images.append((img_path.name, "bad_road_mask", road_ratio))
            continue

        # Depth is used to choose the spot size (near = bigger mask, far = smaller mask)
        depth01_small, scale_xy = compute_depth_small(depth_estimator, init_image, long_edge=768)

        obj = random.choice(objects)
        ang = random.choice(angles)

        prompt = (
            f"a realistic {obj} standing on the asphalt road, {ang}, full body, "
            "one animal, clearly visible, correct scale and perspective, natural daylight, sharp focus, "
            "cast shadow on the road, photo-realistic, high detail"
        )
        negative = "cartoon, painting, blurry, lowres, deformed, extra legs, bad anatomy, floating, no shadow, empty road, no animal"

        show_now = (idx <= NUM_TO_SHOW)

        # Try multiple placements; keep the best according to QC score
        best_out = None
        best_score = -1.0
        best_pack = None  # (spot_mask, bbox, d, spot_w, spot_h, cx, cy)

        for t in range(1, MAX_TRIES + 1):
            spot_mask, (cx, cy), bbox, d, (spot_w, spot_h) = spot_mask_from_road_depth(
                road_mask_L=road_mask,
                image_size=init_image.size,
                depth01_small=depth01_small,
                scale_xy=scale_xy,
                obj_name=obj,
                y_min_frac=0.68,
                y_max_frac=0.95,
                DEPTH_INVERT=DEPTH_INVERT,
            )

            out_try = inpaint_on_crop(
                pipe=pipe,
                image=init_image,
                spot_mask=spot_mask,
                bbox=bbox,
                prompt=prompt,
                negative=negative,
                crop_pad=180,
                crop_res=CROP_RES,
                guidance=GUIDANCE,
                steps=INFER_STEPS,
                strength=STRENGTH,
                feather=6
            )

            score = mean_abs_diff_in_mask(init_image, out_try, spot_mask)

            if score > best_score:
                best_score = score
                best_out = out_try
                best_pack = (spot_mask, bbox, d, spot_w, spot_h, cx, cy)

            # Early stop if the change is clearly strong enough
            if score >= QC_DIFF_THR:
                break

        if best_out is None or best_pack is None:
            exceptions += 1
            bad_images.append((img_path.name, "no_best_out", None))
            continue

        # QC fail: output did not change enough inside the mask (often means "no animal appeared")
        if best_score < QC_DIFF_THR:
            qc_fails += 1
            bad_images.append((img_path.name, "qc_fail", best_score))
            continue

        spot_mask, bbox, d, spot_w, spot_h, cx, cy = best_pack
        out = best_out

        # Display a few examples in-notebook
        if show_now:
            ov = np.array(init_image).copy()
            m  = np.array(spot_mask) > 10
            ov[m] = (ov[m] * 0.55).astype(np.uint8)

            plt.figure(figsize=(12, 4))
            plt.title(f"OVERLAY {idx}/{len(to_process)} | {img_path.name} | spot={spot_w}x{spot_h} | diff={best_score:.1f}")
            plt.imshow(ov); plt.axis("off")
            plt.show()

            plt.figure(figsize=(12, 4))
            plt.title(f"OUT {idx}/{len(to_process)} | {img_path.name} | obj={obj} | d={d:.3f} | diff={best_score:.1f}")
            plt.imshow(out); plt.axis("off")
            plt.show()

        # Save outputs to disk (optional)
        if SAVE_OUTPUTS:
            out_name = f"{img_path.stem}__{obj.replace(' ','_')}__{ang.replace(' ','_').replace('-','_')}__d{d:.3f}.png"
            out_path = OUT_IMG_DIR / out_name
            out.save(out_path)

            W, H = init_image.size
            label_path = OUT_LBL_DIR / (out_path.stem + ".txt")
            write_yolo_label(label_path, bbox, W, H, class_id=YOLO_CLASS_ID)

            rec = {
                "orig_image": img_path.name,
                "mask_image": mask_path.name,
                "synth_image": out_path.name,
                "label_file": label_path.name,
                "class_id": int(YOLO_CLASS_ID),
                "class_name": YOLO_NAMES[YOLO_CLASS_ID],
                "obj": obj,
                "angle": ang,
                "depth01": float(d),
                "bbox_xyxy": [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])],
                "spot_wh": [int(spot_w), int(spot_h)],
                "cxcy": [int(cx), int(cy)],
                "qc_diff": float(best_score),
                "road_ratio": float(road_ratio),
                "prompt": prompt,
                "negative": negative,
            }
            results.append(rec)

    except Exception as e:
        exceptions += 1
        if exceptions <= 10:
            print(f"[EXCEPTION] {img_path.name}: {e}")
        bad_images.append((img_path.name, "exception", str(e)))
        continue

# Write metadata files (only if saving)
if SAVE_OUTPUTS:
    manifest_path = OUT_META_DIR / "manifest.jsonl"
    with manifest_path.open("w", encoding="utf-8") as f:
        for r in results:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")

    run_cfg = {
        "RUN_ID": RUN_ID,
        "RANDOM_SEED": RANDOM_SEED,
        "DEPTH_MODEL": DEPTH_MODEL,
        "DEPTH_INVERT": DEPTH_INVERT,
        "CAM_HFOV_DEG": CAM_HFOV_DEG,
        "Z_NEAR_M": Z_NEAR_M,
        "Z_FAR_M": Z_FAR_M,
        "MASK_MARGIN": MASK_MARGIN,
        "GLOBAL_SCALE": GLOBAL_SCALE,
        "MIN_SPOT": list(MIN_SPOT),
        "MAX_SPOT": list(MAX_SPOT),
        "YOLO_NAMES": YOLO_NAMES,
        "IMAGES_DIR": str(IMAGES_DIR),
        "MASKS_DIR": str(MASKS_DIR),
        "OUT_DIR": str(OUT_DIR),
        "NUM_TO_CREATE": int(NUM_TO_CREATE),
        "NUM_TO_SHOW": int(NUM_TO_SHOW),
        "SAVE_OUTPUTS": bool(SAVE_OUTPUTS),
        "MAX_TRIES": int(MAX_TRIES),
        "QC_DIFF_THR": float(QC_DIFF_THR),
        "INFER_STEPS": int(INFER_STEPS),
    }
    (OUT_META_DIR / "run_config.json").write_text(
        json.dumps(run_cfg, ensure_ascii=False, indent=2),
        encoding="utf-8"
    )

print("Done.")
print("Processed:", len(to_process))
print("Saved:", len(results) if SAVE_OUTPUTS else 0)
print("QC fails:", qc_fails)
print("Skipped bad masks:", skipped_bad_mask)
print("Exceptions:", exceptions)
print("Bad images (first 15):")
for x in bad_images[:15]:
    print(x)


Output hidden; open in https://colab.research.google.com to view.