In [6]:
from pathlib import Path
import json
import shutil
import numpy as np

# Project root: the parent of the current working directory, made absolute.
# All data paths below are built from it.
ROOT = Path("..").resolve()

# Existing case whose JSONL is copied to seed every new case (see ensure_case_jsonl).
BASE_CASE = "dog_8_val_5"
BASE_JSONL = ROOT / "data" / "cases" / BASE_CASE / "generated" / f"{BASE_CASE}.jsonl"

# Numeric suffixes of the cases to create; each becomes "dog_8_val_<suffix>".
CASE_SUFFIXES = list(range(11, 27))   # 11..26 inclusive

# Master library file handed to load_master_records.
MASTER_LIB = ROOT / "data" / "master" / "master_lib.mat"

# Output image height / width, passed as out_h / out_w to the renderers.
H, W = 600, 900
# Passed as `margin` to compute_bbox.
MARGIN = 5
# Passed as `supersample` to draw_and_save and to the completion workers.
SUPERSAMPLE = 4

def ensure_case_jsonl(case_name: str) -> Path:
    """Return the JSONL path for ``case_name``, seeding it from the base case if missing.

    The case's ``generated`` directory is created when necessary. If the
    case-specific JSONL is not there yet, ``BASE_JSONL`` is copied into place
    (metadata preserved via ``shutil.copy2``); an existing file is left untouched.

    Args:
        case_name: Name of the case directory under ``data/cases``.

    Returns:
        Path to ``<ROOT>/data/cases/<case_name>/generated/<case_name>.jsonl``.
    """
    generated = ROOT / "data" / "cases" / case_name / "generated"
    generated.mkdir(parents=True, exist_ok=True)

    jsonl_path = generated / f"{case_name}.jsonl"
    if jsonl_path.exists():
        # Never overwrite a case file that is already in place.
        return jsonl_path

    shutil.copy2(BASE_JSONL, jsonl_path)
    return jsonl_path

# List the case names derived from CASE_SUFFIXES. This only prints them;
# nothing is written to disk here.
print("Creating cases:")
for case_name in (f"dog_8_val_{s}" for s in CASE_SUFFIXES):
    print(" ", case_name)


Creating cases:
  dog_8_val_11
  dog_8_val_12
  dog_8_val_13
  dog_8_val_14
  dog_8_val_15
  dog_8_val_16
  dog_8_val_17
  dog_8_val_18
  dog_8_val_19
  dog_8_val_20
  dog_8_val_21
  dog_8_val_22
  dog_8_val_23
  dog_8_val_24
  dog_8_val_25
  dog_8_val_26


In [7]:
import sys

# Make the project's `scripts/` directory importable so `shape_gen` resolves.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
_scripts_dir = str(ROOT / "scripts")
if _scripts_dir not in sys.path:
    sys.path.append(_scripts_dir)

from shape_gen.io_mat import load_generated_case, unit_to_pixel
from shape_gen.library import load_master_records, build_class_index

# Load the master library once; `records`, `classes` and `byClass` are handed
# to every worker job in the generation cell.
records = load_master_records(MASTER_LIB)
classes, byClass = build_class_index(records)

print("Loaded records:", len(records))
print("Classes:", len(classes))


Loaded records: 54000
Classes: 54


In [8]:
def load_case_from_jsonl(jsonl_path: Path, default_baseGrid: int = 256):
    """Load the most recent case record from a JSONL file.

    The last non-blank line of the file is parsed as JSON. Two record layouts
    are accepted:

    * ``silhouette_u`` / ``occluder_u`` with a mandatory ``baseGrid``;
    * ``shape_contour_xy`` / ``occluder_rect_xy`` where ``baseGrid`` falls back
      to ``default_baseGrid`` and the class falls back to ``category``.

    Args:
        jsonl_path: Path to the JSONL file (read as UTF-8).
        default_baseGrid: Grid size used when the second layout has no
            ``baseGrid`` key.

    Returns:
        Tuple ``(sil_u, occ_u, baseGrid, sil_class)``. ``sil_u`` and ``occ_u``
        are float arrays; a missing, ``null`` or empty occluder is returned as
        an empty array of shape ``(0, 2)``. ``sil_class`` may be ``None``.

    Raises:
        ValueError: If the file contains no non-blank line.
        KeyError: If the selected record lacks a required key.
    """
    # Skip blank lines so a trailing empty line does not get parsed as JSON.
    lines = [ln for ln in jsonl_path.read_text(encoding="utf-8").splitlines() if ln.strip()]
    if not lines:
        raise ValueError(f"No JSON records found in: {jsonl_path}")
    row = json.loads(lines[-1])

    if "silhouette_u" in row:
        sil_u = np.asarray(row["silhouette_u"], float)
        occ_raw = row.get("occluder_u")
        baseGrid = int(row["baseGrid"])
        sil_class = row.get("sil_class", None)

    else:
        sil_u = np.asarray(row["shape_contour_xy"], float)
        occ_raw = row.get("occluder_rect_xy")
        baseGrid = int(row.get("baseGrid", default_baseGrid))
        sil_class = row.get("sil_class", row.get("category", None))

    # An explicit JSON null would otherwise become a size-1 NaN array.
    occ_u = np.asarray([] if occ_raw is None else occ_raw, float)
    if occ_u.size == 0:
        # Normalise "no occluder" to a consistent (0, 2) shape.
        occ_u = np.zeros((0, 2), float)

    return sil_u, occ_u, baseGrid, sil_class


In [11]:
# %%
# Cell 4. Worker + merge utilities (FIXED save_xy_npz signature)
from pathlib import Path
import re
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np
from shape_gen.generate_parallel import generate_completions, save_metadata_jsonl
from shape_gen.heatmap.xy_store import save_xy_npz


def _worker_run(args):
    """Run one worker's share of completion generation and save its results.

    ``args`` is a dict built by the driver cell. Its values are coerced to
    ``int`` / ``float`` where the original call did so and forwarded to
    ``generate_completions``; a NumPy generator is created from ``args["seed"]``.
    The returned metadata and polygons are written to per-worker files inside
    ``args["tmp_dir"]`` (created if missing).

    Args:
        args: Job description dict (worker id, seed, directories, geometry,
            library data and generation parameters).

    Returns:
        Tuple ``(meta_path, xy_path)`` of the written files, as strings.
    """
    worker_id = int(args["worker_id"])
    completions_dir = Path(args["out_dir"])
    scratch_dir = Path(args["tmp_dir"])
    scratch_dir.mkdir(parents=True, exist_ok=True)

    grid = int(args["base_grid"])

    # Keys forwarded unchanged under the same keyword name.
    passthrough_keys = (
        "silhouette", "occluder", "start_pt", "end_pt", "sil_class",
        "records", "classes", "byClass", "final_n_samples_mode",
    )
    # Keys coerced to int and forwarded under the same keyword name.
    int_keys = (
        "minX", "minY", "wBB", "hBB", "out_w", "out_h", "base_grid",
        "start_index", "supersample", "flush_every", "max_attempts_per_image",
        "refit_n_ctrl", "refit_subdiv", "refit_max_attempts",
        "max_shrink_iters", "smooth_win",
    )
    # Keys coerced to float and forwarded under the same keyword name.
    float_keys = ("fraction", "refit_jitter_sigma", "shrink_gamma")

    call_kwargs = {key: args[key] for key in passthrough_keys}
    call_kwargs.update((key, int(args[key])) for key in int_keys)
    call_kwargs.update((key, float(args[key])) for key in float_keys)

    metas, out_files_xy, polygons_xy = generate_completions(
        out_dir=completions_dir,
        silhouette_index=1,
        n_images=int(args["n_attempts"]),  # job key differs from keyword name
        rng=np.random.default_rng(int(args["seed"])),
        require_valid=True,
        snap_intersections_to_vertices=True,
        refit_enabled=False,
        try_mirror=True,
        save_invalid=False,
        invalid_subdir="_invalid",
        **call_kwargs,
    )

    # Per-worker outputs; the driver merges them after all workers finish.
    meta_path = scratch_dir / f"meta_{worker_id:02d}.jsonl"
    save_metadata_jsonl(metas, meta_path)

    xy_path = scratch_dir / f"xy_{worker_id:02d}.npz"
    save_xy_npz(
        xy_path,
        out_files=out_files_xy,
        polygons=polygons_xy,
        base_grid=grid,
        matlab_1_indexed=True,
    )

    return str(meta_path), str(xy_path)


def extract_idx(p: str) -> int:
    """Parse the completion index from a completion PNG file name.

    The name must end with ``completion_<4 digits>_<5 digits>.png``; the
    five-digit group is returned as an integer.

    Args:
        p: File name or path (anything convertible with ``str``).

    Returns:
        The completion index.

    Raises:
        ValueError: If the name does not match the expected pattern.
    """
    match = re.search(r"completion_\d{4}_(\d{5})\.png$", str(p))
    if match is None:
        raise ValueError(f"Could not parse completion index from: {p}")
    return int(match[1])


In [None]:
# %%
from pathlib import Path
import json
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed

from shape_gen.geom_bbox import compute_bbox
from shape_gen.render import draw_and_save
from shape_gen.intersections2 import find_intersection_points_multiple

# Number of worker processes (also the number of jobs built per case).
N_WORKERS = 8
# Total generation attempts per case, split evenly across the workers.
TOTAL_ATTEMPTS = 10_000
ATTEMPTS_PER = TOTAL_ATTEMPTS // N_WORKERS
# Fail early if the total does not divide evenly among the workers.
assert ATTEMPTS_PER * N_WORKERS == TOTAL_ATTEMPTS

# The values below are forwarded unchanged to generate_completions through the
# worker jobs; their exact meaning is defined by that function (not shown here).
FRACTION = 0.22                 # -> fraction
SAMPLES_MODE = "matlab_100"     # -> final_n_samples_mode
MAX_ATTEMPTS = 250              # -> max_attempts_per_image

SHRINK_GAMMA = 0.82             # -> shrink_gamma
MAX_SHRINK_ITERS = 60           # -> max_shrink_iters
SMOOTH_WIN = 7                  # -> smooth_win

# Refit parameters are still passed along, although _worker_run calls
# generate_completions with refit_enabled=False.
REFIT_N_CTRL = 10               # -> refit_n_ctrl
REFIT_SUBDIV = 18               # -> refit_subdiv
REFIT_JITTER = 0.008            # -> refit_jitter_sigma
REFIT_MAX_ATTEMPTS = 12         # -> refit_max_attempts

FLUSH = 1000                    # -> flush_every

# For every case suffix: make sure the case JSONL exists, render the reference
# images, fan the completion generation out to N_WORKERS processes, then merge
# the per-worker metadata and polygon files into one file of each kind.
for suffix in CASE_SUFFIXES:

    CASE = f"dog_8_val_{suffix}"
    print("\n=== CASE:", CASE)

    jsonl_path = ensure_case_jsonl(CASE)

    OUT_DIR = ROOT / "data" / "cases" / CASE
    RAND_DIR = OUT_DIR / "generated" / "completions"
    TMP = OUT_DIR / "generated" / "_tmp"

    RAND_DIR.mkdir(parents=True, exist_ok=True)
    TMP.mkdir(parents=True, exist_ok=True)

    sil_u, occ_u, baseGrid, sil_class = load_case_from_jsonl(jsonl_path)

    silhouette = unit_to_pixel(sil_u, baseGrid)
    occluder = unit_to_pixel(occ_u, baseGrid) if occ_u.size else np.zeros((0, 2), dtype=np.int32)

    # Bounding box over the silhouette and, when present, the occluder.
    polys = [silhouette] + ([occluder] if occluder.size else [])
    minX, minY, wBB, hBB = compute_bbox(
        polys,
        base_grid=baseGrid,
        margin=MARGIN
    )

    # Write GT
    draw_and_save(
        polygons=[silhouette],
        colors=[[0, 0, 0]],
        minX=minX, minY=minY, wBB=wBB, hBB=hBB,
        out_w=W, out_h=H,
        out_file=OUT_DIR / "gt.png",
        supersample=SUPERSAMPLE
    )

    # Write occluded (only if occluder exists)
    if occluder.size:
        draw_and_save(
            polygons=[silhouette, occluder],
            colors=[[0, 0, 0], [131, 131, 131]],
            minX=minX, minY=minY, wBB=wBB, hBB=hBB,
            out_w=W, out_h=H,
            out_file=OUT_DIR / "occluded.png",
            supersample=SUPERSAMPLE
        )
    else:
        print("Occluder empty. Skipping occluded render.")

    # Intersections required for completion generation
    if not occluder.size:
        raise RuntimeError(f"{CASE}: occluder is empty. Cannot generate completions without intersections.")

    pts = find_intersection_points_multiple(silhouette, occluder, eps_merge=1e-3)
    if pts.shape[0] < 2:
        raise RuntimeError(f"{CASE}: <2 intersection points found. Got {pts.shape[0]}")

    # Only the first two intersection points are used as the completion endpoints.
    start_pt, end_pt = pts[0], pts[1]

    CASE_SEED = int(suffix)

    jobs = []
    for wid in range(N_WORKERS):

        # Distinct, reproducible seed per (case, worker) pair.
        worker_seed = CASE_SEED * 1_000_000 + wid * 10_000 + 123

        jobs.append(dict(
            worker_id=wid,
            seed=worker_seed,
            tmp_dir=str(TMP),
            out_dir=str(RAND_DIR),

            silhouette=silhouette,
            occluder=occluder,
            start_pt=start_pt,
            end_pt=end_pt,
            minX=minX,
            minY=minY,
            wBB=wBB,
            hBB=hBB,
            out_w=W,
            out_h=H,
            sil_class=sil_class,
            base_grid=baseGrid,

            records=records,
            classes=classes,
            byClass=byClass,

            n_attempts=ATTEMPTS_PER,
            # Each worker gets its own contiguous, 1-based index range.
            start_index=wid * ATTEMPTS_PER + 1,

            fraction=FRACTION,
            final_n_samples_mode=SAMPLES_MODE,
            supersample=SUPERSAMPLE,
            flush_every=FLUSH,
            max_attempts_per_image=MAX_ATTEMPTS,

            refit_n_ctrl=REFIT_N_CTRL,
            refit_subdiv=REFIT_SUBDIV,
            refit_jitter_sigma=REFIT_JITTER,
            refit_max_attempts=REFIT_MAX_ATTEMPTS,

            shrink_gamma=SHRINK_GAMMA,
            max_shrink_iters=MAX_SHRINK_ITERS,
            smooth_win=SMOOTH_WIN,
        ))

    # Run all workers; fut.result() re-raises any exception from a worker.
    results = []
    with ProcessPoolExecutor(max_workers=N_WORKERS) as ex:
        for fut in as_completed([ex.submit(_worker_run, j) for j in jobs]):
            results.append(fut.result())

    # ---- merge meta
    all_meta = []
    for m, _ in results:
        all_meta += Path(m).read_text(encoding="utf-8").splitlines()

    meta_rows = [json.loads(l) for l in all_meta if l.strip()]
    # Workers finish in arbitrary order; restore ordering by completion index.
    meta_rows.sort(key=lambda r: int(r.get("completion_index", 0)))

    meta_out = OUT_DIR / "generated" / "shapes_meta.jsonl"
    meta_out.parent.mkdir(exist_ok=True, parents=True)

    with meta_out.open("w", encoding="utf-8") as f:
        for r in meta_rows:
            f.write(json.dumps(r) + "\n")

    # ---- merge xy
    pairs = []
    for _, xy in results:
        # Open via context manager so each archive's file handle is released
        # right away instead of leaking one handle per worker per case.
        # NOTE: allow_pickle=True is only acceptable because these files were
        # just written by our own workers; never use it on untrusted files.
        with np.load(xy, allow_pickle=True) as npz:
            for fpath, poly in zip(npz["out_files"], npz["polygons"]):
                pairs.append((extract_idx(fpath), str(fpath), poly))

    pairs.sort(key=lambda t: t[0])

    out_files = [p[1] for p in pairs]
    polys_xy = [p[2] for p in pairs]

    xy_out = OUT_DIR / "generated" / "shapes_xy.npz"
    save_xy_npz(
        xy_out,
        out_files=out_files,
        polygons=polys_xy,
        base_grid=int(baseGrid),
        matlab_1_indexed=True,
    )

    print("Saved:", CASE, " valid:", len(out_files))



=== CASE: dog_8_val_11
