In [1]:
# ===================== 00_setup — CELL 0: Bootstrap + experiment registry =====================

import os
import sys
from pathlib import Path

# -------------------------------------------------
# Locate the repo root: the closest directory, starting at the current
# working directory and walking upward, that contains "src/imgofup".
# -------------------------------------------------
p = Path.cwd().resolve()  # expected: <repo>/notebooks

# First match wins; None means neither the cwd nor any ancestor qualifies.
REPO_ROOT = next(
    (folder for folder in (p, *p.parents) if folder.joinpath("src", "imgofup").is_dir()),
    None,
)

if REPO_ROOT is None:
    raise RuntimeError("Could not find repo root (no 'src/imgofup' found).")

SRC_DIR = REPO_ROOT / "src"

# -------------------------------------------------
# Make src/ importable (so `import imgofup...` works)
# -------------------------------------------------
# The membership guard keeps re-runs of this cell from stacking duplicate
# entries; index 0 gives this checkout priority over later sys.path entries.
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

# -------------------------------------------------
# Tell config the repo root (so paths.py resolves correctly)
# -------------------------------------------------
os.environ["PROJ_ROOT"] = str(REPO_ROOT)

# Status lines: the emoji prefixes were mis-encoded (UTF-8 bytes shown as
# Latin-1/cp1252); restored to the intended characters.
print("📦 Repo root:", REPO_ROOT)
print("📦 Using src from:", SRC_DIR)
print("🔧 PROJ_ROOT env set to:", os.environ["PROJ_ROOT"])

# -------------------------------------------------
# Experiment registry (edit here to add/remove experiments)
# -------------------------------------------------
DATA_DIR = REPO_ROOT / "data"

# Path prefixes shared by every entry below.
_OUTPUT_ROOT = DATA_DIR / "output"
_MODELS_ROOT = _OUTPUT_ROOT / "models"

# One entry per experiment name. Each entry carries its two output
# directories and its feature_mode; entries that use prompts also name
# the prompt encoder ("map_only" has none).
EXPERIMENTS = {
    "openai_prompt_only": dict(
        train_out=_OUTPUT_ROOT / "train_out_openai_prompt_only",
        model_out=_MODELS_ROOT / "exp_openai_prompt_only",
        feature_mode="prompt_only",
        prompt_encoder_kind="openai-small",
    ),
    "use_prompt_only": dict(
        train_out=_OUTPUT_ROOT / "train_out_use_prompt_only",
        model_out=_MODELS_ROOT / "exp_use_prompt_only",
        feature_mode="prompt_only",
        prompt_encoder_kind="dan",
    ),
    "map_only": dict(
        train_out=_OUTPUT_ROOT / "train_out_map_only",
        model_out=_MODELS_ROOT / "exp_map_only",
        feature_mode="map_only",
    ),
    "use_map": dict(
        train_out=_OUTPUT_ROOT / "train_out_use_map",
        model_out=_MODELS_ROOT / "exp_use_map",
        feature_mode="prompt_plus_map",
        prompt_encoder_kind="dan",
    ),
    "openai_map": dict(
        train_out=_OUTPUT_ROOT / "train_out_openai_map",
        model_out=_MODELS_ROOT / "exp_openai_map",
        feature_mode="prompt_plus_map",
        prompt_encoder_kind="openai-small",
    ),
}

# Ensure output dirs exist
for exp_cfg in EXPERIMENTS.values():
    # Coerce to Path so registry entries written as plain strings also work.
    exp_cfg["train_out"] = Path(exp_cfg["train_out"])
    exp_cfg["model_out"] = Path(exp_cfg["model_out"])
    exp_cfg["train_out"].mkdir(parents=True, exist_ok=True)
    exp_cfg["model_out"].mkdir(parents=True, exist_ok=True)

# Column widths grow with the longest value so the table stays aligned.
# The former fixed width of 14 for the mode column was narrower than
# "prompt_plus_map" (15 chars), which pushed those rows out of line.
# The old widths are kept as minimums.
_name_w = max([18] + [len(str(name)) for name in EXPERIMENTS])
_mode_w = max([14] + [len(str(c["feature_mode"])) for c in EXPERIMENTS.values()])
_enc_w = max([14] + [len(str(c.get("prompt_encoder_kind", "-"))) for c in EXPERIMENTS.values()])

# Header emoji restored from its mis-encoded form.
print("\n🧪 Will run experiments:")
for exp_name, cfg in EXPERIMENTS.items():
    # "-" marks experiments that have no prompt encoder (e.g. map_only).
    pe = cfg.get("prompt_encoder_kind", "-")
    # str() keeps the "s" format spec valid even for non-string values.
    print(
        f" - {str(exp_name):{_name_w}s} | "
        f"mode={str(cfg['feature_mode']):{_mode_w}s} | "
        f"prompt={str(pe):{_enc_w}s} | "
        f"train_out={cfg['train_out'].name}"
    )


📦 Repo root: /Users/amirdonyadide/Documents/GitHub/IMGOFUP
📦 Using src from: /Users/amirdonyadide/Documents/GitHub/IMGOFUP/src
🔧 PROJ_ROOT env set to: /Users/amirdonyadide/Documents/GitHub/IMGOFUP

🧪 Will run experiments:
 - openai_prompt_only | mode=prompt_only    | prompt=openai-small   | train_out=train_out_openai_prompt_only
 - use_prompt_only    | mode=prompt_only    | prompt=dan            | train_out=train_out_use_prompt_only
 - map_only           | mode=map_only       | prompt=-              | train_out=train_out_map_only
 - use_map            | mode=prompt_plus_map | prompt=dan            | train_out=train_out_use_map
 - openai_map         | mode=prompt_plus_map | prompt=openai-small   | train_out=train_out_openai_map


In [2]:
# ===================== 00_setup — CELL 1: Config + validation helpers =====================

from pathlib import Path

from imgofup.config import paths as CONFIG
from imgofup.config.constants import EXTENT_DIAG_COL, EXTENT_AREA_COL

# -------------------------------------------------
# Config summary + quick sanity checks
# -------------------------------------------------
CONFIG.print_summary()

# Header emoji restored from its mis-encoded form.
print("\n🔎 Sanity checks:")
print("USER_STUDY_XLSX:", CONFIG.PATHS.USER_STUDY_XLSX)
# Reported only: a missing workbook is shown as False here, not raised.
print("is_file        :", CONFIG.PATHS.USER_STUDY_XLSX.is_file())

print("USE_DYNAMIC_EXTENT_REFS:", CONFIG.USE_DYNAMIC_EXTENT_REFS)
print("ALLOW_FALLBACK_EXTENT  :", CONFIG.ALLOW_FALLBACK_EXTENT)
print("EXTENT_DIAG_COL:", EXTENT_DIAG_COL, "| EXTENT_AREA_COL:", EXTENT_AREA_COL)

# Snapshot the configured sizes as plain ints for use in later cells.
MAP_DIM_CFG = int(CONFIG.CFG.MAP_DIM)
PROMPT_DIM_CFG = int(CONFIG.CFG.PROMPT_DIM)
# Fused size is the sum of the map and prompt sizes.
FUSED_DIM_CFG = MAP_DIM_CFG + PROMPT_DIM_CFG
BATCH_SIZE = int(CONFIG.CFG.BATCH_SIZE)

print("\nCFG dims -> MAP_DIM:", MAP_DIM_CFG, "| PROMPT_DIM:", PROMPT_DIM_CFG, "| FUSED_DIM:", FUSED_DIM_CFG)
print("BATCH_SIZE:", BATCH_SIZE)

# -------------------------------------------------
# Validate experiment registry (defined in CELL 0)
# -------------------------------------------------
required_keys = {"train_out", "model_out", "feature_mode"}
allowed_modes = {"prompt_only", "map_only", "prompt_plus_map"}

for exp_name, exp_cfg in EXPERIMENTS.items():
    # ---- 1) Pure checks: nothing is written until the entry is known-good ----
    missing = required_keys - set(exp_cfg.keys())
    if missing:
        # sorted() gives a stable message, matching the "Allowed:" list below.
        raise ValueError(f"Experiment '{exp_name}' is missing keys: {sorted(missing)}")

    # Mode comparison ignores case and surrounding whitespace.
    mode = str(exp_cfg["feature_mode"]).strip().lower()
    if mode not in allowed_modes:
        raise ValueError(
            f"Experiment '{exp_name}' has invalid feature_mode='{exp_cfg['feature_mode']}'. "
            f"Allowed: {sorted(allowed_modes)}"
        )

    # Prompt encoder required whenever prompts are part of features.
    # A key that is present but None/blank is rejected here too; otherwise it
    # would pass this check and only fail later, away from its cause.
    if mode in {"prompt_only", "prompt_plus_map"}:
        encoder_kind = exp_cfg.get("prompt_encoder_kind")
        if encoder_kind is None or not str(encoder_kind).strip():
            raise ValueError(
                f"Experiment '{exp_name}' needs 'prompt_encoder_kind' because feature_mode='{mode}'."
            )

    # ---- 2) Normalise the entry in place and create its directories ----
    exp_cfg["feature_mode"] = mode

    exp_cfg["train_out"] = Path(exp_cfg["train_out"])
    exp_cfg["model_out"] = Path(exp_cfg["model_out"])
    exp_cfg["train_out"].mkdir(parents=True, exist_ok=True)
    exp_cfg["model_out"].mkdir(parents=True, exist_ok=True)

    # No prompt encoder needed for map_only: drop a stray key if one was given.
    if mode == "map_only":
        exp_cfg.pop("prompt_encoder_kind", None)

# Column widths grow with the longest value so the table stays aligned.
# The former fixed width of 14 for the mode column was narrower than
# "prompt_plus_map" (15 chars). The old widths are kept as minimums.
_name_w = max([18] + [len(str(name)) for name in EXPERIMENTS])
_mode_w = max([14] + [len(str(c["feature_mode"])) for c in EXPERIMENTS.values()])
_enc_w = max([14] + [len(str(c.get("prompt_encoder_kind", "-"))) for c in EXPERIMENTS.values()])

# Header emoji restored from its mis-encoded form.
print("\n🧪 Experiments to be executed:")
for exp_name, exp_cfg in EXPERIMENTS.items():
    # "-" marks experiments without a prompt encoder (map_only).
    pe = exp_cfg.get("prompt_encoder_kind", "-")
    # str() keeps the "s" format spec valid even for non-string values.
    print(
        f" - {str(exp_name):{_name_w}s} | "
        f"mode={str(exp_cfg['feature_mode']):{_mode_w}s} | "
        f"prompt={str(pe):{_enc_w}s} | "
        f"train_out={exp_cfg['train_out'].name} | "
        f"model_out={exp_cfg['model_out'].name}"
    )

# -------------------------------------------------
# Small helper: feature dims by mode (used in later notebooks)
# -------------------------------------------------
def get_feature_dims_from_cfg(feature_mode: str):
    """Return ``(map_dim, prompt_dim, total_dim)`` for a feature mode.

    The mode is matched after ``str()`` conversion, whitespace stripping and
    lower-casing. A part that the mode does not use is reported as ``0``.

    Args:
        feature_mode: One of "prompt_only", "map_only" or "prompt_plus_map".

    Returns:
        A 3-tuple built from the module-level MAP_DIM_CFG, PROMPT_DIM_CFG and
        FUSED_DIM_CFG values.

    Raises:
        ValueError: If the mode is not one of the three names above.
    """
    normalized = str(feature_mode).strip().lower()

    if normalized == "prompt_plus_map":
        dims = (MAP_DIM_CFG, PROMPT_DIM_CFG, FUSED_DIM_CFG)
    elif normalized == "map_only":
        dims = (MAP_DIM_CFG, 0, MAP_DIM_CFG)
    elif normalized == "prompt_only":
        dims = (0, PROMPT_DIM_CFG, PROMPT_DIM_CFG)
    else:
        # The caller's original value is echoed, not the normalised one.
        raise ValueError(f"Unknown feature_mode: {feature_mode}")

    return dims


=== CONFIG SUMMARY ===
PROJ_ROOT  : /Users/amirdonyadide/Documents/GitHub/IMGOFUP
DATA_DIR   : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data
INPUT_DIR  : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data/input
OUTPUT_DIR : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data/output
MAPS_ROOT  : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data/input/samples/pairs
INPUT PAT. : *_input.geojson
--- User Study ---
USER_STUDY_XLSX : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data/userstudy/UserStudy.xlsx
RESPONSES_SHEET : Responses
TILE_ID_COL     : tile_id
COMPLETE_COL    : complete
REMOVE_COL      : remove
TEXT_COL        : cleaned_text
PROMPT_ID_COL   : prompt_id
PARAM_VALUE_COL : param_value
OPERATOR_COL    : operator
INTENSITY_COL   : intensity
--- Filters ---
ONLY_COMPLETE   : True
EXCLUDE_REMOVED : True
--- Outputs ---
PROMPT_OUT : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data/output/prompt_out
MAP_OUT    : /Users/amirdonyadide/Documents/GitHub/IMGOFUP/data/output/map_ou