In [None]:
 # Auto-detect GPUs, set env, and launch the HPO script from Jupyter

import os, sys, subprocess, shlex, json, pathlib

SCRIPT = "TrainRFDETR_SOLO_SingleClass_STATIC_Ucloud.py"

def _detect_runtime_profile() -> str:
    """Resolve the runtime profile for this launcher: "ucloud" or "local".

    ``RFDETR_RUNTIME_PROFILE`` decides when it is "ucloud" or "local"
    (compared case-insensitively after stripping whitespace). When it is
    unset, blank, or "auto", the profile is "ucloud" only on a non-Windows
    host whose ``/work`` directory contains an entry matching
    ``Member Files:*`` or ``*#*``; in every other case it is "local".

    Raises:
        ValueError: If the variable holds any other value.
    """
    requested = os.environ.get("RFDETR_RUNTIME_PROFILE", "auto").strip().lower()
    if requested in ("ucloud", "local"):
        return requested
    if requested not in ("", "auto"):
        raise ValueError("RFDETR_RUNTIME_PROFILE must be one of: auto, ucloud, local")

    # Auto-detection: only a POSIX host with a populated /work counts as ucloud.
    if os.name == "nt":
        return "local"
    work_dir = pathlib.Path("/work")
    if not work_dir.exists():
        return "local"
    marker_patterns = ("Member Files:*", "*#*")
    has_marker = any(True for pattern in marker_patterns for _ in work_dir.glob(pattern))
    return "ucloud" if has_marker else "local"

# Resolve the profile once and write the concrete value back to the environment.
RUNTIME_PROFILE = _detect_runtime_profile()
os.environ["RFDETR_RUNTIME_PROFILE"] = RUNTIME_PROFILE
# A PROJECT_DIR already in the environment wins; otherwise use the per-profile default.
PROJECT_DIR = os.environ.get(
    "PROJECT_DIR",
    "/work/projects/myproj/SOLO_Supervised_RFDETR/"
    if RUNTIME_PROFILE == "ucloud"
    else str(pathlib.Path.cwd() / "SOLO_Supervised_RFDETR"),
)
print(f"RFDETR_RUNTIME_PROFILE = {RUNTIME_PROFILE}")
print(f"PROJECT_DIR = {PROJECT_DIR}")

# Which classes to train in this HPO run:
# "leu"  -> only Leucocyte
# "epi"  -> only Squamous Epithelial Cell
# "all"  -> both
#
# NOTE: an RFDETR_HPO_TARGET value that is already in the environment takes
# precedence over HPO_TARGET (setdefault below). In a long-lived notebook kernel
# that includes the value written by an earlier run of this cell, so editing
# HPO_TARGET alone does not change the target. A warning is printed whenever
# the effective value differs from HPO_TARGET.

HPO_TARGET = "all"   # default target; applies only when RFDETR_HPO_TARGET is not already set
os.environ.setdefault("RFDETR_HPO_TARGET", HPO_TARGET)
if os.environ["RFDETR_HPO_TARGET"] != HPO_TARGET:
    print(
        "WARNING: RFDETR_HPO_TARGET is already set to "
        f"{os.environ['RFDETR_HPO_TARGET']!r} in the environment; "
        f"HPO_TARGET = {HPO_TARGET!r} from this cell is ignored. "
        "Run os.environ.pop('RFDETR_HPO_TARGET', None) and re-run the cell to apply it."
    )
print("RFDETR_HPO_TARGET =", os.environ["RFDETR_HPO_TARGET"])
# Input mode:
#   - "640" (or one of the aliases in _FULL_IMAGE_MODE_KEYS): full-image mode (no patching)
#   - any other value selects patch mode; the intended form is a positive
#     integer patch size (e.g. "224")
_FULL_IMAGE_MODE_KEYS = frozenset({"640", "640x640", "full", "full640", "full_640"})
RFDETR_INPUT_MODE = os.environ.get("RFDETR_INPUT_MODE", "640")
os.environ["RFDETR_INPUT_MODE"] = RFDETR_INPUT_MODE
mode_key = RFDETR_INPUT_MODE.strip().lower()
# Drop legacy size knobs so stale values left in the notebook environment cannot affect this run.
for _legacy_knob in ("RFDETR_PATCH_SIZE", "RFDETR_FULL_RESOLUTION"):
    os.environ.pop(_legacy_knob, None)
if mode_key in _FULL_IMAGE_MODE_KEYS:
    os.environ["RFDETR_USE_PATCH_224"] = "0"
else:
    os.environ["RFDETR_USE_PATCH_224"] = "1"
for _shown_key in ("RFDETR_INPUT_MODE", "RFDETR_USE_PATCH_224"):
    print(f"{_shown_key} =", os.environ[_shown_key])

# Matrix/SSL + dataset knobs (kept here so training script stays generic)
PROJECT_PATH = pathlib.Path(PROJECT_DIR).resolve()
REPO_ROOT = PROJECT_PATH.parent
# Look for Stat_Dataset inside the project first, then next to it; when neither
# exists, keep the in-project location as the automatic choice.
_dataset_root_candidates = [base / "Stat_Dataset" for base in (PROJECT_PATH, REPO_ROOT)]
_dataset_root_auto = _dataset_root_candidates[0]
for _candidate_root in _dataset_root_candidates:
    if _candidate_root.exists():
        _dataset_root_auto = _candidate_root
        break

def _detect_ucloud_user_base() -> str:
    """Return the name of the per-user entry under ``/work``, or "" if none.

    Entries matching ``Member Files:*`` are preferred (any entry type);
    otherwise only directories matching ``*#*`` are considered. Within the
    chosen group the entry that sorts first is used. An empty string is
    returned when ``/work`` does not exist or nothing matches.
    """
    work_root = pathlib.Path("/work")
    if not work_root.exists():
        return ""

    first_member = min(work_root.glob("Member Files:*"), default=None)
    if first_member is not None:
        return first_member.name

    first_hashed = min(
        (entry for entry in work_root.glob("*#*") if entry.is_dir()),
        default=None,
    )
    return "" if first_hashed is None else first_hashed.name

# Profile-specific defaults. STAT_DATASETS_ROOT and SSL_CKPT_ROOT, when present
# in the environment, replace the dataset root and SSL checkpoint root chosen here.
DATASET_ROOT_DEFAULT = os.environ.get("STAT_DATASETS_ROOT", str(_dataset_root_auto))
if RUNTIME_PROFILE == "ucloud":
    UCLOUD_USER_BASE = _detect_ucloud_user_base()
    if not UCLOUD_USER_BASE:
        # No per-user folder was found under /work: use repo/project-relative locations.
        IMAGES_FALLBACK_DEFAULT = str(REPO_ROOT / "CellScanData" / "Zoom10x - Quality Assessment_Cleaned")
        OUTPUT_ROOT_DEFAULT = str(PROJECT_PATH / "RFDETR_SOLO_OUTPUT" / "HPO_BOTH_OVR")
    else:
        IMAGES_FALLBACK_DEFAULT = f"/work/{UCLOUD_USER_BASE}/CellScanData/Zoom10x - Quality Assessment_Cleaned"
        OUTPUT_ROOT_DEFAULT = f"/work/{UCLOUD_USER_BASE}/RFDETR_SOLO_OUTPUT/HPO_BOTH_OVR"
    # NOTE(review): this default names one specific user folder and does not use
    # UCLOUD_USER_BASE; other users presumably need SSL_CKPT_ROOT set - confirm.
    SSL_CKPT_ROOT_DEFAULT = os.environ.get("SSL_CKPT_ROOT", "/work/MatiasMose#8097/SSL_Checkpoints")
else:
    IMAGES_FALLBACK_DEFAULT = str(pathlib.Path(r"D:\PHD\PhdData\CellScanData\Zoom10x - Quality Assessment_Cleaned"))
    OUTPUT_ROOT_DEFAULT = str(PROJECT_PATH / "RFDETR_SOLO_OUTPUT" / "HPO_BOTH_OVR_local")
    LOCAL_SSL_DROP_DIR = pathlib.Path(r"D:\PHD\Results\Quality Assessment\Epi+Leu for ESCMID Conference\ssl_checkpoints")
    _local_ssl_candidates = [LOCAL_SSL_DROP_DIR, REPO_ROOT / "SSL_Checkpoints", REPO_ROOT / "Checkpoints", REPO_ROOT]
    # First existing candidate wins; if none exists, point at <repo>/SSL_Checkpoints.
    for _local_ssl_root in _local_ssl_candidates:
        if _local_ssl_root.exists():
            break
    else:
        _local_ssl_root = REPO_ROOT / "SSL_Checkpoints"
    SSL_CKPT_ROOT_DEFAULT = os.environ.get("SSL_CKPT_ROOT", str(_local_ssl_root))

# Publish defaults without overriding values already present in the environment.
for _env_name, _env_default in (
    ("IMAGES_FALLBACK_ROOT", IMAGES_FALLBACK_DEFAULT),
    ("OUTPUT_ROOT", OUTPUT_ROOT_DEFAULT),
    ("RFDETR_INIT_MODES", "default,scratch,ssl"),  # compare detector-pretrained vs scratch vs SSL
    ("RFDETR_SSL_MIN_LOADED_KEYS", "120"),
):
    os.environ.setdefault(_env_name, _env_default)

# Dataset locations (required by training script)
_dataset_root_value = os.environ.get("STAT_DATASETS_ROOT", DATASET_ROOT_DEFAULT)
DATASET_ROOT = pathlib.Path(_dataset_root_value)
def _latest_dataset_dir(root: pathlib.Path, token: str) -> str:
    """Return the last-sorted directory in *root* matching ``*{token}*``.

    Only directories directly inside *root* are considered. "Latest" means
    last in sorted path order - presumably the directory names end in a
    sortable timestamp; verify against how the datasets are named.

    Returns:
        The chosen directory as a string, or "" when nothing matches.
    """
    matching_dirs = [entry for entry in root.glob(f"*{token}*") if entry.is_dir()]
    if not matching_dirs:
        return ""
    matching_dirs.sort()
    return str(matching_dirs[-1])
# A non-blank DATASET_EPI / DATASET_LEUCO from the environment wins; otherwise the
# last-sorted matching directory under DATASET_ROOT is used. The resolved value
# (possibly "") is written back to the environment either way.
DATASET_EPI = os.environ.get("DATASET_EPI", "").strip()
if not DATASET_EPI:
    DATASET_EPI = _latest_dataset_dir(DATASET_ROOT, "SquamousEpithelialCell_OVR")
DATASET_LEUCO = os.environ.get("DATASET_LEUCO", "").strip()
if not DATASET_LEUCO:
    DATASET_LEUCO = _latest_dataset_dir(DATASET_ROOT, "Leucocyte_OVR")
os.environ.update({"DATASET_EPI": DATASET_EPI, "DATASET_LEUCO": DATASET_LEUCO})

# Best SSL checkpoints per class (used when RFDETR_INIT_MODES includes "ssl").
# Values already in the environment win; defaults live under SSL_CKPT_ROOT_DEFAULT.
_ssl_ckpt_root = pathlib.Path(SSL_CKPT_ROOT_DEFAULT)
RFDETR_EPI_SSL_CKPT = os.environ.get(
    "RFDETR_EPI_SSL_CKPT",
    str(_ssl_ckpt_root / "epoch_epoch=029.ckpt"),
)
RFDETR_LEU_SSL_CKPT = os.environ.get(
    "RFDETR_LEU_SSL_CKPT",
    str(_ssl_ckpt_root / "epoch_epoch=069.ckpt"),
)
os.environ.update(
    {
        "RFDETR_EPI_SSL_CKPT": RFDETR_EPI_SSL_CKPT,
        "RFDETR_LEU_SSL_CKPT": RFDETR_LEU_SSL_CKPT,
    }
)
# Remove deprecated knobs no longer consumed by the script.
_DEPRECATED_ENV_KNOBS = (
    "RFDETR_LR_ENCODER_MULT",
    "RFDETR_EPI_WARMUP_STEPS",
    "RFDETR_LEU_WARMUP_STEPS",
    "RFDETR_SCALE_MIN",
    "RFDETR_SCALE_MAX",
    "RFDETR_ROT_DEG",
    "RFDETR_COLOR_JITTER",
    "RFDETR_GAUSS_BLUR",
)
for _deprecated_knob in _DEPRECATED_ENV_KNOBS:
    # pop() with a default is a no-op when the variable is absent.
    os.environ.pop(_deprecated_knob, None)

# Echo the effective configuration. STAT_DATASETS_ROOT shows the resolved
# DATASET_ROOT; every other line shows the value stored in os.environ.
for _echo_key in ("RFDETR_INIT_MODES", "RFDETR_SSL_MIN_LOADED_KEYS"):
    print(f"{_echo_key} =", os.environ[_echo_key])
print("STAT_DATASETS_ROOT = " + str(DATASET_ROOT))
for _echo_key in (
    "DATASET_EPI",
    "DATASET_LEUCO",
    "IMAGES_FALLBACK_ROOT",
    "OUTPUT_ROOT",
    "RFDETR_EPI_SSL_CKPT",
    "RFDETR_LEU_SSL_CKPT",
):
    print(f"{_echo_key} =", os.environ[_echo_key])

# Preflight checks: fail fast on missing paths before training starts.
# An empty value is rejected explicitly because pathlib.Path("") is the current
# directory, which would pass the existence test.
_required_paths = {
    "DATASET_EPI": DATASET_EPI,
    "DATASET_LEUCO": DATASET_LEUCO,
    "IMAGES_FALLBACK_ROOT": os.environ["IMAGES_FALLBACK_ROOT"],
}
for _label, _path_value in _required_paths.items():
    if str(_path_value).strip() == "":
        raise ValueError(f"{_label} is empty. Set it explicitly in the launcher env.")
    if not pathlib.Path(_path_value).exists():
        raise FileNotFoundError(f"Configured path does not exist for {_label}: {_path_value}")

# Normalised set of requested init modes (lower-cased, blanks dropped).
init_modes = set()
for _raw_mode in os.environ["RFDETR_INIT_MODES"].split(","):
    _clean_mode = _raw_mode.strip().lower()
    if _clean_mode:
        init_modes.add(_clean_mode)

# SSL checkpoints are only required when the "ssl" init mode is requested.
if "ssl" in init_modes:
    _required_ckpts = {
        "RFDETR_EPI_SSL_CKPT": RFDETR_EPI_SSL_CKPT,
        "RFDETR_LEU_SSL_CKPT": RFDETR_LEU_SSL_CKPT,
    }
    for _label, _path_value in _required_ckpts.items():
        if str(_path_value).strip() == "":
            raise ValueError(f"{_label} is empty but RFDETR_INIT_MODES includes 'ssl'.")
        if not pathlib.Path(_path_value).exists():
            raise FileNotFoundError(f"Configured SSL checkpoint does not exist for {_label}: {_path_value}")
print("Preflight path check: OK")

# ---- GPU detection (respect CUDA_VISIBLE_DEVICES if already set) ----
def detect_gpu_count():
    """Return the number of GPUs visible to this process.

    Resolution order:
      1. ``CUDA_VISIBLE_DEVICES`` when it holds a non-blank value. Entries are
         counted up to (not including) the first negative index, because
         CUDA ignores every entry from the first invalid index onward. So
         "-1" yields 0 and "0,2,-1,1" yields 2. Non-numeric entries (for
         example GPU UUIDs) are counted as devices; blank entries are skipped.
      2. ``torch.cuda.device_count()`` when torch can be imported.
      3. The number of lines containing "GPU " printed by ``nvidia-smi -L``.
      4. 0 when none of the above works.

    Returns:
        int: The detected GPU count (0 when no GPU is visible).
    """
    # If the user already masked GPUs, respect that
    vis = os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
    if vis:
        visible = 0
        for entry in vis.split(","):
            entry = entry.strip()
            if not entry:
                continue
            try:
                if int(entry) < 0:
                    # Invalid index: this entry and everything after it is hidden.
                    break
            except ValueError:
                pass  # not an integer index (e.g. a UUID); count it as a device
            visible += 1
        return visible
    # Try torch
    try:
        import torch
        return int(torch.cuda.device_count())
    except Exception:
        # Best effort: torch may be missing or fail to initialise; try nvidia-smi.
        pass
    # Fallback: nvidia-smi
    try:
        out = subprocess.check_output(["nvidia-smi", "-L"], stderr=subprocess.DEVNULL, text=True)
    except Exception:
        # nvidia-smi is absent or failed: report no GPUs rather than crash.
        return 0
    return len([ln for ln in out.splitlines() if "GPU " in ln])

gpu_count = detect_gpu_count()
if gpu_count:
    print(f"Detected {gpu_count} GPU(s)")
else:
    print("No GPUs detected. The script will run CPU-only or fail if CUDA is required.")

# Defaults only: values already present in the environment are left untouched.
for _hint_name, _hint_value in (
    # Let the script decide mapping; just hint max parallel = number of visible GPUs
    ("MAX_PARALLEL", str(max(1, gpu_count))),
    # allocator safety
    ("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True"),
    # Optional perf toggles (script also sets them per-worker)
    ("TF32", "1"),
):
    os.environ.setdefault(_hint_name, _hint_value)
# ---- Optional additional env (edit if your paths differ) ----
# os.environ.setdefault("IMAGES_FALLBACK_ROOT", "/work/Member Files:yourname/CellScanData/Zoom10x - Quality Assessment_Cleaned")
# os.environ.setdefault("OUTPUT_ROOT", "/work/Member Files:yourname/RFDETR_SOLO_OUTPUT/HPO_BOTH_OVR")
# os.environ.setdefault("NUM_WORKERS", "8")

# ---- Run the script ----
# Resolve the working directory and the script inside it; stop before launching
# if either is missing.
wd = pathlib.Path(PROJECT_DIR).resolve()
if not wd.exists():
    raise NotADirectoryError(f"PROJECT_DIR does not exist: {wd}")
script_path = wd / SCRIPT
if not script_path.exists():
    raise FileNotFoundError(f"Training script not found: {script_path}")
# Use the interpreter running this notebook; -u keeps the child's output unbuffered.
py = sys.executable
cmd = [py, "-u", str(script_path)]

# Launch banner: working directory, shell-quoted command, and the key env knobs.
print("\n[LAUNCH]")
print(" cwd:", wd)
print(" cmd:", " ".join(map(shlex.quote, cmd)))
for _banner_key in (
    "MAX_PARALLEL",
    "RFDETR_INPUT_MODE",
    "RFDETR_USE_PATCH_224",
    "RFDETR_INIT_MODES",
    "RFDETR_SSL_MIN_LOADED_KEYS",
    "DATASET_EPI",
    "DATASET_LEUCO",
    "RFDETR_EPI_SSL_CKPT",
    "RFDETR_LEU_SSL_CKPT",
):
    print(f" env: {_banner_key}=", os.environ.get(_banner_key))

# Stream child output line-by-line so progress is visible during long runs.
# stderr is merged into stdout so the two streams stay interleaved in order.
proc = subprocess.Popen(
    cmd,
    cwd=str(wd),
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    errors="replace",  # undecodable bytes in child output must not abort the stream
    bufsize=1,
)
try:
    assert proc.stdout is not None
    for _line in proc.stdout:
        print(_line, end="")
    ret = proc.wait()
finally:
    # If streaming was interrupted (e.g. KeyboardInterrupt from a kernel
    # interrupt) or raised, do not leave the training process running in the
    # background. NOTE(review): this stops only the launched process; any
    # workers it spawned are presumably its own responsibility - confirm.
    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=30)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
    if proc.stdout is not None:
        proc.stdout.close()
if ret != 0:
    raise subprocess.CalledProcessError(ret, cmd)




RFDETR_RUNTIME_PROFILE = local
PROJECT_DIR = C:\Users\SH37YE\Desktop\PhD_Code_github\AIPoweredMicroscope\SOLO_Supervised_RFDETR
RFDETR_HPO_TARGET = epi
RFDETR_INPUT_MODE = 640
RFDETR_USE_PATCH_224 = 0
RFDETR_SSL_MODES = none,ssl
RFDETR_SSL_MIN_LOADED_KEYS = 120
STAT_DATASETS_ROOT = C:\Users\SH37YE\Desktop\PhD_Code_github\AIPoweredMicroscope\SOLO_Supervised_RFDETR\Stat_Dataset
DATASET_EPI = C:\Users\SH37YE\Desktop\PhD_Code_github\AIPoweredMicroscope\SOLO_Supervised_RFDETR\Stat_Dataset\QA-2025v2_SquamousEpithelialCell_OVR_20260217-093944
DATASET_LEUCO = C:\Users\SH37YE\Desktop\PhD_Code_github\AIPoweredMicroscope\SOLO_Supervised_RFDETR\Stat_Dataset\QA-2025v2_Leucocyte_OVR_20260217-094041
IMAGES_FALLBACK_ROOT = D:\PHD\PhdData\CellScanData\Zoom10x - Quality Assessment_Cleaned
OUTPUT_ROOT = C:\Users\SH37YE\Desktop\PhD_Code_github\AIPoweredMicroscope\SOLO_Supervised_RFDETR\RFDETR_SOLO_OUTPUT\HPO_BOTH_OVR_local
RFDETR_EPI_SSL_CKPT = D:\PHD\Results\Quality Assessment\Epi+Leu for ESCMID Conferen