In [2]:
# Auto-detect GPUs, set env, and launch the SSL checkpoint probing script from Jupyter (UCloud)

import os, sys, subprocess, shlex, pathlib

PROJECT_DIR = "/work/projects/myproj/Linear_Probing_For_SSL/"
SCRIPT      = "Best_SSL_Model_Selection.py"               #

# Which classes to probe:
# "leu"  -> only Leucocyte
# "epi"  -> only Squamous Epithelial Cell
# "all"  -> both
PROBE_TARGET = "epi"
os.environ["RFDETR_PROBE_TARGET"] = PROBE_TARGET
print("RFDETR_PROBE_TARGET =", os.environ["RFDETR_PROBE_TARGET"])

# ---- Probe settings (keep static across all ckpts) ----
# Use same as your script defaults unless you want to override here:
os.environ.setdefault("RFDETR_USE_PATCH_224", "1")
os.environ.setdefault("RFDETR_PATCH_SIZE", "224")

# Critical for "same data exactly":
os.environ.setdefault("RFDETR_TRAIN_FRACTION", "0.125")
os.environ.setdefault("RFDETR_FRACTION_SEED", "42")
os.environ.setdefault("SEED", "42")

# Optional: explicit ckpt list (comma-separated). If unset, script scans SSL_CKPT_ROOT.
# os.environ["SSL_CKPTS"] = "epoch_epoch-004.ckpt,epoch_epoch-014.ckpt,epoch_epoch-029.ckpt,last.ckpt"

# ---- GPU detection (respect CUDA_VISIBLE_DEVICES if already set) ----
def detect_gpu_count():
    vis = os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
    if vis:
        return len([x for x in vis.split(",") if x.strip() != ""])
    try:
        import torch
        return int(torch.cuda.device_count())
    except Exception:
        pass
    try:
        out = subprocess.check_output(["nvidia-smi", "-L"], stderr=subprocess.DEVNULL, text=True)
        return len([ln for ln in out.splitlines() if "GPU " in ln])
    except Exception:
        return 0

gpu_count = detect_gpu_count()
if gpu_count == 0:
    print("⚠️ No GPUs detected. Script may run CPU-only or fail if CUDA is required.")
else:
    print(f"✅ Detected {gpu_count} GPU(s)")

# Let the script decide mapping; just hint max parallel = number of visible GPUs
os.environ.setdefault("MAX_PARALLEL", str(max(1, gpu_count)))
# allocator safety
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
# Optional perf toggles (script also sets them per-worker)
os.environ.setdefault("TF32", "1")


# DataLoader workers
os.environ.setdefault("NUM_WORKERS", "8")

# ---- Run the script ----
wd = pathlib.Path(PROJECT_DIR).resolve()
py = sys.executable
cmd = f'{shlex.quote(py)} -u {shlex.quote(str(wd / SCRIPT))}'

print("\n[LAUNCH]")
print(" cwd:", wd)
print(" cmd:", cmd)
print(" env: RFDETR_PROBE_TARGET=", os.environ.get("RFDETR_PROBE_TARGET"))
print(" env: MAX_PARALLEL=", os.environ.get("MAX_PARALLEL"))
#print(" env: SSL_CKPT_ROOT=", os.environ.get("SSL_CKPT_ROOT"))
#print(" env: OUTPUT_BASE=", os.environ.get("OUTPUT_BASE"))

subprocess.run(cmd, cwd=str(wd), shell=True, check=True)


RFDETR_PROBE_TARGET = epi
✅ Detected 1 GPU(s)

[LAUNCH]
 cwd: C:\work\projects\myproj\Linear_Probing_For_SSL
 cmd: 'C:\Users\SH37YE\AppData\Local\anaconda3\envs\AI_PowMic\python.exe' -u 'C:\work\projects\myproj\Linear_Probing_For_SSL\RunBest_SSL_Model_Selection.py'
 env: RFDETR_PROBE_TARGET= epi


NotADirectoryError: [WinError 267] The directory name is invalid