In [1]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=False)

!pip install --quiet torch torchvision webdataset tqdm pillow

In [2]:
# %%
# Cell 1 – Environment Setup & Dependencies
import os, sys, subprocess
from pathlib import Path

print("📦 [DEBUG] Avvio configurazione ambiente...")

# --- Colab detection ---------------------------------------------------------#
IN_COLAB = Path("/content").exists()
if IN_COLAB:
    print("📍 [DEBUG] Ambiente Google Colab rilevato.")
    from google.colab import drive  # type: ignore
    drive.mount("/content/drive", force_remount=False)
else:
    print("💻 [DEBUG] Ambiente locale rilevato (VSCode o simile).")

# --- Project root ------------------------------------------------------------#
ENV_PATHS = {
    "colab": "/content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project",
    "local": "/Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project",
}
PROJECT_ROOT = Path(ENV_PATHS["colab" if IN_COLAB else "local"]).resolve()
sys.path.append(str(PROJECT_ROOT / "src"))
print(f"📁 [DEBUG] PROJECT_ROOT → {PROJECT_ROOT}")

# --- Dependencies (installa solo se mancano) ---------------------------------#
def _pip_install(pkgs):
    import importlib.util, sys, subprocess
    missing = [p for p in pkgs if importlib.util.find_spec(p) is None]
    if missing:
        print(f"🔧 [DEBUG] Installazione pacchetti mancanti: {missing}")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", *missing])
    else:
        print("✅ [DEBUG] Tutti i pacchetti richiesti sono già installati.")

_pip_install(["torch", "torchvision", "webdataset", "tqdm", "pillow", "pyyaml", "joblib"])

# helper per il tarball del dataset (usato in Cell 2)
DATA_TARBALL = PROJECT_ROOT / "data" / "processed"


📦 [DEBUG] Avvio configurazione ambiente...
💻 [DEBUG] Ambiente locale rilevato (VSCode o simile).
📁 [DEBUG] PROJECT_ROOT → /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project
🔧 [DEBUG] Installazione pacchetti mancanti: ['pillow', 'pyyaml']


In [None]:
# %%
# Cell 2 – SLURM Submission (modulare) ─────────────────────────────────────────
import os, subprocess, traceback, shutil, yaml
from pathlib import Path
from textwrap import dedent
from dotenv import load_dotenv, find_dotenv

# ─────────────────────────────── Helpers di basso livello ─────────────────────
def _detect_env() -> tuple[bool, bool]:
    """Rileva se siamo in Colab o VSCode (con VSCODE_PID)."""
    in_colab  = Path("/content").exists()
    in_vscode = not in_colab and bool(os.environ.get("VSCODE_PID"))
    print(f"🚀 In Colab={in_colab}, VSCode={in_vscode}")
    return in_colab, in_vscode


def _load_env_vars() -> dict:
    """Carica .env e restituisce le variabili essenziali in un dict."""
    dotenv_path = find_dotenv()
    if not dotenv_path:
        raise FileNotFoundError("❌ .env non trovato nella root del progetto.")
    load_dotenv(dotenv_path, override=True)

    env = {
        "USER"   : os.getenv("CLUSTER_USER"),
        "HOST"   : os.getenv("CLUSTER_HOST"),
        "BASE"   : os.getenv("REMOTE_BASE_PATH"),
        "PW"     : os.getenv("CLUSTER_PASSWORD", ""),
        "MAIL"   : os.getenv("RESPONSABILE_EMAIL", os.getenv("MEMBER_EMAIL")),
        "PART"   : os.getenv("SBATCH_PARTITION", "global"),
        "MOD"    : os.getenv("SBATCH_MODULE", "intel/python/3/2019.4.088"),
    }
    missing = [k for k, v in env.items() if k in ("USER","HOST","BASE") and not v]
    if missing:
        raise KeyError(f"🌱 Mancano variabili nel .env: {missing}")
    return env


def _ssh_cmd(env: dict, cmd: str) -> list[str]:
    """Costruisce il comando SSH (con o senza sshpass)."""
    base = []
    if env["PW"] and shutil.which("sshpass"):
        base += ["sshpass", "-p", env["PW"]]
    base += ["ssh", f"{env['USER']}@{env['HOST']}", cmd]
    return base


def _rsync_cmd(env: dict, src: str, dst: str, extra: list[str] | None = None) -> list[str]:
    """Costruisce il comando rsync con eventuale sshpass."""
    cmd = ["rsync", "-avz"]
    if extra:
        cmd += extra
    if env["PW"] and shutil.which("sshpass"):
        cmd += ["-e", f"sshpass -p {env['PW']} ssh -o StrictHostKeyChecking=no"]
    cmd += [src, dst]
    return cmd


# ─────────────────────────────── Funzioni operative ───────────────────────────
def build_sbatch_script(env: dict, script_path: Path):
    body = f"""#!/bin/bash
#SBATCH --job-name=rcc_ssrl_launch
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --mem=16G              # puoi alzare la RAM in un colpo solo
#SBATCH --time=2:00:00
#SBATCH --gres=gpu:1
#SBATCH --partition={env['PART']}
#SBATCH --output=%x_%j.out
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user={env['MAIL']}
#SBATCH --workdir={env['BASE']}

module purge
module load {env['MOD']}      # intel/python/3/2019.4.088

cd {env['BASE']}
python notebooks/4-launch_training_cluster.py --config config/training.yaml
"""
    script_path.write_text(body)
    script_path.chmod(0o755)


def sync_project_code(env: dict):
    print("🔄 Sync codice progetto → cluster")
    subprocess.run(
        _rsync_cmd(env,
                   f"{PROJECT_ROOT}/",
                   f"{env['USER']}@{env['HOST']}:{env['BASE']}/",
                   ["--delete", "--exclude", ".git", "--exclude", "data/processed"]),
        check=True
    )


def upload_dataset_tar(env: dict, dataset_id: str):
    """Upload robusto del dataset come tar.gz (crea dir remota prima di rsync)."""
    local_tar   = PROJECT_ROOT / "data" / "processed" / f"{dataset_id}.tar.gz"
    remote_dir  = f"{env['BASE']}/data/processed"
    remote_tar  = f"{remote_dir}/{dataset_id}.tar.gz"
    remote_ds   = f"{remote_dir}/{dataset_id}"

    # già presente il dataset estratto?
    if subprocess.run(_ssh_cmd(env, f"test -d {remote_ds}")).returncode == 0:
        print("📦 Dataset già presente sul cluster, skip upload")
        return

    # crea tarball solo se assente
    if not local_tar.exists():
        print("🗜️  Creo tarball locale del dataset …")
        subprocess.run(
            ["tar", "-czf", str(local_tar), "-C",
             str(PROJECT_ROOT / "data" / "processed"), dataset_id],
            check=True
        )

    # assicura la cartella remota data/processed
    subprocess.run(_ssh_cmd(env, f"mkdir -p {remote_dir}"), check=True)

    # rsync del tar.gz
    print("🚚  Upload tarball …")
    subprocess.run(
        _rsync_cmd(env, str(local_tar),
                   f"{env['USER']}@{env['HOST']}:{remote_tar}"),
        check=True
    )

    # estrazione + (facoltativo) rimozione tar
    print("📂  Estrazione sul cluster …")
    subprocess.run(_ssh_cmd(env,
        f"tar -xzf {remote_tar} -C {remote_dir} && rm -f {remote_tar}"),
        check=True
    )

def submit_job(env: dict, script_name: str):
    cmd = f"cd {env['BASE']} && sbatch {script_name}"
    if env["PW"] and shutil.which("sshpass"):
        output = subprocess.check_output(_ssh_cmd(env, cmd))
        print("📬 Job submit:", output.decode().strip())
    else:
        subprocess.run(_ssh_cmd(env, cmd), check=True)


# ─────────────────────────────── MAIN per VSCode ──────────────────────────────
IN_COLAB, IN_VSCODE = _detect_env()
if IN_VSCODE:
    try:
        env = _load_env_vars()

        # dataset_id dal training.yaml
        cfg = yaml.safe_load((PROJECT_ROOT / "config" / "training.yaml").read_text())
        DATASET_ID = cfg["data"]["dataset_id"]

        # assicura dir base remoto
        subprocess.run(_ssh_cmd(env, f"mkdir -p {env['BASE']}"), check=True)

        # build e sync
        SBATCH_LOCAL = PROJECT_ROOT / "hpc_submit.sh"   # era Path.cwd()/…
        build_sbatch_script(env, SBATCH_LOCAL)
        sync_project_code(env)
        upload_dataset_tar(env, DATASET_ID)
        submit_job(env, SBATCH_LOCAL.name)

    except Exception:
        print("❌ Submission fallita:")
        traceback.print_exc()
else:
    print("⚠️  Cell 2: skip (non siamo in VSCode).")


🚀 In Colab=False, VSCode=True
🔄 Sync codice progetto → cluster
Transfer starting: 109 files
hpc_submit.sh
config/training.yaml
notebooks/4-launch_training.ipynb
src/trainers/moco_v2.py

sent 13077 bytes  received 534 bytes  79087 bytes/sec
total size is 29072991  speedup is 2135.99
📦 Dataset già presente sul cluster, skip upload
📬 Job submit: Submitted batch job 1201556


In [4]:
# Cell 3 – Dynamic import of utils.training_utils
import sys
import importlib.util
from pathlib import Path

# 1) locate & load the module file
utils_path = PROJECT_ROOT / "src" / "utils" / "training_utils.py"
spec       = importlib.util.spec_from_file_location("utils.training_utils", str(utils_path))
utils_mod  = importlib.util.module_from_spec(spec)     # type: ignore[arg-type]
assert spec and spec.loader, f"Cannot load spec for {utils_path}"
spec.loader.exec_module(utils_mod)                     # type: ignore[assignment]
sys.modules["utils.training_utils"] = utils_mod        # register in sys.modules
print(f"[DEBUG] Loaded utils.training_utils from {utils_path}")

# 2) import what we need
from utils.training_utils import (
    TRAINER_REGISTRY,
    get_latest_checkpoint,
    load_checkpoint,
)

print("[DEBUG] Imported:")
print("  • TRAINER_REGISTRY keys:", list(TRAINER_REGISTRY.keys()))
print("  • get_latest_checkpoint →", get_latest_checkpoint)
print("  • load_checkpoint       →", load_checkpoint)


[DEBUG] Loaded utils.training_utils from /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project/src/utils/training_utils.py
[DEBUG] Imported:
  • TRAINER_REGISTRY keys: []
  • get_latest_checkpoint → <function get_latest_checkpoint at 0x14a0bb7f0>
  • load_checkpoint       → <function load_checkpoint at 0x14a0bb760>


In [5]:
# Cell 4 – Configuration & Directory Setup (formatted and absolute paths)
import yaml, datetime, os
from pathlib import Path

# ------------------------------------------------------------------ #
# 0) EXP_CODE: riprendi da YAML → env → genera nuovo                 #
# ------------------------------------------------------------------ #

yaml_exp = cfg.get("exp_code", "")           # <─ nuovo parametro
env_exp  = os.environ.get("EXP_CODE", "")

if yaml_exp:                                 # 1) priorità al file YAML
    EXP_CODE = yaml_exp
elif env_exp:                                # 2) poi variabile d’ambiente
    EXP_CODE = env_exp
else:                                        # 3) altrimenti nuovo timestamp
    EXP_CODE = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# salva nel processo per eventuali figli
os.environ["EXP_CODE"] = EXP_CODE

print(f"[DEBUG] EXP_CODE → {EXP_CODE}")


# ------------------------------------------------------------------ #
# 1) Carica configurazione generale                                  #
# ------------------------------------------------------------------ #
cfg_path  = PROJECT_ROOT / "config" / "training.yaml"
cfg       = yaml.safe_load(cfg_path.read_text())
DATASET_ID = cfg["data"]["dataset_id"]
cfg["experiment_code"] = EXP_CODE     # lo inseriamo nel dict per eventuali usi downstream

# ------------------------------------------------------------------ #
# 2) Percorsi assoluti (train / val / test)                          #
# ------------------------------------------------------------------ #
for split in ("train", "val", "test"):
    rel = cfg["data"][split].format(dataset_id=DATASET_ID)
    abs_ = (PROJECT_ROOT / rel).resolve()
    cfg["data"][split] = str(abs_)
    print(f"[DEBUG] {split.upper()} → {abs_}")

# ------------------------------------------------------------------ #
# 3) Struttura directory esperimento                                 #
# ------------------------------------------------------------------ #
EXP_ROOT = PROJECT_ROOT / "data" / "processed" / str(DATASET_ID)
EXP_BASE = EXP_ROOT / "experiments" / EXP_CODE                   # unica per tutta la run
EXP_BASE.mkdir(parents=True, exist_ok=True)
(EXP_ROOT / "experiments.md").touch(exist_ok=True)               # indice globale

print(f"[DEBUG] EXP_BASE → {EXP_BASE}")

# ------------------------------------------------------------------ #
# 4) Salva la **copia YAML** solo se non esiste già                  #
# ------------------------------------------------------------------ #
exp_yaml = EXP_BASE / f"training_{EXP_CODE}.yaml"
if not exp_yaml.exists():                          # evita duplicati
    exp_yaml.write_text(yaml.dump(cfg, sort_keys=False))
    print(f"[DEBUG] Scritto   {exp_yaml}")
else:
    print(f"[DEBUG] Config già presente → {exp_yaml}")

[DEBUG] EXP_CODE → 20250626011748
[DEBUG] TRAIN → /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project/data/processed/dataset_9f30917e/webdataset/train/patches-0000.tar
[DEBUG] VAL → /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project/data/processed/dataset_9f30917e/webdataset/val/patches-0000.tar
[DEBUG] TEST → /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project/data/processed/dataset_9f30917e/webdataset/test/patches-0000.tar
[DEBUG] EXP_BASE → /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project/data/processed/dataset_9f30917e/experiments/20250626011748
[DEBUG] Scritto   /Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project/data/processed/dataset_9f30917e/experiments/20250626011748/training_20250626011748.yaml


In [6]:
# Cell 5 – Import all trainer modules
import importlib, sys
from utils.training_utils import TRAINER_REGISTRY

trainer_mods = [
    "trainers.simclr",
    "trainers.moco_v2",
    "trainers.rotation",
    "trainers.jigsaw",
    "trainers.supervised",   # ✅ CORRETTO
    "trainers.transfer",
]

for m in trainer_mods:
    importlib.reload(sys.modules[m]) if m in sys.modules else importlib.import_module(m)

print(f"[DEBUG] Registered trainers: {list(TRAINER_REGISTRY.keys())}")


[DEBUG] Registered trainers: ['simclr', 'moco_v2', 'rotation', 'jigsaw', 'supervised', 'transfer']


In [7]:
# %% -------------------------------------------------------------------- #
# Cell 6 – Helper utilities (Tee, paths, selezione, …)                    #
# ----------------------------------------------------------------------- #
import contextlib, sys, time, inspect
from pathlib import Path
import torch
from utils.training_utils import get_latest_checkpoint, load_checkpoint
from trainers.train_classifier import train_classifier

# ──────────────────────────────────────────────────────────────────────── #
# I/O helpers                                                             #
# ──────────────────────────────────────────────────────────────────────── #
class _Tee:
    """Duplica stdout / stderr su console *e* file."""
    def __init__(self, *targets): self.targets = targets
    def write(self, data):  [t.write(data) and t.flush() for t in self.targets]
    def flush(self):        [t.flush()      for t in self.targets]

def _global_experiments_append(line: str):
    """Aggiunge una riga all’indice globale `experiments.md`."""
    exp_md = EXP_ROOT / "experiments.md"
    with exp_md.open("a") as f:
        f.write(line.rstrip() + "\n")

# ──────────────────────────────────────────────────────────────────────── #
# Path builders & artefact checks                                         #
# ──────────────────────────────────────────────────────────────────────── #
def _paths(model_name: str) -> dict[str, Path]:
    """Restituisce tutti i path (dir/log/ckpt/features/clf) per un modello."""
    mdir = EXP_BASE / model_name
    mdir.mkdir(parents=True, exist_ok=True)
    return {
        "dir"      : mdir,
        "log"      : mdir / f"log_{EXP_CODE}.md",
        "features" : mdir / f"{model_name}_features.pt",
        "clf"      : mdir / f"{model_name}_classifier.joblib",
        "ckpt_pref": f"{model_name}_epoch",        # usato da save_checkpoint
    }

def _completed(paths: dict, is_ssl: bool) -> bool:
    """True se TUTTI gli artefatti richiesti sono già presenti."""
    ckpt_ok = get_latest_checkpoint(paths["dir"], prefix=paths["ckpt_pref"]) is not None
    if not ckpt_ok:
        return False
    if is_ssl:
        return paths["features"].exists() and paths["clf"].exists()
    return True

# ──────────────────────────────────────────────────────────────────────── #
# Selezione modelli & trainer helpers                                     #
# ──────────────────────────────────────────────────────────────────────── #
def _select_models(cfg: dict) -> dict[str, dict]:
    sel = cfg.get("run_model", "all").lower()
    return {
        n: c for n, c in cfg["models"].items()
        if sel in ("all", n) or
           (sel == "sl"  and c.get("type") == "sl") or
           (sel == "ssl" and c.get("type") == "ssl")
    }

def _init_trainer(name: str, m_cfg: dict, data_cfg: dict, ckpt_dir: Path):
    Trainer = TRAINER_REGISTRY[name]
    tr = Trainer(m_cfg, data_cfg)
    tr.ckpt_dir = ckpt_dir         # salva i .pt qui
    return tr

# ──────────────────────────────────────────────────────────────────────── #
# Training / resume / artefacts                                           #
# ──────────────────────────────────────────────────────────────────────── #
def _run_full_training(trainer, epochs: int):
    """Loop di training (identico al codice esistente, solo incapsulato)."""
    has_val = hasattr(trainer, "validate_epoch")
    total_batches = getattr(trainer, "batches_train", None)
    if total_batches is None:
        try: total_batches = len(trainer.train_loader)
        except TypeError: total_batches = None

    for epoch in range(1, epochs + 1):
        t0 = time.time(); run_loss = run_corr = seen = 0
        print(f"--- Epoch {epoch}/{epochs} ---")
        for i, batch in enumerate(trainer.train_loader, 1):
            sig = inspect.signature(trainer.train_step)
            res = trainer.train_step(batch) if len(sig.parameters) == 1 \
                  else trainer.train_step(*batch)
            if len(res) == 4: _, loss, corr, bs = res
            else:             loss, bs = res; corr = 0
            run_loss += loss * bs; run_corr += corr; seen += bs

            # progress bar
            if total_batches:
                pct = (i/total_batches)*100
                eta = ((time.time()-t0)/i)*(total_batches-i)
                msg = (f"  Batch {i}/{total_batches} ({pct:.1f}%) | "
                       f"Loss: {run_loss/seen:.4f}")
                if has_val: msg += f" | Acc: {run_corr/seen:.3f}"
                msg += f" | Elapsed: {time.time()-t0:.1f}s | ETA: {eta:.1f}s"
            else:
                msg = f"  Batch {i} | Loss: {run_loss/seen:.4f}"
                if has_val: msg += f" | Acc: {run_corr/seen:.3f}"
                msg += f" | Elapsed: {time.time()-t0:.1f}s"
            print(msg)

        if has_val:
            v_loss, v_acc = trainer.validate_epoch()
            trainer.post_epoch(epoch, v_acc)
            print(f"Val -> Loss: {v_loss:.4f} | Acc: {v_acc:.3f}")
        else:
            trainer.post_epoch(epoch, run_loss/seen)

        print(f"Epoch {epoch} completed in {time.time()-t0:.1f}s\n")

def _resume_or_train(trainer, paths: dict, epochs: int):
    ckpt = get_latest_checkpoint(paths["dir"], prefix=paths["ckpt_pref"])
    if ckpt:
        print(f"⏩ Resume da {ckpt.name}")
        load_checkpoint(
            ckpt,
            model=torch.nn.Sequential(
                trainer.encoder,
                getattr(trainer, "projector", torch.nn.Identity())
            )
        )
    else:
        _run_full_training(trainer, epochs)

def _ensure_ssl_artifacts(trainer, paths: dict):
    if not paths["features"].exists():
        trainer.extract_features_to(str(paths["features"]))
    if not paths["clf"].exists():
        train_classifier(str(paths["features"]), str(paths["clf"]))

In [8]:
# %% -------------------------------------------------------------------- #
# Cell 7 – Modular Launch & Auto-Recover                                  #
# ----------------------------------------------------------------------- #
def launch_training(cfg: dict) -> None:
    """Lancia (o recupera) tutti i modelli selezionati."""
    for name, m_cfg in _select_models(cfg).items():
        paths   = _paths(name)
        is_ssl  = m_cfg.get("type") == "ssl"
        epochs  = int(m_cfg["training"]["epochs"])

        # ---------- logging (append) ----------------------------------- #
        with open(paths["log"], "a") as lf, \
             contextlib.redirect_stdout(_Tee(sys.stdout, lf)), \
             contextlib.redirect_stderr(_Tee(sys.stderr, lf)):

            if _completed(paths, is_ssl):
                print(f"✅ Artefatti completi per '{name}' – skip\n")
                continue

            trainer = _init_trainer(name, m_cfg, cfg["data"], paths["dir"])
            print(f"Device: {trainer.device} 🚀  Starting training for '{name}'")
            print(f"→ Model config: {m_cfg}\n")

            _resume_or_train(trainer, paths, epochs)

            if is_ssl:
                _ensure_ssl_artifacts(trainer, paths)

            # Aggiorna indice globale
            last_ckpt = get_latest_checkpoint(paths["dir"], prefix=paths["ckpt_pref"])
            rel = last_ckpt.relative_to(EXP_ROOT) if last_ckpt else "-"
            _global_experiments_append(f"| {EXP_CODE} | {name} | {epochs} | {rel} |")

# 🚀 Avvio immediato
# 🚀 Avvio immediato SOLO se siamo in Colab
if IN_COLAB:
    launch_training(cfg)
else:
    print("⏩ Training delegato a SLURM: skip esecuzione locale.")


⏩ Training delegato a SLURM: skip esecuzione locale.


In [9]:
# Cell 8 – Post-SLURM auto-download (solo VSCode + LEGION)
if IN_VSCODE:
    import subprocess, shutil, os
    from dotenv import load_dotenv, find_dotenv

    # Carica .env per usare di nuovo USER/HOST/PASSWORD
    load_dotenv(find_dotenv(), override=True)
    CLUSTER_USER  = os.getenv("CLUSTER_USER")
    CLUSTER_HOST  = os.getenv("CLUSTER_HOST")
    REMOTE_BASE   = os.getenv("REMOTE_BASE_PATH")
    CLUSTER_PW    = os.getenv("CLUSTER_PASSWORD", "")
    USE_SSHPASS   = bool(CLUSTER_PW and shutil.which("sshpass"))

    # ----- funzione rsync con/​senza sshpass ----------------------------------#
    def _rsync(src: str, dest: str):
        args = ["rsync", "-avz"]
        if USE_SSHPASS:
            sshpart = f"sshpass -p {CLUSTER_PW} ssh -o StrictHostKeyChecking=no"
            args += ["-e", sshpart]
        args += [src, dest]
        subprocess.run(args, check=True)

    dataset_id = cfg["data"]["dataset_id"]
    remote_exp = f"{REMOTE_BASE}/data/processed/{dataset_id}/experiments/{EXP_CODE}/"
    local_exp  = EXP_ROOT / "experiments" / EXP_CODE
    local_exp.mkdir(parents=True, exist_ok=True)

    print(f"⬇️  Downloading artifacts of EXP_CODE={EXP_CODE} …")
    try:
        _rsync(f"{CLUSTER_USER}@{CLUSTER_HOST}:{remote_exp}", str(local_exp))
        print("✅  Artifacts downloaded in:", local_exp)
    except subprocess.CalledProcessError as e:
        print("⚠️  Download failed (job forse non ancora terminato):")
        print(e.stderr or "")
else:
    print("↩️  Download skipped: non siamo in VSCode/LEGION.")


⬇️  Downloading artifacts of EXP_CODE=20250626011748 …
⚠️  Download failed (job forse non ancora terminato):



rsync: change_dir "/home/mla_group_01/wsi-ssrl-rcc_project/data/processed/dataset_9f30917e/experiments/20250626011748" failed: No such file or directory (2)


1. **Terminale VSCode (Remote SSH o locale)**
   Dopo aver sottoposto il job, prendi il `JOBID` dallo stdout di `sbatch`.
   Poi apri un terminale in VSCode e digiti:

   ```bash
   ssh mla_group_01@legionlogin.polito.it
   cd /home/mla_group_01/wsi-ssrl-rcc_project
   tail -f rcc_ssrl_launch_<JOBID>.out
   ```

   In questo modo vedrai **in tempo reale** tutti i `print` man mano che il training avanza.

2. **VSCode “Remote SSH” Extension**
   Se installi l’estensione Remote SSH di VSCode, puoi:

   * Connetterti direttamente al nodo di login (`legionlogin.polito.it`)
   * Aprire il file `rcc_ssrl_launch_<JOBID>.out` nell’editor
   * Abilitare “Auto Save” e “Follow Tail” (clic destro → *Tail Follow*), per visualizzare i nuovi messaggi senza uscire dall’IDE.