In [1]:
# === Cell 1: Montaggio Drive + Installazione dipendenze ===
from google.colab import drive
drive.mount('/content/drive')

!pip install --quiet torch torchvision webdataset tqdm pillow

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# === Cell 2: Caricamento Configurazione & Path ===
from pathlib import Path
import yaml, sys

# 1) Load the YAML training configuration
config_path = Path('/content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/config/training.yaml')
with config_path.open('r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# 2) Resolve the project root: prefer the Colab path, fall back to the local one
colab_root = Path(cfg['env_paths']['colab'])
local_root = Path(cfg['env_paths']['local'])
PROJECT_ROOT = colab_root if colab_root.exists() else local_root
if not PROJECT_ROOT.exists():
    raise FileNotFoundError(f"Project root non trovato: {PROJECT_ROOT}")

# 3) Rewrite the configured dataset paths as absolute paths under PROJECT_ROOT.
#    Splits that are absent (or null) in the YAML are left untouched.
for split in ['train','val','test']:
    rel = cfg['data'].get(split)
    if rel is not None:
        cfg['data'][split] = str(PROJECT_ROOT / rel)

print("🔥 PROJECT_ROOT:", PROJECT_ROOT)
print("📂 Dataset paths:")
for split in ['train','val','test']:
    # .get() mirrors the optional lookup used above, so a split that is not
    # configured is printed as None instead of raising KeyError.
    print(f"  {split} → {cfg['data'].get(split)}")

# 4) Put PROJECT_ROOT and src/ at the front of sys.path (src/ ends up first).
#    Entries already present are skipped so that re-running this cell does not
#    keep stacking duplicates onto sys.path.
for _p in (str(PROJECT_ROOT), str(PROJECT_ROOT / 'src')):
    if _p not in sys.path:
        sys.path.insert(0, _p)

🔥 PROJECT_ROOT: /content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project
📂 Dataset paths:
  train → /content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/data/processed/webdataset_2500/train/patches-0000.tar
  val → /content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/data/processed/webdataset_2500/val/patches-0000.tar
  test → /content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/data/processed/webdataset_2500/test/patches-0000.tar


In [3]:
# === Cell 3: Registry, BaseTrainer & Launch Function ===
import sys, types, importlib, logging
from typing import Any, Dict

# Base logger configuration.
# force=True removes any handlers already attached to the root logger before
# configuring it. Without it, basicConfig() silently does nothing when the root
# logger is already configured (for instance after re-running this cell, or if
# the notebook host pre-installs a handler), and the INFO-level messages
# emitted by launch_training() would not be shown.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    force=True,
)

# 1) Registry & BaseTrainer
# Maps a model name to the trainer class registered for it.
TRAINER_REGISTRY: Dict[str, type] = {}
def register_trainer(name: str):
    """Build a class decorator that files the decorated class under ``name``.

    The decorated class is stored in ``TRAINER_REGISTRY[name]`` (overwriting
    any earlier entry with the same name) and handed back unchanged.
    """
    def _record(trainer_cls):
        TRAINER_REGISTRY.update({name: trainer_cls})
        return trainer_cls
    return _record

class BaseTrainer:
    """Minimal trainer base: holds the two config dicts and a logger.

    Subclasses are expected to override :meth:`train`.
    """

    def __init__(self, model_cfg: Dict[str, Any], data_cfg: Dict[str, Any]):
        """Store both configs and create a logger named after the concrete class."""
        self.logger = logging.getLogger(type(self).__name__)
        self.data_cfg = data_cfg
        self.model_cfg = model_cfg

    def train(self):
        """Placeholder; the base implementation always raises NotImplementedError."""
        raise NotImplementedError("Trainer deve definire train()")

# 2) Publish a synthetic `launch_training` module in sys.modules, so that
#    `import launch_training` resolves to the objects defined in this notebook.
#    NOTE(review): presumably the trainers.* modules import BaseTrainer and
#    register_trainer from it — confirm against src/.
mod = types.ModuleType("launch_training")
vars(mod).update(BaseTrainer=BaseTrainer, register_trainer=register_trainer)
sys.modules["launch_training"] = mod

# 3) Import every trainer module; a module that is already in sys.modules is
#    reloaded instead, so its top-level code runs again on each execution of
#    this cell.
for mdl in (
    "trainers.simclr",
    "trainers.moco_v2",
    "trainers.rotation",
    "trainers.jigsaw",
    "trainers.supervised",
    "trainers.transfer",
):
    if mdl not in sys.modules:
        importlib.import_module(mdl)
    else:
        importlib.reload(sys.modules[mdl])

# 4) Launch function
def launch_training(cfg: Dict[str, Any]):
    """Train the model(s) selected by ``cfg`` with their registered trainers.

    Args:
        cfg: Configuration dict. ``cfg['data']`` is passed to every trainer,
            ``cfg['models']`` maps a model name to its model config, and the
            optional ``cfg['run_model']`` selects what to run. When
            ``run_model`` is a string other than ``'all'`` (compared
            case-insensitively) only that model is trained; in every other
            case all entries of ``cfg['models']`` are trained in order.

    Raises:
        KeyError: if ``run_model`` names a model that is not in
            ``cfg['models']``, or if a selected model has no entry in
            ``TRAINER_REGISTRY``. Both checks happen before any training
            starts.
    """
    data_cfg   = cfg['data']
    models_cfg = cfg['models']
    run_model  = cfg.get('run_model', 'all')

    if isinstance(run_model, str) and run_model.lower() != 'all':
        if run_model not in models_cfg:
            raise KeyError(f"Modello '{run_model}' non in cfg['models']")
        tasks = [(run_model, models_cfg[run_model])]
    else:
        tasks = list(models_cfg.items())

    # Fail fast: resolve every trainer class before starting anything, so a
    # missing registration is reported immediately instead of after the
    # preceding trainings have already run to completion.
    jobs = []
    for name, m_cfg in tasks:
        if name not in TRAINER_REGISTRY:
            raise KeyError(f"Nessun trainer registrato per '{name}'")
        jobs.append((name, TRAINER_REGISTRY[name], m_cfg))

    for name, Trainer, m_cfg in jobs:
        trainer = Trainer(m_cfg, data_cfg)
        logging.info("🚀 Avvio training: %s", name)
        trainer.train()
        logging.info("✅ Finito training: %s", name)


In [4]:
# === Cell 4: (Optional) Print the config for verification ===
import pprint
print(pprint.pformat(cfg))

{'data': {'test': '/content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/data/processed/webdataset_2500/test/patches-0000.tar',
          'train': '/content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/data/processed/webdataset_2500/train/patches-0000.tar',
          'val': '/content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project/data/processed/webdataset_2500/val/patches-0000.tar'},
 'env_paths': {'colab': '/content/drive/MyDrive/ColabNotebooks/wsi-ssrl-rcc_project',
               'local': '/Users/stefanoroybisignano/Desktop/MLA/project/wsi-ssrl-rcc_project'},
 'models': {'jigsaw': {'backbone': 'resnet18',
                       'grid_size': 3,
                       'training': {'batch_size': 128,
                                    'epochs': 50,
                                    'learning_rate': '1e-4',
                                    'optimizer': 'adam',
                                    'weight_decay': '1e-5'}},
            'moco_v2': {'augmentation': {'color

In [5]:
# === Cell 5: Start training ===
# Runs the model(s) selected by the `run_model` entry of cfg (all models when
# it is absent or set to 'all').
launch_training(cfg)

Epoch:   0%|          | 0/50 [00:00<?, ?epoch/s]
Train E1/50:   0%|          | 0/47 [00:00<?, ?batch/s][A
Epoch:   0%|          | 0/50 [00:15<?, ?epoch/s]


KeyboardInterrupt: 