# 03c - Comparatif raw vs resized vs augmentation réaliste
Lance trois entraînements avec les mêmes hyperparamètres afin de comparer les variantes `raw_noaug`, `raw_realistic` et `resized_realistic`.

In [None]:
from pathlib import Path
import sys

# Locate the project root: the first directory, starting at the current
# working directory and moving upwards, that contains a "Data" entry.
# The search is capped at 10 levels (the cwd plus up to nine ancestors).
_start = Path.cwd().resolve()
_search_path = [_start, *_start.parents][:10]
_found = next((d for d in _search_path if (d / "Data").exists()), None)
if _found is None:
    raise FileNotFoundError("Racine projet introuvable (dossier 'Data' non trouvé).")
ROOT = _found

# Dataset locations, all under <ROOT>/Data: the raw images and the
# pre-resized ("processed") copies, each with a train and a val split.
_RAW_DIR = ROOT / "Data" / "raw"
_PROCESSED_DIR = ROOT / "Data" / "processed"
RAW_TRAIN, RAW_VAL = _RAW_DIR / "train", _RAW_DIR / "val"
PROC_TRAIN, PROC_VAL = _PROCESSED_DIR / "train_224", _PROCESSED_DIR / "val_224"

# Training settings shared by every variant, so that only the data source
# and the augmentation mode differ between runs.
model = "resnet18"
image_size = 224
batch_size = 32
epochs = 5
# Forwarded as --limit-per-class; set to None to omit the flag (full dataset).
limit_per_class = 600

# Interpreter used to launch the training runs: prefer the project's .venv
# when it exists, otherwise fall back to the interpreter running this notebook.
# A venv keeps its interpreter in Scripts/python.exe on Windows and in
# bin/python on POSIX, so both layouts are probed (the original only looked
# for the Windows one and therefore never found the venv on Linux/macOS).
_VENV_CANDIDATES = (
    ROOT / ".venv" / "Scripts" / "python.exe",
    ROOT / ".venv" / "bin" / "python",
)
# When neither exists, VENV_PY keeps its historical value (the Windows path).
VENV_PY = next((p for p in _VENV_CANDIDATES if p.exists()), _VENV_CANDIDATES[0])
PY = str(VENV_PY if VENV_PY.exists() else Path(sys.executable))

# Sanity checks (fail fast): the raw dataset is mandatory, so abort right
# away when either of its splits is absent.
if not (RAW_TRAIN.exists() and RAW_VAL.exists()):
    raise FileNotFoundError(f"Raw dataset introuvable: {RAW_TRAIN} / {RAW_VAL}")

# The processed dataset is optional: only warn and show the expected paths.
if not all(_split.exists() for _split in (PROC_TRAIN, PROC_VAL)):
    print("⚠️ Processed dataset introuvable (ok si tu n'as pas preprocess).")
    for _label, _path in (("   PROC_TRAIN =", PROC_TRAIN), ("   PROC_VAL   =", PROC_VAL)):
        print(_label, _path)

# The three runs to compare, as (name, train dir, val dir, augmentation mode).
_variant_specs = (
    ("raw_noaug", RAW_TRAIN, RAW_VAL, "none"),
    ("raw_realistic", RAW_TRAIN, RAW_VAL, "realistic"),
    ("resized_realistic", PROC_TRAIN, PROC_VAL, "realistic"),
)
variants = [
    {"name": _name, "train": _train_dir, "val": _val_dir, "augment": _augment}
    for _name, _train_dir, _val_dir, _augment in _variant_specs
]

# Echo the resolved configuration so it is visible in the notebook output.
print("ROOT =", ROOT)
print("PY   =", PY)
print("Variants:", [_spec["name"] for _spec in variants])


In [None]:
from subprocess import run
from pathlib import Path

# Output locations for model checkpoints and training logs; both are created
# up front (including missing parents) and may already exist.
WEIGHTS_DIR = ROOT / "Model" / "weights"
LOG_DIR = ROOT / "Monitoring" / "output"
for _out_dir in (WEIGHTS_DIR, LOG_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Train every variant in turn by launching `Model.training.train` as a
# subprocess from the project root. All runs share the same settings; only
# the data directories, the augmentation mode and the checkpoint path differ.
for var in variants:
    # A variant whose train or val directory is absent is skipped, not failed.
    missing = next((split for split in ("train", "val") if not var[split].exists()), None)
    if missing is not None:
        print(f"Skip {var['name']} ({missing} dir missing: {var[missing]})")
        continue

    save_path = WEIGHTS_DIR / f"best_compare_{var['name']}.pt"

    # Command-line options in the order they are passed to the trainer.
    options = {
        "--train-dir": var["train"],
        "--val-dir": var["val"],
        "--model": model,
        "--optimizer": "adam",
        "--epochs": epochs,
        "--batch-size": batch_size,
        "--dropout": "0.3",
        "--image-size": image_size,
        "--augment": var["augment"],
        "--save-path": save_path,
        "--log-dir": LOG_DIR,
    }
    # The per-class cap is optional: the flag is only sent when a limit is set.
    if limit_per_class is not None:
        options["--limit-per-class"] = limit_per_class

    cmd = [PY, "-m", "Model.training.train"]
    for flag, value in options.items():
        cmd += [flag, str(value)]

    print("===", " ".join(cmd))
    # check=True raises CalledProcessError on a non-zero exit status, which
    # also stops the remaining variants from running.
    run(cmd, check=True, cwd=str(ROOT))
