# 03b - Training variants
Automatically trains every combination of model, optimizer, dropout, augmentation mode, and 3- or 4-class setup. Each step is documented, and the training variants are run in parallel.

## Environment and paths
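Same logic as the main notebook: detect the project root (the current directory or the nearest ancestor that contains `Data/`), set the train/val folders, and pick the Python interpreter (the project's `.venv` if present, otherwise the kernel's own interpreter).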

In [1]:
from pathlib import Path
import sys

# Detect project root: start at the current working directory and walk up
# (at most 10 levels) until a directory containing "Data" is found.
ROOT = Path.cwd().resolve()
for _ in range(10):
    if (ROOT / "Data").exists():
        break
    ROOT = ROOT.parent
else:
    # for/else: only reached when the loop finished without hitting `break`.
    raise FileNotFoundError("Project root not found (Data folder missing).")

# Data paths
TRAIN_DIR = ROOT / "Data" / "raw" / "train"
VAL_DIR   = ROOT / "Data" / "raw" / "val"

# Logs (created on first run)
LOG_DIR = ROOT / "Monitoring" / "output"
LOG_DIR.mkdir(parents=True, exist_ok=True)

# Python: prefer the project venv if available. A venv keeps its interpreter
# under Scripts/ on Windows and bin/ on POSIX, so probe both layouts
# (Windows first, which preserves the previous behaviour there).
_VENV_CANDIDATES = (
    ROOT / ".venv" / "Scripts" / "python.exe",
    ROOT / ".venv" / "bin" / "python",
)
VENV_PY = next((p for p in _VENV_CANDIDATES if p.exists()), _VENV_CANDIDATES[0])
# Fall back to the interpreter running this kernel when no venv is found.
PY = str(VENV_PY if VENV_PY.exists() else Path(sys.executable))

print("ROOT =", ROOT)
print("PY   =", PY)
print("TRAIN_DIR =", TRAIN_DIR)
print("VAL_DIR   =", VAL_DIR)
print("LOG_DIR   =", LOG_DIR)

ROOT = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final
PY   = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe
TRAIN_DIR = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train
VAL_DIR   = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val
LOG_DIR   = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Monitoring\output


## Experiment grid
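Define the lists of models, optimizers, dropouts, augmentation modes, and target class sets. Each combination is executed as a subprocess (`python -m Model.training.train`). `--class-filter` is passed only for the 3-class set, so the 4-class runs apply no class filter.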

In [2]:
from subprocess import run
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

# --- Search space: one training run per combination of the five lists below.
# Shrink the lists to fit your resources.
_BASE_CLASSES = ["Chao", "Ervas", "Milho"]
class_sets = [
    list(_BASE_CLASSES),                  # 3-class setup
    [*_BASE_CLASSES, "Milho_ervas"],      # 4-class setup
]
augment_modes = ["light", "realistic"]
dropouts = [0.2, 0.4]
optimizers = ["adam", "rmsprop", "adagrad"]
models = ["resnet18", "resnet34", "baseline"]

# --- Settings shared by every run.
image_size = 224
batch_size = 32
epochs = 5
limit_per_class = 800   # None for full dataset

# --- Output folder for the per-variant weight files (created if missing).
WEIGHTS_DIR = ROOT.joinpath("Model", "weights")
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)

def run_variant(model: str, optimizer: str, dropout: float, augment: str, classes: list[str]):
    """Train one variant by running `Model.training.train` as a subprocess.

    Builds the command line from the arguments and the notebook-level
    settings (`epochs`, `batch_size`, `image_size`, `limit_per_class`, the
    data/log/weights directories), prints it, and runs it with the project
    root as working directory.

    Args:
        model: Value passed to `--model`.
        optimizer: Value passed to `--optimizer`.
        dropout: Value passed to `--dropout`; also encoded in the file name
            as a whole percentage (0.2 -> "do20").
        augment: Value passed to `--augment`.
        classes: Target class names. Only their count appears in the file
            name, so two class sets of equal length share a suffix.

    Returns:
        A dict with the variant `suffix` and the `save_path` (a `Path`)
        given to the training script via `--save-path`.

    Raises:
        subprocess.CalledProcessError: If the training process exits with a
            non-zero status (`check=True`).
    """
    # round() rather than int(): products such as 0.57 * 100 evaluate to
    # 56.99999999999999, which int() would truncate to 56 and thereby
    # mislabel the run (and possibly collide with another dropout value).
    dropout_pct = round(dropout * 100)
    suffix = f"{model}_{optimizer}_do{dropout_pct}_{augment}_{len(classes)}cls"
    save_path = WEIGHTS_DIR / f"best_{suffix}.pt"

    cmd = [
        PY, "-m", "Model.training.train",
        "--train-dir", str(TRAIN_DIR),
        "--val-dir", str(VAL_DIR),
        "--model", model,
        "--optimizer", optimizer,
        "--epochs", str(epochs),
        "--batch-size", str(batch_size),
        "--dropout", str(dropout),
        "--image-size", str(image_size),
        "--augment", augment,
        "--save-path", str(save_path),
        "--log-dir", str(LOG_DIR),
    ]

    # The filter is only sent for a 3-class set; any other set runs without
    # `--class-filter`.
    # NOTE(review): presumably the unfiltered dataset is exactly the 4-class
    # set - confirm against the training script.
    if len(classes) == 3:
        cmd += ["--class-filter", *classes]

    if limit_per_class is not None:
        cmd += ["--limit-per-class", str(limit_per_class)]

    print("===", " ".join(cmd))
    run(cmd, check=True, cwd=str(ROOT))
    return {"suffix": suffix, "save_path": save_path}

## Parallel execution
Build the job list and dispatch with `ThreadPoolExecutor`. Adjust `max_workers` based on machine capacity to avoid GPU/CPU contention.

In [None]:
# Full grid: every (model, optimizer, dropout, augment, class set) combination,
# ordered with `model` varying slowest and `cls_set` fastest.
jobs = [
    (model, optimizer, dropout, augment, cls_set)
    for model in models
    for optimizer in optimizers
    for dropout in dropouts
    for augment in augment_modes
    for cls_set in class_sets
]

# At most 4 concurrent trainings, never more than the CPU count or the number
# of jobs. Clamped to >= 1 because ThreadPoolExecutor rejects max_workers=0,
# which an empty grid would otherwise produce.
max_workers = max(1, min(4, os.cpu_count() or 1, len(jobs)))
results = []    # one dict per successful variant
failures = []   # one dict per failed variant (parameters + error)

print(f"Launching {len(jobs)} jobs with {max_workers} workers")

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # Map each future back to the parameters it was launched with.
    future_to_params = {
        executor.submit(run_variant, model, optimizer, dropout, augment, cls_set): {
            "model": model,
            "optimizer": optimizer,
            "dropout": dropout,
            "augment": augment,
            "classes": cls_set,
        }
        for model, optimizer, dropout, augment, cls_set in jobs
    }

    for fut in as_completed(future_to_params):
        params = future_to_params[fut]
        try:
            info = fut.result()
        except Exception as exc:
            # Record the failure and keep collecting: the executor runs the
            # remaining jobs anyway, so their results must not be lost.
            failures.append({**params, "error": repr(exc)})
            print(f"[failed] {params} -> {exc!r}")
            continue
        params["suffix"] = info["suffix"]
        params["weights"] = str(info["save_path"])
        results.append(params)
        print(f"[done] {params['suffix']} -> {params['weights']}")

print("Variants completed:", len(results))

# Still fail the cell when something went wrong, but only after every
# result has been collected.
if failures:
    raise RuntimeError(
        f"{len(failures)} of {len(jobs)} variants failed; inspect `failures` for details."
    )

Launching 72 jobs with 4 workers
=== C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe -m Model.training.train --train-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train --val-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val --model resnet18 --optimizer adam --epochs 5 --batch-size 32 --dropout 0.2 --image-size 224 --augment light --save-path C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Model\weights\best_resnet18_adam_do20_light_3cls.pt --log-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Monitoring\output --class-filter Chao Ervas Milho --limit-per-class 800
=== C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe -m Model.training.train --train-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train --val-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val --model resnet18 --optimizer adam --epochs 5 --batch-size 3