# 03 - Training
Goal: run the classification training jobs and keep a clear record of their configuration. First check the paths, then launch the predefined variants automatically.

## Base parameters
Detect the project root (the first directory, walking up from the current one, that contains `Data/`), add it to `sys.path`, and explicitly select the train/val folders and the venv interpreter.

In [1]:
from pathlib import Path
import sys

# 1) Find the project root: starting at the current working directory, walk
#    up until a directory containing Data/ is found. The current directory and
#    up to 9 of its parents are checked before giving up.
ROOT = Path.cwd().resolve()
for _ in range(10):
    if (ROOT / "Data").exists():
        break
    ROOT = ROOT.parent
else:
    # The loop never hit `break`: no candidate directory contained Data/.
    raise FileNotFoundError("Could not find project root (Data folder missing).")

# 2) Add the repository root to sys.path (once) so project modules can be
#    imported from this notebook.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# 3) Dataset/training parameters
IMAGE_SIZE = 224
AUGMENT = "light"  # none | light | realistic

# Raw vs processed: choose explicitly
TRAIN_DIR = ROOT / "Data" / "raw" / "train"
VAL_DIR = ROOT / "Data" / "raw" / "val"

# 4) Python: prefer the project's .venv interpreter if present.
#    venv places it under Scripts\python.exe on Windows and bin/python on
#    POSIX systems, so both layouts are probed. When neither exists, VENV_PY
#    keeps the Windows-style path and PY falls back to the interpreter that
#    is running this notebook.
_VENV_CANDIDATES = (
    ROOT / ".venv" / "Scripts" / "python.exe",
    ROOT / ".venv" / "bin" / "python",
)
VENV_PY = next(
    (candidate for candidate in _VENV_CANDIDATES if candidate.exists()),
    _VENV_CANDIDATES[0],
)
PY = str(VENV_PY if VENV_PY.exists() else Path(sys.executable))

print("ROOT =", ROOT)
print("PY   =", PY)
print("TRAIN_DIR =", TRAIN_DIR)
print("VAL_DIR   =", VAL_DIR)

ROOT = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final
PY   = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe
TRAIN_DIR = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train
VAL_DIR   = C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val


## Launch training variants in parallel
Prepare the job list (3 classes with Adam, 4 classes with Adam, 4 classes with RMSprop and the `realistic` augmentation) and run the jobs in parallel to save wall-clock time. Lower `max_workers` if your GPU/CPU is limited.

In [2]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from subprocess import run
import os

# Output locations: one weights file per training variant, plus a shared
# folder that is passed to the training script as --log-dir.
SAVE_3 = ROOT.joinpath("Model", "weights", "best_model_3.pt")
SAVE_4 = ROOT.joinpath("Model", "weights", "best_model.pt")
SAVE_RMS = ROOT.joinpath("Model", "weights", "best_model_rmsprop.pt")
LOG_DIR = ROOT.joinpath("Monitoring", "output")

# Create every destination folder up front (no error if it already exists).
for target_dir in (SAVE_3.parent, SAVE_4.parent, SAVE_RMS.parent, LOG_DIR):
    target_dir.mkdir(parents=True, exist_ok=True)

# Hyper-parameters shared by the two Adam variants.
_ADAM_SETTINGS = {
    "optimizer": "adam",
    "epochs": 5,
    "batch_size": 32,
    "dropout": 0.3,
    "augment": AUGMENT,
}

# One dict per training run. "class_filter" is either a list of class names
# or None; build_cmd only adds --class-filter when the value is truthy.
TRAIN_JOBS = [
    {
        "name": "3_classes_adam",
        **_ADAM_SETTINGS,
        "class_filter": ["Chao", "Ervas", "Milho"],
        "save_path": SAVE_3,
    },
    {
        "name": "4_classes_adam",
        **_ADAM_SETTINGS,
        "class_filter": None,
        "save_path": SAVE_4,
    },
    {
        "name": "4_classes_rmsprop_realistic",
        "optimizer": "rmsprop",
        "epochs": 3,
        "batch_size": 32,
        "dropout": 0.5,
        "augment": "realistic",
        "class_filter": None,
        "save_path": SAVE_RMS,
    },
]

def build_cmd(job: dict) -> list[str]:
    """Assemble the command line that launches one training job.

    Args:
        job: Job description with the keys "optimizer", "epochs",
            "batch_size", "dropout", "augment" and "save_path", plus an
            optional "class_filter" (iterable of class names).

    Returns:
        The argument list: interpreter, ``-m Model.training.train``, then the
        option/value pairs, with ``--class-filter`` appended last when the
        job provides a non-empty filter.
    """
    options = (
        ("--train-dir", str(TRAIN_DIR)),
        ("--val-dir", str(VAL_DIR)),
        ("--model", "resnet18"),
        ("--optimizer", job["optimizer"]),
        ("--epochs", str(job["epochs"])),
        ("--batch-size", str(job["batch_size"])),
        ("--dropout", str(job["dropout"])),
        ("--image-size", str(IMAGE_SIZE)),
        ("--augment", str(job["augment"])),
        ("--save-path", str(job["save_path"])),
        ("--log-dir", str(LOG_DIR)),
    )

    argv = [PY, "-m", "Model.training.train"]
    for flag, value in options:
        argv.extend((flag, value))

    class_filter = job.get("class_filter")
    if class_filter:
        argv.append("--class-filter")
        argv.extend(class_filter)
    return argv

def run_job(job: dict):
    """Run one training job as a subprocess from the project root.

    Prints the job name and the space-joined command before launching.

    Args:
        job: Job description; see build_cmd for the keys it reads. "name" and
            "save_path" are also read here.

    Returns:
        A ``(name, save_path)`` tuple for the finished job.

    Raises:
        subprocess.CalledProcessError: The training process exited with a
            non-zero status (``check=True``).
    """
    command = build_cmd(job)
    job_name = job["name"]
    print(f"==> {job_name}")
    print(" ".join(command))
    run(command, check=True, cwd=str(ROOT))
    return job_name, job["save_path"]

# Cap the pool at 3 workers, the CPU count and the number of jobs. Never go
# below 1: ThreadPoolExecutor rejects max_workers=0 (empty job list).
max_workers = max(1, min(3, os.cpu_count() or 1, len(TRAIN_JOBS)))
print(f"Parallel launch with {max_workers} workers")

# Exceptions raised by failed jobs, in completion order.
failures = []
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(run_job, job): job for job in TRAIN_JOBS}
    for fut in as_completed(futures):
        # Inspect the outcome without raising, so one failed job does not
        # hide the result of the jobs that finish after it.
        err = fut.exception()
        if err is not None:
            print(f"[fail] {futures[fut]['name']} -> {err}")
            failures.append(err)
            continue
        name, save_path = fut.result()
        print(f"[done] {name} -> {save_path}")

# Every job has been reported; surface the first failure so the cell still
# ends with an error when any training failed.
if failures:
    raise failures[0]

print("All trainings finished.")

Parallel launch with 3 workers
==> 3_classes_adam
C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe -m Model.training.train --train-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train --val-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val --model resnet18 --optimizer adam --epochs 5 --batch-size 32 --dropout 0.3 --image-size 224 --augment light --save-path C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Model\weights\best_model_3.pt --log-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Monitoring\output --class-filter Chao Ervas Milho
==> 4_classes_adam
C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe -m Model.training.train --train-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train --val-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val --model resnet18 --optimizer adam --epochs 5 --batch-size 32 --dropout 0.3 

CalledProcessError: Command '['C:\\Users\\lmanuelli\\Projet\\manuelli_laurent_projet_final\\.venv\\Scripts\\python.exe', '-m', 'Model.training.train', '--train-dir', 'C:\\Users\\lmanuelli\\Projet\\manuelli_laurent_projet_final\\Data\\raw\\train', '--val-dir', 'C:\\Users\\lmanuelli\\Projet\\manuelli_laurent_projet_final\\Data\\raw\\val', '--model', 'resnet18', '--optimizer', 'rmsprop', '--epochs', '3', '--batch-size', '32', '--dropout', '0.5', '--image-size', '224', '--augment', 'realistic', '--save-path', 'C:\\Users\\lmanuelli\\Projet\\manuelli_laurent_projet_final\\Model\\weights\\best_model_rmsprop.pt', '--log-dir', 'C:\\Users\\lmanuelli\\Projet\\manuelli_laurent_projet_final\\Monitoring\\output']' returned non-zero exit status 1.

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import subprocess
import os
from pathlib import Path

# Folder that receives the per-job train_<name>.log files; created on demand.
LOG_DIR = ROOT.joinpath("Monitoring", "output")
LOG_DIR.mkdir(exist_ok=True, parents=True)

def build_cmd(job: dict) -> list[str]:
    """Build the argument list used to launch one training job.

    Args:
        job: Job description with the keys "optimizer", "epochs",
            "batch_size", "dropout", "augment" and "save_path", plus an
            optional "class_filter" (iterable of class names).

    Returns:
        The command as a list of strings, ready for ``subprocess.run``.
        ``--class-filter`` and its values come last and are only present when
        the job has a non-empty filter.
    """
    launcher = [PY, "-m", "Model.training.train"]
    data_args = ["--train-dir", str(TRAIN_DIR), "--val-dir", str(VAL_DIR)]
    model_args = ["--model", "resnet18", "--optimizer", job["optimizer"]]
    hyper_args = [
        "--epochs", str(job["epochs"]),
        "--batch-size", str(job["batch_size"]),
        "--dropout", str(job["dropout"]),
        "--image-size", str(IMAGE_SIZE),
        "--augment", str(job["augment"]),
    ]
    output_args = ["--save-path", str(job["save_path"]), "--log-dir", str(LOG_DIR)]
    # Windows stability: --num-workers 0 avoids an EOFError.
    loader_args = ["--num-workers", "0"]

    argv = launcher + data_args + model_args + hyper_args + output_args + loader_args

    selected = job.get("class_filter")
    if selected:
        argv.extend(["--class-filter", *selected])
    return argv

def run_job(job: dict):
    """Run one training job in a subprocess and save its output to a log file.

    The command, return code, stdout and stderr are written to
    ``LOG_DIR / "train_<name>.log"``. Failures are reported through the return
    value instead of an exception so the caller can summarise every job.

    Args:
        job: Job description; see build_cmd for the keys it reads. "name" and
            "save_path" are also read here.

    Returns:
        ``(name, True, save_path)`` when the process exits with status 0,
        otherwise ``(name, False, log_path)``. Paths are returned as strings.
    """
    cmd = build_cmd(job)
    name = job["name"]
    log_path = LOG_DIR / f"train_{name}.log"

    print(f"==> {name}")
    print(" ".join(cmd))

    try:
        res = subprocess.run(
            cmd,
            cwd=str(ROOT),
            capture_output=True,
            text=True,
            # The child's output is decoded with the locale encoding; replace
            # undecodable bytes instead of raising and losing the whole log.
            errors="replace",
        )
    except OSError as exc:
        # The process could not be started at all (e.g. missing interpreter).
        # Record the reason and report a failure like any other failed job.
        log_path.write_text(
            "CMD:\n" + " ".join(cmd) + "\n\n"
            + "LAUNCH ERROR:\n" + repr(exc) + "\n",
            encoding="utf-8",
        )
        return name, False, str(log_path)

    log_path.write_text(
        "CMD:\n" + " ".join(cmd) + "\n\n"
        + "RETURN CODE:\n" + str(res.returncode) + "\n\n"
        + "STDOUT:\n" + (res.stdout or "") + "\n\n"
        + "STDERR:\n" + (res.stderr or "") + "\n",
        encoding="utf-8",
    )

    if res.returncode != 0:
        # Point the caller at the log so the failure can be inspected.
        return name, False, str(log_path)

    return name, True, str(job["save_path"])

# ⚠️ Recommendation: if you have a GPU, set this to 1.
max_workers = 1
print(f"Launch with {max_workers} worker(s)")

# (name, ok, info) tuples, collected in the order the jobs complete.
results = []
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(run_job, job): job for job in TRAIN_JOBS}
    results.extend(fut.result() for fut in as_completed(futures))

# One line per job: status, job name, then the weights path on success or
# the log path on failure.
print("\n=== SUMMARY ===")
for name, ok, info in results:
    print(f"{'OK' if ok else 'FAIL':4} | {name} | {info}")


Launch with 1 worker(s)
==> 3_classes_adam
C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\.venv\Scripts\python.exe -m Model.training.train --train-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\train --val-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Data\raw\val --model resnet18 --optimizer adam --epochs 5 --batch-size 32 --dropout 0.3 --image-size 224 --augment light --save-path C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Model\weights\best_model_3.pt --log-dir C:\Users\lmanuelli\Projet\manuelli_laurent_projet_final\Monitoring\output --num-workers 0 --class-filter Chao Ervas Milho
