In [5]:
"""
model_benchmark.py – Version 1.2
================================
Ein lauffähiges Script, das verschiedene CNN-Architekturen fair vergleicht.
– Train/Val/Test-Split ohne Leak
– Sampler für Klassenbalance
– ImageNet- oder Dataset-Norm abhängig vom Pfad (Fine-Tune vs. Scratch)
– micro- und macro-F1
– Early-Stopping + Checkpoint
– tqdm-Balken auf Batch- und Epoch-Ebene
– Argument-Parser ignoriert unbekannte Flags → funktioniert auch in Jupyter

Getestet mit **Python 3.12**, `torch 2.2`, `torchvision 0.17`, `tqdm 4.66`.
"""

from __future__ import annotations
import argparse
import random
import sys
from collections import Counter
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader, random_split, WeightedRandomSampler
from torchvision import datasets, models, transforms
from tqdm.auto import tqdm

# ───────────────────────────── CLI ──────────────────────────────

def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Ignoriert unbekannte Flags (Jupyter) & setzt Notebook-Defaults."""
    parser = argparse.ArgumentParser(description="CNN-Benchmark")
    parser.add_argument("--data_root", type=Path, default=Path("Data"), help="Ordner mit Train/ Test Unterordnern")
    parser.add_argument("--models", nargs="*", default=[
        "mobilenet_v2", "mobilenet_v3_large", "resnet18", "resnet34", "resnet50", "efficientnet_b0", "vgg16"],
                        help="Modellnamen")
    parser.add_argument("--epochs", type=int, default=25)
    parser.add_argument("--batch", type=int, default=64)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--patience", type=int, default=5)
    parser.add_argument("--val_split", type=float, default=0.15)
    parser.add_argument("--seed", type=int, default=42)

    if argv is None and ("ipykernel" in sys.argv[0] or any(a.startswith("-f") for a in sys.argv)):
        # Notebook-Modus
        args, _ = parser.parse_known_args([])
    else:
        args, _ = parser.parse_known_args(argv)
    return args

# ─────────────────────────── Helpers ────────────────────────────

# Per-channel normalisation constants; benchmark() uses them whenever the
# pretrained (fine-tuning) path is selected.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def set_seed(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs and pin cuDNN to deterministic mode."""
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def compute_mean_std(ds, batch_size: int = 128, num_workers: int = 2):
    """Compute the per-channel mean and standard deviation over all pixels of ``ds``.

    The statistics are accumulated as running sums of ``x`` and ``x**2`` in
    float64, so the result is the true dataset-level std. Averaging per-image
    variances (the previous approach) ignores the spread of the image means
    and therefore underestimates the std.

    Args:
        ds: Dataset yielding ``(image, label)`` pairs where ``image`` is a
            tensor whose first dimension is the channel axis.
        batch_size: Number of images per batch while scanning the dataset.
        num_workers: Worker processes used by the internal ``DataLoader``.

    Returns:
        ``(mean, std)`` as two lists of floats with one entry per channel.

    Raises:
        ValueError: If the dataset yields no pixels at all.
    """
    loader = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        # Pinning only helps host->GPU copies and warns on CPU-only machines.
        pin_memory=torch.cuda.is_available(),
    )
    channel_sum = None
    channel_sq_sum = None
    pixels_per_channel = 0
    for imgs, _ in tqdm(loader, desc="calc mean/std", leave=False):
        channels = imgs.size(1)
        # (B, C, ...) -> (C, B * pixels) so every channel is reduced in one go.
        flat = imgs.to(torch.float64).transpose(0, 1).reshape(channels, -1)
        if channel_sum is None:
            channel_sum = torch.zeros(channels, dtype=torch.float64)
            channel_sq_sum = torch.zeros(channels, dtype=torch.float64)
        channel_sum += flat.sum(1)
        channel_sq_sum += flat.pow(2).sum(1)
        pixels_per_channel += flat.size(1)

    if pixels_per_channel == 0:
        raise ValueError("compute_mean_std: dataset is empty")

    mean = channel_sum / pixels_per_channel
    # E[x^2] - E[x]^2; clamp guards against tiny negative values from rounding.
    var = (channel_sq_sum / pixels_per_channel - mean.pow(2)).clamp_min(0.0)
    return mean.tolist(), var.sqrt().tolist()

def make_sampler(targets):
    """Return a sampler that draws every class with equal total probability.

    Each sample is weighted by the inverse frequency of its class label in
    ``targets``; one pass draws ``len(targets)`` indices with replacement.
    """
    per_class = Counter(targets)
    weights = [1 / per_class[label] for label in targets]
    draws = len(weights)
    return WeightedRandomSampler(weights, num_samples=draws, replacement=True)

# ───────────────────────── Model-Factory ────────────────────────

def get_model(name: str, classes: int, pretrained=True):
    """Create a torchvision classifier whose final layer outputs ``classes`` logits.

    Args:
        name: Architecture name (case-insensitive); one of ``mobilenet_v2``,
            ``mobilenet_v3_large``, ``resnet18``, ``resnet34``, ``resnet50``,
            ``efficientnet_b0`` or ``vgg16``.
        classes: Number of output units of the replaced classification layer.
        pretrained: Load the architecture's ``DEFAULT`` weights when true,
            otherwise build the network without weights.

    Returns:
        The model with its last ``nn.Linear`` layer swapped for a fresh one.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    # name -> (weights enum in torchvision.models, attribute holding the head,
    #          index of the final Linear inside that attribute or None if the
    #          attribute itself is the Linear layer).
    # The enum names are spelled out because they cannot be derived from the
    # builder name: "resnet18".capitalize() is "Resnet18", not "ResNet18".
    specs = {
        "mobilenet_v2": ("MobileNet_V2_Weights", "classifier", 1),
        "mobilenet_v3_large": ("MobileNet_V3_Large_Weights", "classifier", 3),
        "resnet18": ("ResNet18_Weights", "fc", None),
        "resnet34": ("ResNet34_Weights", "fc", None),
        "resnet50": ("ResNet50_Weights", "fc", None),
        "efficientnet_b0": ("EfficientNet_B0_Weights", "classifier", 1),
        "vgg16": ("VGG16_Weights", "classifier", 6),
    }
    name = name.lower()
    if name not in specs:
        raise ValueError(f"unknown model {name!r}; expected one of {sorted(specs)}")

    weights_attr, head_attr, head_idx = specs[name]
    weights = getattr(models, weights_attr).DEFAULT if pretrained else None
    m = getattr(models, name)(weights=weights)

    head = getattr(m, head_attr)
    if head_idx is None:
        setattr(m, head_attr, nn.Linear(head.in_features, classes))
    else:
        head[head_idx] = nn.Linear(head[head_idx].in_features, classes)
    return m

# ───────────────────────── Train / Val ──────────────────────────

def run_epoch(model, loader, criterion, optimizer=None, device="cpu", desc="train"):
    """Run one pass over ``loader`` and report aggregate metrics.

    The pass is a training epoch when ``optimizer`` is given (gradients are
    enabled and the weights are updated after every batch); otherwise the
    model is put into eval mode and no gradients are tracked.

    Args:
        model: Network called as ``model(x)`` to produce per-class logits.
        loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer to step; ``None`` selects evaluation mode.
        device: Device the batches are moved to.
        desc: Label shown on the progress bar.

    Returns:
        ``(mean_loss, accuracy, micro_f1, macro_f1)`` over all samples seen.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    train = optimizer is not None
    model.train(train)
    total, correct, loss_sum = 0, 0, 0.0
    true, pred = [], []
    loop = tqdm(loader, desc=desc, leave=False)
    for x, y in loop:
        x, y = x.to(device), y.to(device)
        with torch.set_grad_enabled(train):
            out = model(x)
            loss = criterion(out, y)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        p = out.argmax(1)
        bs = y.size(0)
        total += bs
        correct += (p == y).sum().item()
        # Weight the batch loss by its size so the final value is a per-sample mean.
        loss_sum += loss.item() * bs
        # Store plain ints rather than one 0-d tensor per sample.
        true.extend(y.tolist())
        pred.extend(p.tolist())
        loop.set_postfix(loss=loss_sum/total, acc=correct/total)

    if total == 0:
        raise ValueError(f"run_epoch({desc!r}): loader yielded no samples")

    acc = correct/total
    micro = f1_score(true, pred, average="micro")
    macro = f1_score(true, pred, average="macro", zero_division=0)
    return loss_sum/total, acc, micro, macro

# ───────────────────────── Benchmark ────────────────────────────

def _build_transforms(mean, std):
    """Return the ``(train, eval)`` transform pipelines normalised with ``mean``/``std``."""
    normalize = transforms.Normalize(mean, std)
    train_tf = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(0.2, 0.2, 0.2),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        normalize,
    ])
    eval_tf = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])
    return train_tf, eval_tf


def benchmark(cfg):
    """Train every model in ``cfg.models`` and report its test-set metrics.

    Reads ``seed``, ``data_root``, ``val_split``, ``models``, ``batch``,
    ``lr``, ``epochs`` and ``patience`` from ``cfg``. For each model the best
    validation-accuracy checkpoint is written to ``best_<name>.pt`` in the
    working directory and reloaded before the test pass.

    Args:
        cfg: Namespace as produced by ``parse_args``.

    Returns:
        Dict mapping model name to test accuracy.

    Raises:
        FileNotFoundError: If the ``Train`` or ``Test`` folder is missing.
        ValueError: If ``val_split`` leaves the train or validation part empty.
    """
    set_seed(cfg.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pin_memory = device.type == "cuda"

    train_dir = cfg.data_root / "Train"
    test_dir = cfg.data_root / "Test"
    for required in (train_dir, test_dir):
        if not required.exists():
            raise FileNotFoundError(required)

    # Transform-free view of the training folder: used only for the class
    # list, the targets and the train/val index split.
    full_ds = datasets.ImageFolder(train_dir)
    v_len = int(len(full_ds) * cfg.val_split)
    t_len = len(full_ds) - v_len
    if v_len == 0 or t_len == 0:
        raise ValueError(
            f"val_split={cfg.val_split} yields {t_len} train / {v_len} val samples; both must be non-empty"
        )
    train_part, val_part = random_split(
        full_ds, [t_len, v_len], generator=torch.Generator().manual_seed(cfg.seed)
    )
    train_idx, val_idx = list(train_part.indices), list(val_part.indices)
    sampler = make_sampler([full_ds.targets[i] for i in train_idx])

    subset_cls = torch.utils.data.Subset

    pretrained = True  # fine-tuning path
    if pretrained:
        norm_mean, norm_std = IMAGENET_MEAN, IMAGENET_STD
    else:
        # Dataset statistics come from the training part only, so nothing
        # from the validation images influences the preprocessing.
        base_tf = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
        stats_ds = subset_cls(datasets.ImageFolder(train_dir, transform=base_tf), train_idx)
        norm_mean, norm_std = compute_mean_std(stats_ds)
    train_tf, eval_tf = _build_transforms(norm_mean, norm_std)

    # Train and validation need separate underlying datasets: two Subsets of
    # one ImageFolder share a single ``transform`` attribute, so setting the
    # validation pipeline would silently switch off training augmentation.
    train_ds = subset_cls(datasets.ImageFolder(train_dir, transform=train_tf), train_idx)
    val_ds = subset_cls(datasets.ImageFolder(train_dir, transform=eval_tf), val_idx)
    # NOTE(review): assumes Test/ has the same class sub-folders as Train/ so
    # that label indices line up - confirm for the dataset in use.
    test_ds = datasets.ImageFolder(test_dir, transform=eval_tf)

    t_loader = DataLoader(train_ds, batch_size=cfg.batch, sampler=sampler, num_workers=2, pin_memory=pin_memory)
    v_loader = DataLoader(val_ds, batch_size=cfg.batch, shuffle=False, num_workers=2, pin_memory=pin_memory)
    test_loader = DataLoader(test_ds, batch_size=cfg.batch, shuffle=False, num_workers=2, pin_memory=pin_memory)

    results = {}

    for name in cfg.models:
        print("\n──", name.upper(), "──")

        model = get_model(name, classes=len(full_ds.classes), pretrained=pretrained).to(device)
        opt = optim.Adam(model.parameters(), lr=cfg.lr)
        sched = optim.lr_scheduler.ReduceLROnPlateau(opt, mode="max", factor=0.3, patience=3)
        crit = nn.CrossEntropyLoss()

        # Start below any reachable accuracy so the first epoch always writes
        # a checkpoint, even when validation accuracy is exactly 0.
        best_acc, stale_epochs = -1.0, 0
        ckpt = Path(f"best_{name}.pt")
        for ep in tqdm(range(1, cfg.epochs+1), desc="epochs", position=0):
            run_epoch(model, t_loader, crit, optimizer=opt, device=device, desc=f"train e{ep}")
            _, v_acc, _, _ = run_epoch(model, v_loader, crit, device=device, desc=f"val   e{ep}")
            sched.step(v_acc)
            if v_acc > best_acc:
                best_acc = v_acc
                stale_epochs = 0
                torch.save(model.state_dict(), ckpt)
            else:
                stale_epochs += 1
                if stale_epochs >= cfg.patience:
                    print("early stop")
                    break

        # Only reload when this run actually produced a checkpoint (it does
        # not when epochs == 0); never fall back to a stale file on disk.
        if best_acc >= 0.0:
            model.load_state_dict(torch.load(ckpt, map_location=device))
        _, test_acc, micro, macro = run_epoch(model, test_loader, crit, device=device, desc="test")
        results[name] = test_acc
        print(f"Test-Acc {test_acc:.3f}  micro-F1 {micro:.3f}  macro-F1 {macro:.3f}")

    print("\nSummary")
    for k, v in results.items():
        print(f"{k:20s}: {v:.3f}")
    if results:
        best = max(results, key=results.get)
        print("Best model:", best, results[best])
    return results

# ──────────────────────────── Main ─────────────────────────────
# Entry point: parse the configuration and run the benchmark. ``cfg`` is bound
# at module level, so it stays available afterwards (e.g. in a notebook cell).
if __name__ == "__main__":
    cfg = parse_args()
    benchmark(cfg)


                                                              


── MOBILENET_V2 ──


epochs:   0%|          | 0/25 [00:00<?, ?it/s]
train e1:   0%|          | 0/30 [00:00<?, ?it/s][A
train e1:   0%|          | 0/30 [00:08<?, ?it/s, acc=0.125, loss=2.2][A
train e1:   3%|▎         | 1/30 [00:08<04:01,  8.32s/it, acc=0.125, loss=2.2][A
train e1:   3%|▎         | 1/30 [00:13<04:01,  8.32s/it, acc=0.156, loss=2.17][A
train e1:   7%|▋         | 2/30 [00:13<02:59,  6.40s/it, acc=0.156, loss=2.17][A
train e1:   7%|▋         | 2/30 [00:18<02:59,  6.40s/it, acc=0.208, loss=2.12][A
train e1:  10%|█         | 3/30 [00:18<02:34,  5.72s/it, acc=0.208, loss=2.12][A
train e1:  10%|█         | 3/30 [00:23<02:34,  5.72s/it, acc=0.238, loss=2.08][A
train e1:  13%|█▎        | 4/30 [00:23<02:24,  5.56s/it, acc=0.238, loss=2.08][A
train e1:  13%|█▎        | 4/30 [00:28<02:24,  5.56s/it, acc=0.275, loss=2.03][A
train e1:  17%|█▋        | 5/30 [00:28<02:09,  5.19s/it, acc=0.275, loss=2.03][A
train e1:  17%|█▋        | 5/30 [00:32<02:09,  5.19s/it, acc=0.315, loss=1.97][A
train e1:

KeyboardInterrupt: 