Im Notebook exploration.ipynb wird das CV-Modell trainiert.

Es speichert das trainierte Modell als 'resnet34-stage-final' und 'export_resnet34.pkl'.

In [None]:
# train_fastai.py
# Training eines Bildklassifikations-Modells mit fastai
# Train-Bilder: data/images/train, Validierungs-Bilder: data/images/val

from pathlib import Path
import sys
import subprocess

# Make sure fastai is importable; install it on demand when it is missing.
try:
    from fastai.vision.all import *
except ImportError:
    # Only a missing package can be repaired by installing it. Any other
    # import-time failure should surface unchanged instead of triggering a
    # pip run that cannot help.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "fastai"])
    from fastai.vision.all import *

def _read_labels(csv_path, image_root, subfolder):
    """Read ``filename;label`` rows from a semicolon-separated label file.

    Args:
        csv_path: Path of the CSV file. A missing file yields an empty list.
        image_root: Directory that contains the image subfolders.
        subfolder: Folder below ``image_root`` that holds the images listed
            by bare file name in this CSV.

    Returns:
        A list of ``(image_path, label)`` tuples, one per usable row. The
        label is ``''`` when a row has no second column.
    """
    import csv

    samples = []
    if not csv_path.exists():
        return samples

    first_data_row = True
    with csv_path.open('r', newline='') as f:
        for row in csv.reader(f, delimiter=';'):
            if not row:
                continue
            # Only the first two columns are used; extra columns are ignored.
            name = row[0].strip()
            label = row[1].strip() if len(row) > 1 else ''
            if not name:
                continue

            img = Path(name)
            if img.parent == Path('.'):
                # Bare file name: anchor it at the image root so the file can
                # be opened from the current working directory.
                img = image_root / subfolder / img
            elif not img.is_absolute() and not img.exists() and (image_root / img).exists():
                # Relative path that only resolves below the image root.
                img = image_root / img

            is_first = first_data_row
            first_data_row = False
            if is_first and not img.exists():
                # A first row that names no existing file is treated as the
                # optional header line and skipped.
                print(f"Überspringe Kopfzeile in {csv_path}: {row}")
                continue

            samples.append((img, label))
    return samples


def _build_dataloaders(path):
    """Create the DataLoaders from the label CSVs or, failing that, from folders.

    Args:
        path: Image root containing ``train``/``val`` and, optionally,
            ``train_labels.csv`` and ``val_labels.csv``.

    Returns:
        The fastai DataLoaders used for training.
    """
    train_samples = _read_labels(path/'train_labels.csv', path, 'train')
    valid_samples = _read_labels(path/'val_labels.csv', path, 'val')

    if not (train_samples and valid_samples):
        # Fallback: use the existing folder structure (one subfolder per class).
        return ImageDataLoaders.from_folder(
            path,
            train='train',
            valid='val',
            item_tfms=Resize(460),
            batch_tfms=aug_transforms(size=224),
            bs=64
        )

    samples = train_samples + valid_samples
    valid_paths = {img for img, _ in valid_samples}

    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_items=lambda _source: samples,
        # get_x/get_y become part of the transform pipeline that learn.export
        # pickles; lambdas cannot be pickled, so ItemGetter is used to pick
        # the path and the label out of each (path, label) sample.
        get_x=ItemGetter(0),
        get_y=ItemGetter(1),
        splitter=FuncSplitter(lambda sample: sample[0] in valid_paths),
        item_tfms=Resize(460),
        batch_tfms=aug_transforms(size=224)
    )
    return dblock.dataloaders(path, bs=64)


def main():
    """Train a ResNet34 image classifier with fastai and save/export it.

    Images are expected below ``data/images/train`` and ``data/images/val``.
    When both ``train_labels.csv`` and ``val_labels.csv`` yield samples, labels
    come from these files; otherwise the folder structure is used.

    Raises:
        FileNotFoundError: If the train or validation folder does not exist.
    """
    path = Path("data/images")
    train_folder = path/"train"
    valid_folder = path/"val"

    if not train_folder.exists() or not valid_folder.exists():
        raise FileNotFoundError(f"Benötigte Ordner nicht gefunden: {train_folder} oder {valid_folder}")

    print("Verfügbare Device(s):", defaults.device)

    dls = _build_dataloaders(path)

    # Model: ResNet34 as the starting point.
    # NOTE(review): newer fastai releases name this factory `vision_learner`;
    # confirm the installed version before switching.
    learn = cnn_learner(dls, resnet34, metrics=[accuracy])

    # Learning-rate search is best effort: any failure (e.g. plotting in a
    # headless environment) falls back to the default learning rate.
    base_lr = 3e-3
    try:
        lr_suggestion = learn.lr_find(suggest_funcs=(valley,))
        print("LR-Find suggestion:", lr_suggestion)
        # Actually use the suggested value for the first training phase.
        base_lr = float(lr_suggestion.valley)
    except Exception:
        print("lr_find fehlgeschlagen oder nicht möglich; Verwende Standard lr 3e-3")

    # First training phase.
    learn.fine_tune(5, base_lr=base_lr)

    # Fine-tuning: unfreeze and train all layers with lower learning rates.
    learn.unfreeze()
    learn.fit_one_cycle(3, lr_max=slice(1e-6, 1e-4))

    # Save the weights and export the learner for inference
    # (the export can be loaded again with load_learner).
    learn.save("resnet34-stage-final")
    learn.export("export_resnet34.pkl")

    print("Training abgeschlossen. Modell gespeichert als 'resnet34-stage-final' und 'export_resnet34.pkl'.")

# Run the training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()