Ich werde verschiedene Modelle trainieren und sie vergleichen.\
Kriterien sind:
- Dauer des Trainings
- Größe des Modells
- Genauigkeit bei der Erkennung der verschiedenen Katzen

# YOLOv11

# MobileNetV2

In [11]:
#!pip install torch torchvision

In [12]:
import torch
import os

from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder


In [13]:
# Locations on the network share (UNC paths; raw strings keep the backslashes
# literal). `dst_directory` is the root that the train/val/test folders are
# derived from further down; `src_directory` is presumably the unsplit image
# collection — TODO confirm, it is not read in the visible cells.
src_directory = r"\\Cloud_9\data\pet_feeder_ai\cats"
dst_directory = r"\\Cloud_9\data\pet_feeder_ai\data_split"
# Names of the two classes to distinguish.
# NOTE(review): torchvision's ImageFolder assigns label indices by sorted
# folder name. If the class folders carry these names, label 0 would be "Elsa"
# and label 1 "Fabius", i.e. NOT the order of this list — do not index this
# list with predicted labels; verify against `train_ds.classes`.
class_names   = ["Fabius", "Elsa"]

In [14]:
# Derive the three split folders below the split root in one pass; the
# generator's loop variable does not leak into the notebook namespace.
train_directory, val_directory, test_directory = (
    os.path.join(dst_directory, split_name)
    for split_name in ("train", "val", "test")
)

In [15]:
# 1) Augmentation pipeline for the training split
train_transforms = transforms.Compose(
    [
        # Bring every image to a fixed 224x224 size first.
        transforms.Resize(size=(224, 224)),
        # Random perturbations, re-drawn each time a sample is fetched.
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(15),
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        # Crop 80-100 % of the image area and scale it back to 224x224.
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        # PIL image -> float tensor, then per-channel normalisation with the
        # statistics commonly used for ImageNet-pretrained torchvision models.
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [16]:
# 2) Deterministic pipeline for validation/test: resize + normalise only
eval_transforms = transforms.Compose(
    [
        # Same fixed input size as the training pipeline, no augmentation.
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        # Same per-channel statistics as in the training pipeline.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [17]:
from torchvision.datasets import ImageFolder
from PIL import Image

class DebugImageFolder(ImageFolder):
    """ImageFolder that names the offending file when a sample fails to load.

    Behaves like ``ImageFolder`` for healthy samples. If opening, decoding or
    transforming a sample raises, the file path and the error are printed
    before the exception is re-raised, so a corrupt image can be located.
    """

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        Args:
            index: Position in ``self.samples``.

        Returns:
            The RGB image after ``self.transform`` (if set) and the label
            after ``self.target_transform`` (if set).

        Raises:
            Exception: Whatever the load/transform step raised; it is
                re-raised unchanged after the path has been printed.
        """
        path, label = self.samples[index]
        try:
            # The context manager closes the underlying file handle even when
            # decoding fails or the file holds several frames; `convert`
            # returns an independent in-memory copy that outlives the handle.
            with Image.open(path) as raw:
                img = raw.convert("RGB")
            # Compare against None explicitly: a transform object that defines
            # __len__ (e.g. an empty nn.Sequential) would be falsy.
            if self.transform is not None:
                img = self.transform(img)
            if self.target_transform is not None:
                label = self.target_transform(label)
        except Exception as e:
            # Print the broken path, then re-raise the original error.
            print(f"⚠️ Fehler beim Laden von: {path}\n   {e}")
            raise
        return img, label

# Usage: one dataset per split. Training gets the augmenting pipeline,
# validation and test share the deterministic one.
train_ds, val_ds, test_ds = (
    DebugImageFolder(split_dir, transform=split_tfm)
    for split_dir, split_tfm in (
        (train_directory, train_transforms),
        (val_directory, eval_transforms),
        (test_directory, eval_transforms),
    )
)

# One loader per dataset: batches of 32, loaded in the main process
# (num_workers=0). Only the training loader shuffles.
train_dl, val_dl, test_dl = (
    DataLoader(split_ds, batch_size=32, shuffle=do_shuffle, num_workers=0)
    for split_ds, do_shuffle in (
        (train_ds, True),
        (val_ds, False),
        (test_ds, False),
    )
)


In [18]:
"""# 3) Datasets und Loader
train_ds = ImageFolder(train_directory, transform=train_transforms)
val_ds   = ImageFolder(val_directory,   transform=eval_transforms)
test_ds  = ImageFolder(test_directory,  transform=eval_transforms)

train_dl = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=4)
val_dl   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=4)
test_dl  = DataLoader(test_ds,  batch_size=32, shuffle=False, num_workers=4)"""

'# 3) Datasets und Loader\ntrain_ds = ImageFolder(train_directory, transform=train_transforms)\nval_ds   = ImageFolder(val_directory,   transform=eval_transforms)\ntest_ds  = ImageFolder(test_directory,  transform=eval_transforms)\n\ntrain_dl = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=4)\nval_dl   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=4)\ntest_dl  = DataLoader(test_ds,  batch_size=32, shuffle=False, num_workers=4)'

In [19]:
from torchvision.models import MobileNet_V2_Weights, mobilenet_v2
import torch.nn as nn

# Pretrained MobileNetV2. The `pretrained=True` flag is deprecated in
# torchvision; IMAGENET1K_V1 is the checkpoint that flag used to load.
model = mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1)

# Replace the final classifier layer with one output per class. The count is
# taken from the dataset so it always matches the labels the loader yields
# (two classes with the current data).
model.classifier[1] = nn.Linear(model.last_channel, len(train_ds.classes))

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Kept as the last expression so the notebook still echoes the architecture.
model.to(device)


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [22]:
from tqdm import tqdm

# Fine-tune the model with early stopping on the validation loss.
criterion = nn.CrossEntropyLoss()
# `torch.optim` is reached through the already imported `torch` package; no
# bare `optim` name is imported in the visible cells.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_loss = float('inf')
patience = 3   # epochs without improvement tolerated before stopping
counter = 0    # consecutive epochs without improvement so far

# Epoch loop (at most 20 epochs) with tqdm progress bars around the batches
for epoch in range(1, 21):
    # --- Training ---
    model.train()
    running_loss = 0
    for xb, yb in tqdm(train_dl, desc=f"[Epoch {epoch:02d}] Training", leave=False):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        # The criterion returns the batch mean; weight it by the batch size so
        # that dividing by the dataset size yields the per-sample mean.
        running_loss += loss.item() * xb.size(0)

    # --- Validation ---
    model.eval()
    val_loss = 0
    correct = total = 0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time without changing the computed values.
    with torch.no_grad():
        for xb, yb in tqdm(val_dl, desc=f"[Epoch {epoch:02d}] Validation", leave=False):
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            val_loss += criterion(logits, yb).item() * xb.size(0)
            preds = logits.argmax(dim=1)
            correct += (preds == yb).sum().item()
            total += yb.size(0)

    val_loss /= len(val_ds)
    val_acc = correct / total

    print(
        f"Epoch {epoch:02d} | "
        f"Train-Loss={(running_loss/len(train_ds)):.4f} | "
        f"Val-Loss={val_loss:.4f} | Val-Acc={val_acc:.4f}"
    )

    # Early stopping check: checkpoint on every new best validation loss,
    # stop after `patience` consecutive epochs without one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), 'catid_mobilenetv2.pt')
        print(" → Neues Bestmodell gespeichert")
    else:
        counter += 1
        print(f" → Keine Verbesserung ({counter}/{patience})")
        if counter >= patience:
            print(f"Abbruch nach {patience} Epochen ohne Verbesserung.")
            break


[Epoch 01] Training:   0%|          | 0/19 [00:00<?, ?it/s]

                                                                    

Epoch 01 | Train-Loss=0.1683 | Val-Loss=0.1488 | Val-Acc=0.9412
 → Neues Bestmodell gespeichert


                                                                    

Epoch 02 | Train-Loss=0.0962 | Val-Loss=0.1066 | Val-Acc=0.9706
 → Neues Bestmodell gespeichert


                                                                    

Epoch 03 | Train-Loss=0.0542 | Val-Loss=0.1027 | Val-Acc=0.9559
 → Neues Bestmodell gespeichert


                                                                    

Epoch 04 | Train-Loss=0.0614 | Val-Loss=0.1127 | Val-Acc=0.9632
 → Keine Verbesserung (1/3)


                                                                    

Epoch 05 | Train-Loss=0.0386 | Val-Loss=0.1854 | Val-Acc=0.9412
 → Keine Verbesserung (2/3)


                                                                    

Epoch 06 | Train-Loss=0.0277 | Val-Loss=0.1072 | Val-Acc=0.9632
 → Keine Verbesserung (3/3)
Abbruch nach 3 Epochen ohne Verbesserung.




# EfficientNet-Lite0 