In [None]:

import os
import random
import numpy as np
import torch
import logging

# Best practice: configure logging once, before any other cell emits a record.
logging.basicConfig(
    level=logging.INFO,
    filename='training.log',
    format='%(asctime)s %(levelname)s:%(message)s',
)
# Root logger; the remaining cells log through this handle.
logger = logging.getLogger()

def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random`` module, NumPy and PyTorch (CPU and CUDA), and puts cuDNN into
    deterministic mode with benchmarking disabled. The chosen seed is
    written to the log.

    Args:
        seed: Value handed to every seeding call. Defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python and NumPy global generators.
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

    # PyTorch generators: CPU first, then the CUDA seeding calls.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Trade cuDNN autotuning for deterministic kernel selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    logger.info(f"Seed gesetzt auf {seed}")

# Seed all generators with the default seed (42) before any data is split or any model is built.
seed_everything()


In [None]:

# Best practice: central configuration
class Config:
    """Single place for the paths and hyperparameters read by the later cells."""

    # --- data ---
    DATA_DIR = '/mnt/data/data/raw'  # root folder handed to ImageFolder
    BATCH_SIZE = 32
    NUM_CLASSES = 20                 # width of the replacement classifier head

    # --- optimisation ---
    NUM_EPOCHS = 30
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    LR_STEP_SIZE = 7                 # StepLR step_size
    LR_GAMMA = 0.1                   # StepLR gamma
    EARLY_STOPPING_PATIENCE = 5      # default patience of EarlyStopping

    # --- hardware ---
    # Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
    DEVICE = 'cpu' if not torch.cuda.is_available() else 'cuda'

cfg = Config()
# Every setting on Config is a class attribute, so the instance's own __dict__
# is empty and logging it would record "{}". Collect the public class-level
# names instead so the log actually shows the values used for this run.
_cfg_snapshot = {
    name: value
    for name, value in vars(Config).items()
    if not name.startswith('_')
}
logger.info(f"Konfiguration geladen: {_cfg_snapshot}")


In [None]:

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Training pipeline: resize, light augmentation (random flip / small rotation),
# then tensor conversion and normalisation.
# NOTE(review): the model cell loads ImageNet-pretrained weights; a single
# 0.5/0.5 mean/std may not match the statistics those weights were trained
# with - confirm whether ImageNet mean/std should be used here.
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Validation pipeline: same resize/normalisation, no random augmentation.
transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# random_split returns Subset views that share ONE underlying dataset object.
# Assigning `val_set.dataset.transform` would therefore also replace the
# transform used by the training subset and silently disable augmentation.
# Build two ImageFolder instances over the same directory instead, one per
# transform, and reuse the split indices for the validation view.
dataset = datasets.ImageFolder(cfg.DATA_DIR, transform=transform_train)
val_source = datasets.ImageFolder(cfg.DATA_DIR, transform=transform_val)

val_size = int(0.2 * len(dataset))
train_size = len(dataset) - val_size
train_set, val_split = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)
# Same sample indices as the random split, but read through transform_val.
val_set = torch.utils.data.Subset(val_source, val_split.indices)

train_loader = DataLoader(train_set, batch_size=cfg.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=cfg.BATCH_SIZE, shuffle=False)
logger.info("DataLoader erstellt.")


In [None]:

import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

class EarlyStopping:
    """Track validation loss and signal when training should stop.

    Call the instance once per epoch with the current validation loss and the
    model. When the loss is strictly lower than the best one seen so far (or
    on the very first call) the model's ``state_dict`` is saved and the
    patience counter is reset. Otherwise the counter is incremented, and
    ``early_stop`` becomes ``True`` once it reaches ``patience``.

    Attributes are plain instance fields: ``patience``, ``counter``,
    ``best_score`` (negated best loss, ``None`` before the first call),
    ``early_stop`` and ``checkpoint_path``.
    """

    def __init__(self, patience=cfg.EARLY_STOPPING_PATIENCE, checkpoint_path='best_model.pth'):
        """Create a tracker.

        Args:
            patience: Number of consecutive non-improving calls after which
                ``early_stop`` is set.
            checkpoint_path: File the best ``state_dict`` is written to.
                Defaults to ``'best_model.pth'``.
        """
        self.patience = patience
        self.checkpoint_path = checkpoint_path
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss.

        Args:
            val_loss: Validation loss of the epoch that just finished.
            model: Model whose ``state_dict`` is saved on improvement.

        Returns:
            This tracker, so callers can write
            ``early_stopping(loss, model).early_stop``.
        """
        # Negate so that "higher score" means "lower loss".
        score = -val_loss
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), self.checkpoint_path)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        # Returning the tracker (instead of None) keeps the chained
        # `.early_stop` access used by the training loop from raising.
        return self

# The visible cells import only `datasets` and `transforms` from torchvision,
# so `models` must be imported here or this cell fails with NameError on a
# fresh kernel.
from torchvision import models

# ResNet-18 with pretrained weights; the final fully connected layer is
# replaced so the network outputs cfg.NUM_CLASSES logits.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm the installed version before switching.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, cfg.NUM_CLASSES)
model.to(cfg.DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=cfg.LEARNING_RATE, weight_decay=cfg.WEIGHT_DECAY)
# Multiply the learning rate by LR_GAMMA every LR_STEP_SIZE scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.LR_STEP_SIZE, gamma=cfg.LR_GAMMA)

early_stopping = EarlyStopping()
# Per-epoch mean losses, appended once per completed epoch.
history = {'train_loss': [], 'val_loss': []}
for epoch in range(cfg.NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(cfg.DEVICE), labels.to(cfg.DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a
        # per-sample mean even when the last batch is smaller.
        train_loss += loss.item() * inputs.size(0)
    train_loss /= len(train_loader.dataset)

    # ---- validation pass (no gradients) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(cfg.DEVICE), labels.to(cfg.DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
    val_loss /= len(val_loader.dataset)
    scheduler.step()

    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    logger.info(f"Epoch {epoch+1}: Train Loss={train_loss}, Val Loss={val_loss}")

    # Update the tracker first, then read the flag from the tracker object
    # itself rather than from the call's return value; chaining `.early_stop`
    # onto the call raised AttributeError.
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        logger.warning("Frühes Stoppen aktiviert.")
        break


In [None]:

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Restore the checkpoint written by early stopping. `map_location` remaps the
# stored tensors to cfg.DEVICE, so a checkpoint saved on a GPU still loads
# when this cell runs on a CPU-only machine.
model.load_state_dict(torch.load('best_model.pth', map_location=cfg.DEVICE))
model.eval()

# Collect ground-truth and predicted class indices over the validation set.
y_true, y_pred = [], []
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(cfg.DEVICE)
        outputs = model(inputs)
        # Index of the largest logit per sample is the predicted class.
        preds = outputs.argmax(dim=1)
        # Labels are never moved to the device in this loop, so .numpy() is
        # called on them directly; predictions are moved back to the CPU first.
        y_true.extend(labels.numpy())
        y_pred.extend(preds.cpu().numpy())

acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
logger.info(f"Validation Accuracy={acc}, F1 Score={f1}")
print(f"Validation Accuracy: {acc:.4f}")
print(f"Validation F1-Score: {f1:.4f}")

cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()
