In [1]:
"""
Zuerst stellen wir sicher, dass die notwendigen Pakete installiert sind und importieren diese:
"""
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision.transforms.autoaugment import AutoAugmentPolicy
from torch.utils.data import DataLoader, random_split
from torchvision import models
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import os
import certifi
from sklearn.metrics import f1_score

os.environ['SSL_CERT_FILE'] = certifi.where()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [None]:
"""
Random Seet setzen, damit Ergebnisse reproduzierbar sind
"""

import random
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

set_seed(42)

g = torch.Generator()
g.manual_seed(42)

In [2]:
# --- TRANSFORMATIONS ---
transform = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.AutoAugment(AutoAugmentPolicy.IMAGENET),
        transforms.ToTensor()
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
}

# --- DATASETS ---
train_dataset = torchvision.datasets.ImageFolder(root='Data/Train', transform=transform["train"])
test_dataset = torchvision.datasets.ImageFolder(root='Data/Test', transform=transform["test"])

# --- SPLIT TRAIN / VALIDATION ---
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])


train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True, generator=g, worker_init_fn=seed_worker)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=0, pin_memory=True, generator=g, worker_init_fn=seed_worker)

# --- MODEL ---
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# ❗ Keine Layer werden eingefroren – alle Parameter werden trainiert
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))
model.to(device)

# --- LOSS, OPTIMIZER, SCHEDULER ---
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

# --- EARLY STOPPING PARAMS ---
patience = 5
best_val_acc = 0.0
epochs_no_improve = 0
num_epochs = 30

# --- TRAINING LOOP ---
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    nr_samples = 0

    for images, labels in tqdm(train_loader, desc=f"Train Epoch {epoch+1}"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        nr_samples += images.size(0)
        running_loss += loss.item() * images.size(0)
        running_corrects += torch.sum(preds == labels)

    epoch_loss = running_loss / nr_samples
    epoch_acc = running_corrects.double() / nr_samples

    # --- VALIDATION ---
    model.eval()
    val_running_corrects = 0
    val_nr_samples = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            val_running_corrects += torch.sum(preds == labels)
            val_nr_samples += images.size(0)

    val_acc = val_running_corrects.double() / val_nr_samples

    print(f"Epoch {epoch+1}, Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, Val Acc: {val_acc:.4f}")

    scheduler.step(val_acc)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        epochs_no_improve = 0
        torch.save(model.state_dict(), 'Cancer_Existing_Model_best_model_EarlyStopping_Scheduler.pt')
    else:
        epochs_no_improve += 1
        print(f"No improvement for {epochs_no_improve}/{patience} epochs.")

    if epochs_no_improve >= patience:
        print(f"Early stopping after {epoch+1} epochs. Best Val Acc: {best_val_acc:.4f}")
        break

# --- LOAD BEST MODEL ---
model.load_state_dict(torch.load('Cancer_Existing_Model_best_model_EarlyStopping_Scheduler.pt'))

# --- TESTING ---
model.eval()
test_running_corrects = 0
test_nr_samples = 0

for images, labels in tqdm(test_loader, desc="Testing"):
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, preds = torch.max(outputs, 1)
    test_running_corrects += torch.sum(preds == labels)
    test_nr_samples += images.size(0)

test_acc = test_running_corrects.double() / test_nr_samples
print(f"Test Accuracy: {test_acc:.4f}")

Train Epoch 1: 100%|██████████| 56/56 [06:35<00:00,  7.06s/it]


Epoch 1, Train Loss: 1.7706, Train Acc: 0.3680, Val Acc: 0.4799


Train Epoch 2: 100%|██████████| 56/56 [07:39<00:00,  8.20s/it]


Epoch 2, Train Loss: 1.4003, Train Acc: 0.5003, Val Acc: 0.5000


Train Epoch 3: 100%|██████████| 56/56 [07:16<00:00,  7.79s/it]


Epoch 3, Train Loss: 1.1557, Train Acc: 0.5812, Val Acc: 0.5804


Train Epoch 4: 100%|██████████| 56/56 [07:37<00:00,  8.17s/it]


Epoch 4, Train Loss: 1.0832, Train Acc: 0.6086, Val Acc: 0.5513
No improvement for 1/5 epochs.


Train Epoch 5: 100%|██████████| 56/56 [07:12<00:00,  7.72s/it]


Epoch 5, Train Loss: 0.9477, Train Acc: 0.6616, Val Acc: 0.5424
No improvement for 2/5 epochs.


Train Epoch 6: 100%|██████████| 56/56 [07:16<00:00,  7.80s/it]


KeyboardInterrupt: 