In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt


In [31]:
# Data locations. The dataset root can be overridden through the
# MELANOMA_DATA_DIR environment variable; when it is unset, the original local
# directory is used.
DATA_ROOT = os.environ.get(
    "MELANOMA_DATA_DIR",
    r"C:\Users\gabri\OneDrive\Studium\kurse\6_Semester\dshealth\MelanomaClassification",
)
IMAGE_DIR = os.path.join(DATA_ROOT, "ISIC_2019_Training_Input", "ISIC_2019_Training_Input")
LABELS_CSV = os.path.join(DATA_ROOT, "ISIC_2019_Training_GroundTruth.csv")


# Read the ground-truth CSV and keep only the image id and the melanoma label.
# .copy() makes the two-column frame independent of the full table, so the
# column assignment below is never a write into a slice of another DataFrame.
df = pd.read_csv(LABELS_CSV)
df = df[['image', 'MEL']].copy()
df['image'] = df['image'] + ".jpg"  # turn image ids into file names

print(df.head())  # show the first few rows



              image  MEL
0  ISIC_0000000.jpg  0.0
1  ISIC_0000001.jpg  0.0
2  ISIC_0000002.jpg  1.0
3  ISIC_0000003.jpg  0.0
4  ISIC_0000004.jpg  1.0


In [32]:
# Image preprocessing for VGG16 (the network expects a specific normalisation).
# Steps run in the listed order on each PIL image.
transform = transforms.Compose(
    [
        # fixed spatial size
        transforms.Resize((224, 224)),
        # random augmentations
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        # PIL image -> tensor, then per-channel normalisation
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Custom PyTorch dataset
class SkinCancerDataset(Dataset):
    """Dataset backed by a DataFrame of image file names and labels.

    Column 0 of ``dataframe`` is read as the image file name (joined onto
    ``image_dir``) and column 1 as the numeric label.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        """Store the label table, the image directory and the optional transform."""
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is loaded from disk and converted to RGB; the label is a
        scalar float tensor. When a transform was supplied it is applied to
        the image before returning.
        """
        file_path = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        image = Image.open(file_path).convert("RGB")
        label = torch.tensor(float(self.dataframe.iloc[idx, 1]))  # 0 or 1

        if not self.transform:
            return image, label
        return self.transform(image), label

# Two views of the same label table:
#   * `dataset` applies the augmenting `transform` and is used for training;
#   * `eval_dataset` applies only the deterministic steps (resize, tensor
#     conversion, normalisation) so validation metrics are not perturbed by
#     random flips, rotations or colour jitter.
# The resize and normalisation values below must mirror those in `transform`.
dataset = SkinCancerDataset(df, IMAGE_DIR, transform=transform)
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
eval_dataset = SkinCancerDataset(df, IMAGE_DIR, transform=eval_transform)

# Split into training (80%) and validation (20%).
# A seeded permutation keeps the split identical across kernel restarts, and
# sharing one permutation guarantees the two subsets are disjoint.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()
train_dataset = torch.utils.data.Subset(dataset, shuffled_indices[:train_size])
val_dataset = torch.utils.data.Subset(eval_dataset, shuffled_indices[train_size:])

# DataLoaders for batched processing (only the training data is reshuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"Train: {len(train_dataset)}, Validierung: {len(val_dataset)}")


Train: 20264, Validierung: 5067


In [40]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")  # prints "cuda" or "cpu"

# Load VGG16 (pretrained on ImageNet)
model = models.vgg16(pretrained=True)

# Replace the last classifier layer for binary classification:
# a single output neuron followed by a sigmoid
model.classifier[6] = nn.Sequential(nn.Linear(4096, 1), nn.Sigmoid())

# Move the model to the selected device
model = model.to(device)


Using device: cuda


In [41]:
# Binary cross-entropy loss
criterion = nn.BCELoss()
# Adam over all model parameters, learning rate 0.0001
optimizer = optim.Adam(model.parameters(), lr=1e-4)


In [None]:
epochs = 5
best_acc = 0.0

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        # reshape labels to one column so they line up with the model output
        labels = labels.to(device).float().view(-1, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).float().view(-1, 1)

            outputs = model(images)
            # threshold at 0.5
            predicted = (outputs > 0.5).float()

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    acc = correct / total

    # the reported loss is the mean of the per-batch training losses
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Validation Acc: {acc:.4f}")

    # keep the weights of the epoch with the best validation accuracy so far
    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), "best_melanoma_vgg16.pth")

print("Training abgeschlossen!")


Epoch 1/5, Loss: 0.3896, Validation Acc: 0.8411
Epoch 2/5, Loss: 0.3539, Validation Acc: 0.8461


In [None]:
# Put the model into evaluation mode
model.eval()

# Accumulators for the true labels and the thresholded predictions
all_labels, all_preds = [], []

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device).float().view(-1, 1)

        outputs = model(images)
        # threshold at 0.5
        predicted = (outputs > 0.5).float()

        # collect this batch on the CPU as NumPy rows
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Convert the collected rows into NumPy arrays
all_labels = np.array(all_labels)
all_preds = np.array(all_preds)

# Compute the metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)

# Report the results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1-Score: {f1:.4f}")
