In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("sehastrajits/fundus-aptosddridirdeyepacsmessidor")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fundus-aptosddridirdeyepacsmessidor


In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_curve, auc, confusion_matrix, ConfusionMatrixDisplay
from itertools import cycle

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data paths
data_dir = "/kaggle/input/fundus-aptosddridirdeyepacsmessidor/split_dataset"
train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")
test_dir = os.path.join(data_dir, "test")

# Data preprocessing and augmentation
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.GaussianBlur(kernel_size=3),  # Noise removal
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.GaussianBlur(kernel_size=3),  # Noise removal
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load datasets
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(val_dir, transform=val_test_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=val_test_transforms)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Class names
class_names = train_dataset.classes  # ['0', '1', '2', '3', '4']

# Load pretrained ResNet50
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 5)  # 5 classes
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Training and validation
num_epochs = 100
train_losses, val_losses = [], []
train_accs, val_accs = [], []

for epoch in range(num_epochs):
    # Training
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # Validation
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_loss = running_loss / len(val_loader)
    val_acc = 100 * correct / total
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # Save model after every epoch
    torch.save(model.state_dict(), "best_model.pth")
    print(f"Model saved at epoch {epoch+1}")

    scheduler.step()

# Plot accuracy and loss
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_accs, label="Train Accuracy")
plt.plot(val_accs, label="Validation Accuracy")
plt.title("Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(train_losses, label="Train Loss")
plt.plot(val_losses, label="Validation Loss")
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.savefig("accuracy_loss_plot.png")
plt.close()

# ROC and AUC
model.eval()
y_true = []
y_score = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        y_true.extend(labels.cpu().numpy())
        y_score.extend(torch.softmax(outputs, dim=1).cpu().numpy())

y_true = np.array(y_true)
y_score = np.array(y_score)
n_classes = 5
fpr = dict()
tpr = dict()
roc_auc = dict()

# Compute ROC curve and AUC for each class
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true == i, y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curves
plt.figure()
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green', 'red'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, label=f'ROC curve of class {i} (AUC = {roc_auc[i]:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves')
plt.legend(loc='lower right')
plt.savefig('roc_curve.png')
plt.close()

# Confusion matrix
if os.path.exists("best_model.pth"):
    model.load_state_dict(torch.load("best_model.pth"))
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    cm = confusion_matrix(y_true, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(cmap=plt.cm.Blues)
    plt.title("Confusion Matrix")
    plt.savefig("confusion_matrix.png")
    plt.close()
else:
    print("Warning: best_model.pth not found. Skipping confusion matrix.")

# Test the model
if os.path.exists("best_model.pth"):
    model.load_state_dict(torch.load("best_model.pth"))
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    test_acc = 100 * correct / total
    print(f"Test Accuracy: {test_acc:.2f}%")
else:
    print("Warning: best_model.pth not found. Skipping testing.")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 217MB/s]


Epoch 1/100, Train Loss: 1.1743, Train Acc: 50.49%, Val Loss: 1.0458, Val Acc: 57.28%
Model saved at epoch 1
Epoch 2/100, Train Loss: 1.0305, Train Acc: 57.34%, Val Loss: 1.0050, Val Acc: 58.58%
Model saved at epoch 2
Epoch 3/100, Train Loss: 0.9738, Train Acc: 59.81%, Val Loss: 1.1448, Val Acc: 56.14%
Model saved at epoch 3
Epoch 4/100, Train Loss: 0.9345, Train Acc: 61.75%, Val Loss: 0.9218, Val Acc: 61.39%
Model saved at epoch 4
Epoch 5/100, Train Loss: 0.9043, Train Acc: 63.36%, Val Loss: 0.9019, Val Acc: 63.24%
Model saved at epoch 5
Epoch 6/100, Train Loss: 0.8748, Train Acc: 64.52%, Val Loss: 0.8619, Val Acc: 65.11%
Model saved at epoch 6
Epoch 7/100, Train Loss: 0.8442, Train Acc: 65.74%, Val Loss: 0.8253, Val Acc: 66.41%
Model saved at epoch 7
Epoch 8/100, Train Loss: 0.8168, Train Acc: 66.86%, Val Loss: 0.8352, Val Acc: 66.14%
Model saved at epoch 8
Epoch 9/100, Train Loss: 0.7839, Train Acc: 68.02%, Val Loss: 0.8396, Val Acc: 66.36%
Model saved at epoch 9
Epoch 10/100, Train