In [None]:
import zipfile
import os

# Where the uploaded archive lives and the folder it gets unpacked into.
zip_path = "/content/preprocessed.zip"
extract_path = "datasets/preprocessed"

# Create the destination first; exist_ok lets this cell be re-run without error.
os.makedirs(extract_path, exist_ok=True)

# The context manager closes the archive as soon as extraction is done.
with zipfile.ZipFile(zip_path, mode="r") as zip_ref:
    zip_ref.extractall(path=extract_path)

print("Dataset unzipped into datasets/preprocessed/")

Dataset unzipped into datasets/preprocessed/


In [None]:

# Basic CNN using preprocessed images
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


In [None]:
# Parameters
IMG_SIZE = 224       # Preprocessed images are already 224x224
BATCH_SIZE = 32
NUM_CLASSES = 2
NUM_EPOCHS = 10
LEARNING_RATE = 1e-3
SEED = 42            # Fixed seed so a Restart & Run All starts from the same RNG state
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch before any model or DataLoader is created, so weight
# initialisation and shuffling draw from a known RNG state.
# NOTE(review): some GPU kernels are still non-deterministic, so metrics may
# not match bit-for-bit between runs on CUDA.
torch.manual_seed(SEED)

# Dataset class
class DRDataset(Dataset):
    """Two-class image dataset read from ``No_DR`` / ``DR`` sub-directories.

    Every image file found directly inside ``root_dir/No_DR`` is labelled 0 and
    every image file directly inside ``root_dir/DR`` is labelled 1. Any other
    entry in ``root_dir`` is ignored.

    Args:
        root_dir: Directory that contains the class sub-directories.
        transform: Optional callable applied to each loaded RGB image before
            it is returned.

    Attributes set by ``__init__``: ``images`` (list of file paths) and
    ``labels`` (list of ints, parallel to ``images``).
    """

    # Suffixes accepted as images (matched case-insensitively). Anything else
    # in a class folder (notes, metadata files, ...) is skipped instead of
    # crashing Image.open in the middle of an epoch.
    IMG_EXTENSIONS = (
        ".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp",
    )

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        label_map = {"No_DR": 0, "DR": 1}

        # os.listdir returns entries in arbitrary order; sorting both levels
        # makes the index -> sample mapping identical on every run and machine.
        for label in sorted(os.listdir(root_dir)):
            if label not in label_map:
                continue
            label_dir = os.path.join(root_dir, label)
            if not os.path.isdir(label_dir):
                continue
            for img_file in sorted(os.listdir(label_dir)):
                img_path = os.path.join(label_dir, img_file)
                # Skip hidden files (e.g. .DS_Store), nested folders and
                # anything that does not look like an image.
                if img_file.startswith(".") or not os.path.isfile(img_path):
                    continue
                if not img_file.lower().endswith(self.IMG_EXTENSIONS):
                    continue
                self.images.append(img_path)
                self.labels.append(label_map[label])

    def __len__(self):
        """Return the number of samples that were indexed."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB and, if a transform was given,
        passed through it. ``label`` is the int stored for that path.
        """
        img_path = self.images[idx]
        label = self.labels[idx]

        # Load preprocessed image directly. convert() yields an independent
        # in-memory image, so the underlying file can be closed right away
        # rather than lingering until garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Transforms
# Training and evaluation use the same pipeline: a single ToTensor step, with
# no augmentation or resizing applied to either split.
train_transform = transforms.Compose([transforms.ToTensor()])

val_test_transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Load datasets
train_dir = "/content/datasets/preprocessed/preprocessed/train"
val_dir   = "/content/datasets/preprocessed/preprocessed/val"
test_dir  = "/content/datasets/preprocessed/preprocessed/test"

# One dataset per split; validation and test share the evaluation transform.
train_dataset = DRDataset(root_dir=train_dir, transform=train_transform)
val_dataset = DRDataset(root_dir=val_dir, transform=val_test_transform)
test_dataset = DRDataset(root_dir=test_dir, transform=val_test_transform)

# Settings common to all three loaders; only the training loader shuffles.
loader_kwargs = {"batch_size": BATCH_SIZE, "num_workers": 2}

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
class SimpleCNN(nn.Module):
    """Two-block convolutional classifier for square RGB images.

    Each block is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2), so the
    spatial side is halved twice (224 -> 112 -> 56 for the default input size).
    A single linear layer then maps the flattened feature map to class logits.

    Args:
        num_classes: Number of output logits.
        img_size: Side length of the square input images. It sizes the linear
            layer; the default of 224 reproduces the original
            ``64 * 56 * 56`` input features, so existing checkpoints still load.

    Raises:
        ValueError: If ``img_size`` is smaller than 4, which would leave an
            empty feature map after the two poolings.
    """

    def __init__(self, num_classes=2, img_size=224):
        super().__init__()
        if img_size < 4:
            raise ValueError(f"img_size must be at least 4, got {img_size}")

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        # The padded 3x3 convolutions keep the spatial size; each 2x2 pooling
        # floor-halves it, so two of them floor-divide the side length by 4.
        feature_side = img_size // 4
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * feature_side * feature_side, num_classes),
        )

    def forward(self, x):
        """Run the feature extractor and classifier; return raw (unnormalised) logits."""
        x = self.features(x)
        x = self.classifier(x)
        return x

In [None]:
# Instantiate model, loss, optimizer
model = SimpleCNN(num_classes=NUM_CLASSES)
model = model.to(DEVICE)  # place the network on the selected device
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# Train for NUM_EPOCHS epochs. After each epoch, measure accuracy on the
# validation loader and checkpoint the weights whenever that accuracy improves.
best_acc = 0.0
for epoch in range(NUM_EPOCHS):
    # ---- Training ----
    model.train()
    running_loss = 0  # sum of per-batch mean losses for this epoch
    for images, labels in train_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] | Loss: {running_loss/len(train_loader):.4f}")

    # ---- Validation ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():  # no gradients are needed while scoring
        for images, labels in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = model(images)
            preds = outputs.argmax(dim=1)  # index of the largest logit per sample
            correct += int((preds == labels).sum())
            total += labels.shape[0]

    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc:.4f}")

    # ---- Save best model ----
    # Nothing else happens in the epoch, so skip ahead unless accuracy strictly improved.
    if val_acc <= best_acc:
        continue
    best_acc = val_acc
    torch.save(model.state_dict(), "best_cnn_preprocessed.pth")
    print(" Saved new best model")

Epoch [1/10] | Loss: 0.6405
Validation Accuracy: 0.7980
 Saved new best model
Epoch [2/10] | Loss: 0.4913
Validation Accuracy: 0.8107
 Saved new best model
Epoch [3/10] | Loss: 0.4551
Validation Accuracy: 0.8136
 Saved new best model
Epoch [4/10] | Loss: 0.4305
Validation Accuracy: 0.8390
 Saved new best model
Epoch [5/10] | Loss: 0.4043
Validation Accuracy: 0.8079
Epoch [6/10] | Loss: 0.3603
Validation Accuracy: 0.7952
Epoch [7/10] | Loss: 0.3301
Validation Accuracy: 0.7698
Epoch [8/10] | Loss: 0.2818
Validation Accuracy: 0.8065
Epoch [9/10] | Loss: 0.2330
Validation Accuracy: 0.8729
 Saved new best model
Epoch [10/10] | Loss: 0.2081
Validation Accuracy: 0.8249


In [None]:
# Test Evaluation
# Restore the best checkpoint from the same relative path the training loop
# wrote it to ("best_cnn_preprocessed.pth"). The previous absolute /content/...
# path only matched when the working directory happened to be /content.
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU also loads on a CPU-only runtime.
state_dict = torch.load("best_cnn_preprocessed.pth", map_location=DEVICE)
model.load_state_dict(state_dict)
model.eval()
correct = 0
total = 0
with torch.no_grad():  # inference only, no gradients needed
    for images, labels in test_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = model(images)
        # Predicted class = index of the largest logit along dim 1.
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_acc = correct / total
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.8553


In [None]:

# Define a small CNN model
# NOTE: this reuses the name SimpleCNN, so running this cell replaces the
# two-block model defined earlier in the notebook namespace.
class SimpleCNN(nn.Module):
    """Four-block convolutional classifier for square RGB images.

    Each block is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2), with
    channel widths 32, 64, 128 and 256. The classifier head is
    Flatten -> Linear(512) -> ReLU -> Dropout(0.5) -> Linear(num_classes).

    Args:
        num_classes: Number of output logits.
        img_size: Side length of the square input images. It sizes the first
            linear layer; the default of 224 reproduces the original
            ``256 * 14 * 14`` input features, so existing checkpoints still load.

    Raises:
        ValueError: If ``img_size`` is smaller than 16, which would leave an
            empty feature map after the four poolings.
    """

    def __init__(self, num_classes=2, img_size=224):
        super().__init__()
        if img_size < 16:
            raise ValueError(f"img_size must be at least 16, got {img_size}")

        # Size comments below assume the default 224x224 input.
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 224 -> 112
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 112 -> 56
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 56 -> 28
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 28 -> 14
        )

        # Four floor-halving poolings floor-divide the side length by 16.
        feature_side = img_size // 16
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * feature_side * feature_side, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Run the feature extractor and classifier; return raw (unnormalised) logits."""
        x = self.features(x)
        x = self.classifier(x)
        return x


In [None]:
# Instantiate model, loss, optimizer
model = SimpleCNN(num_classes=NUM_CLASSES)
model = model.to(DEVICE)  # place the network on the selected device
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# Train for NUM_EPOCHS epochs. After each epoch, measure accuracy on the
# validation loader and checkpoint the weights whenever that accuracy improves.
best_acc = 0.0
for epoch in range(NUM_EPOCHS):
    #  Training
    model.train()
    running_loss = 0  # sum of per-batch mean losses for this epoch
    for images, labels in train_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] | Loss: {running_loss/len(train_loader):.4f}")

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():  # no gradients are needed while scoring
        for images, labels in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = model(images)
            preds = outputs.argmax(dim=1)  # index of the largest logit per sample
            correct += int((preds == labels).sum())
            total += labels.shape[0]

    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc:.4f}")

    # Save best model
    # Nothing else happens in the epoch, so skip ahead unless accuracy strictly improved.
    if val_acc <= best_acc:
        continue
    best_acc = val_acc
    torch.save(model.state_dict(), "best_cnn_preprocessed2.pth")
    print(" Saved new best model")

Epoch [1/10] | Loss: 0.5634
Validation Accuracy: 0.8136
 Saved new best model
Epoch [2/10] | Loss: 0.4259
Validation Accuracy: 0.8121
Epoch [3/10] | Loss: 0.3409
Validation Accuracy: 0.8898
 Saved new best model
Epoch [4/10] | Loss: 0.2772
Validation Accuracy: 0.8983
 Saved new best model
Epoch [5/10] | Loss: 0.2195
Validation Accuracy: 0.8969
Epoch [6/10] | Loss: 0.1830
Validation Accuracy: 0.9280
 Saved new best model
Epoch [7/10] | Loss: 0.1492
Validation Accuracy: 0.9209
Epoch [8/10] | Loss: 0.1300
Validation Accuracy: 0.9068
Epoch [9/10] | Loss: 0.1064
Validation Accuracy: 0.9195
Epoch [10/10] | Loss: 0.0910
Validation Accuracy: 0.9237


In [None]:
# Test Evaluation
# Restore the best checkpoint from the same relative path the training loop
# wrote it to ("best_cnn_preprocessed2.pth"). The previous absolute /content/...
# path only matched when the working directory happened to be /content.
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU also loads on a CPU-only runtime.
state_dict = torch.load("best_cnn_preprocessed2.pth", map_location=DEVICE)
model.load_state_dict(state_dict)
model.eval()
correct = 0
total = 0
with torch.no_grad():  # inference only, no gradients needed
    for images, labels in test_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = model(images)
        # Predicted class = index of the largest logit along dim 1.
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_acc = correct / total
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.9059
