In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

class BinaryEyePACS(datasets.ImageFolder):
    """
    ImageFolder wrapper that collapses 5-level DR grades into a binary label.

    Grade 0, 1    -> 0 (Non-Referable)
    Grade 2, 3, 4 -> 1 (Referable)

    ImageFolder numbers its classes by *sorted folder name*, so the class
    index only equals the DR grade when the folders are literally named
    '0'..'4'. With descriptive folder names (e.g. 'Mild', 'Moderate',
    'No_DR', 'Proliferate_DR', 'Severe') the alphabetical index does not
    match the clinical grade, so the grade is resolved from the folder name.

    Note: the inherited ``targets`` / ``samples`` attributes still hold the
    original ImageFolder class indices, not the binary labels.
    """

    # Folder name (stripped, lower-cased) -> clinical DR grade.
    _GRADE_BY_FOLDER = {
        '0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
        'no_dr': 0,
        'mild': 1,
        'moderate': 2,
        'severe': 3,
        'proliferate_dr': 4,
        'proliferative_dr': 4,
    }

    # Lowest DR grade that counts as referable.
    _REFERABLE_FROM_GRADE = 2

    def _grade_of(self, class_index):
        """
        Translate an ImageFolder class index into a DR grade (0-4).

        Unrecognised folder names fall back to the class index itself, which
        is what this class did for every folder name previously.
        """
        folder = str(self.classes[class_index]).strip().lower()
        return self._GRADE_BY_FOLDER.get(folder, class_index)

    def __getitem__(self, index):
        """Return ``(sample, binary_label)`` for the item at ``index``."""
        # 1. Get the (transformed) image and ImageFolder's class index
        sample, target = super().__getitem__(index)

        # 2. Binarise on the clinical grade, not on the alphabetical index
        grade = self._grade_of(target)
        binary_target = 1 if grade >= self._REFERABLE_FROM_GRADE else 0

        return sample, binary_target

# --- How to use it ---

# 1. Preprocessing: tensor conversion, then per-channel normalisation with
#    the ImageNet statistics (standard for ResNet).
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# 2. Dataset: `root` must contain one sub-folder per class.
train_dataset = BinaryEyePACS(transform=data_transforms, root='colored_images')

# 3. Shuffled mini-batches of 32 samples.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)

# 4. Verification
#    `classes` lists the sub-folder names in sorted order, and ImageFolder's
#    class indices follow that order (they are not necessarily the DR grades).
print(
    f"Total Images: {len(train_dataset)}",
    f"Original Classes: {train_dataset.classes}",
    "Mapping active: 0/1 -> 0 (Healthy), 2/3/4 -> 1 (Sick)",
    sep="\n",
)

Total Images: 35126
Original Classes: ['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']
Mapping active: 0/1 -> 0 (Healthy), 2/3/4 -> 1 (Sick)


In [3]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

class MessidorDataset(Dataset):
    """
    Images listed in a CSV file, with the DR grade binarised on the fly.

    Column 0 of the CSV is read as the image name and column 1 as the DR
    grade (0-4); grades 0/1 map to label 0 and grades 2/3/4 map to label 1.

    Rows that cannot produce a sample (image file not found, or grade
    missing/NaN) are skipped: ``__getitem__`` returns the next usable row
    instead, wrapping around at the end. ``len()`` therefore still reports
    the raw number of CSV rows.
    NOTE(review): the substituted sample duplicates another row, which can
    bias evaluation metrics; consider cleaning the CSV up front.
    """

    # Lower-case suffixes treated as "the name already has an extension".
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp')

    def __init__(self, csv_file, img_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the annotation CSV (including unusable rows)."""
        return len(self.data_frame)

    @classmethod
    def _image_filename(cls, raw_name):
        """Return the file name for a CSV entry, appending '.jpg' if it has no image extension."""
        name = str(raw_name)
        # Case-insensitive so that e.g. 'IM0001.JPG' is not turned into 'IM0001.JPG.jpg'
        if not name.lower().endswith(cls._IMAGE_EXTENSIONS):
            name += ".jpg"
        return name

    def __getitem__(self, idx):
        """
        Return ``(image, label)`` for row ``idx`` or, if that row is unusable,
        for the next usable row.

        Raises:
            IndexError: ``idx`` is outside the CSV (raised by ``iloc``); plain
                ``for`` iteration over the dataset relies on this to stop.
            RuntimeError: no row in the CSV yields a usable sample.
        """
        total = len(self)
        position = idx

        # Bounded scan: try each row at most once instead of recursing without limit.
        for _ in range(max(total, 1)):
            # 1. Read name and grade (iloc raises IndexError for a bad idx)
            raw_name = self.data_frame.iloc[position, 0]
            grade = self.data_frame.iloc[position, 1]

            # A NaN grade would make `grade < 2` False and silently yield label 1
            if not pd.isna(grade):
                img_path = os.path.join(self.img_dir, self._image_filename(raw_name))

                # 2. Load Image (context manager releases the file handle promptly)
                try:
                    with Image.open(img_path) as img_file:
                        image = img_file.convert('RGB')
                except FileNotFoundError:
                    pass  # missing file: fall through to the next row
                else:
                    # 3. Map: 0/1 -> 0 (Healthy), 2/3/4 -> 1 (Referable DR)
                    label = 0 if grade < 2 else 1

                    if self.transform:
                        image = self.transform(image)

                    return image, label

            position = (position + 1) % total

        raise RuntimeError(
            f"No usable sample found in {total} rows "
            f"(image files missing from '{self.img_dir}' or grades missing)"
        )

# Messidor-2 dataset, sharing the transform pipeline used for EyePACS.
messidor_data = MessidorDataset(
    'messidor_data.csv',
    'Messidor-2/preprocess',
    transform=data_transforms,
)