In [9]:
import time, copy, pathlib, torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm.auto import tqdm   

In [1]:
import pandas as pd
from pathlib import Path
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from sklearn.model_selection import train_test_split

# ---------- Custom Dataset ----------
class RetinopathyDataset(Dataset):
    """
    Loads <id_code>.png images and integer labels from a CSV file.

    Args
    ----
    df : pandas.DataFrame
        Must contain 'id_code' and 'diagnosis' columns.
    img_dir : str or Path
        Directory where *.png images live.
    transform : callable, optional
        TorchVision transform pipeline applied to each PIL image.
    """
    def __init__(self, df: pd.DataFrame, img_dir: str, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.transform = transform

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx: int):
        row   = self.df.iloc[idx]
        img_f = self.img_dir / f"{row.id_code}.png"
        img   = Image.open(img_f).convert("RGB")     # ensure 3-channel

        if self.transform is not None:
            img = self.transform(img)

        label = torch.tensor(row.diagnosis, dtype=torch.long)
        return img, label


# ---------- Convenience factory ----------
def make_dataloaders(csv_path: str,
                     img_dir: str,
                     batch_size: int = 32,
                     val_frac: float = 0.2,
                     seed: int = 42,
                     num_workers: int = 4):
    """
    Reads the CSV, splits it into train / val (stratified), builds DataLoaders.
    Returns
    -------
    train_dl, val_dl  (torch.utils.data.DataLoader)
    """

    df = pd.read_csv(csv_path)

    train_df, val_df = train_test_split(
        df,
        test_size   = val_frac,
        stratify    = df["diagnosis"],
        random_state= seed,
    )

    # --- Transforms (EfficientNet default size) ---
    train_tfms = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std =(0.229, 0.224, 0.225)),
    ])

    val_tfms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std =(0.229, 0.224, 0.225)),
    ])

    train_ds = RetinopathyDataset(train_df, img_dir, transform=train_tfms)
    val_ds   = RetinopathyDataset(val_df,   img_dir, transform=val_tfms)

    train_dl = DataLoader(train_ds, batch_size=batch_size,
                          shuffle=True, drop_last=False,
                          num_workers=num_workers, pin_memory=True)

    val_dl   = DataLoader(val_ds,   batch_size=batch_size,
                          shuffle=False, drop_last=False,
                          num_workers=num_workers, pin_memory=True)

    return train_dl, val_dl


In [2]:
# train_dl, val_dl = make_dataloaders("data/aptos2019-blindness-detection/train.csv", "data/aptos2019-blindness-detection/train_images" )

In [3]:
CSV_PATH = "data/aptos2019-blindness-detection/train.csv"   # your CSV
IMG_DIR  = "data/aptos2019-blindness-detection/train_images"             # where the *.png files live

train_loader, val_loader = make_dataloaders(
    CSV_PATH,
    IMG_DIR,
    batch_size = 32,
    val_frac   = 0.15,   # 85 % train, 15 % val
)

# Iterate once
for imgs, labels in train_loader:
    print(imgs.shape, labels.shape)   # -> torch.Size([32, 3, 224, 224]) torch.Size([32])
    break

torch.Size([32, 3, 224, 224]) torch.Size([32])


In [6]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES    = 5

In [8]:
!pip install -U transformers datasets timm evaluate scikit-learn pillow torchvision

Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting timm
  Downloading timm-1.0.19-py3-none-any.whl.metadata (60 kB)
Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Collecting torchvision
  Downloading torchvision-0.22.1-cp39-cp39-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-21.0.0-cp39-cp39-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py39-none-any.whl.metadata (7.2 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.34.0->transformers)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting aiohttp!=

In [7]:
model = timm.create_model(
    "deit_base_patch16_224",
    pretrained=True,
    num_classes=NUM_CLASSES
)
model = model.to(DEVICE)

# Mixed-precision amp (faster on GPUs that support it)
scaler = GradScaler()

# ------------------------------------------------------------------------
# 5.  Loss, optimiser, scheduler
# ------------------------------------------------------------------------
criterion  = nn.CrossEntropyLoss()
optimizer  = optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler  = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# ------------------------------------------------------------------------
# 6.  Train / eval loops
# ------------------------------------------------------------------------
def run_epoch(loader, train=True):
    model.train(train)
    epoch_loss, correct, total = 0.0, 0, 0

    for imgs, labels in tqdm(loader, leave=False):
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)

        with torch.set_grad_enabled(train), autocast():
            outputs = model(imgs)
            loss    = criterion(outputs, labels)

        if train:
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        epoch_loss += loss.item() * imgs.size(0)
        preds       = outputs.argmax(dim=1)
        correct    += (preds == labels).sum().item()
        total      += labels.size(0)

    return epoch_loss / total, correct / total


# ------------------------------------------------------------------------
# 7.  Main training loop
# ------------------------------------------------------------------------
best_wts, best_acc = copy.deepcopy(model.state_dict()), 0.0

for epoch in range(1, EPOCHS + 1):
    t0 = time.time()

    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss,   val_acc   = run_epoch(val_loader,   train=False)
    scheduler.step()

    if val_acc > best_acc:
        best_acc, best_wts = val_acc, copy.deepcopy(model.state_dict())

    print(f"Epoch {epoch:02}/{EPOCHS} │ "
          f"train {train_loss:.3f} / {train_acc:.1%} │ "
          f"val {val_loss:.3f} / {val_acc:.1%} │ "
          f"{time.time()-t0:.1f}s")

# ------------------------------------------------------------------------
# 8.  Save best checkpoint
# ------------------------------------------------------------------------
model.load_state_dict(best_wts)
torch.save(model.state_dict(), "deit_base_patch16_retina.pth")
print(f"🎉 Best validation accuracy: {best_acc:.2%}")

NameError: name 'timm' is not defined