In [1]:
from torch.utils.data import Dataset
from PIL import Image
import numpy as np

class TrainDataset(Dataset):
    def __init__(self, labels_df, img_dir, ext, transform=None):
        """
        Args:
            labels_df (pd.DataFrame): DataFrame with columns ['id_code', 'diagnosis']
            img_dir (Path): Directory where images are stored
            transform (callable, optional): Transform to apply to images
        """
        self.labels_df = labels_df
        self.img_dir = img_dir
        self.ext = ext
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        row = self.labels_df.iloc[idx]
        img_id = row.iloc[0]  # First column: image filename base
        label = int(row.iloc[1])  # Second column: label

        img_path = self.img_dir / f"{img_id}.{self.ext}"
        image = Image.open(img_path).convert('RGB')  # ensure 3 channels

        if self.transform:
            image = np.array(image)
            image = self.transform(image=image)['image']

        return image, label

In [2]:
from pathlib import Path
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2

IMAGE_SIZE_TRAIN = 352
IMAGE_SIZE_VAL = 480

trainval_labels1 = pd.read_csv('aptos2019-blindness-detection/train.csv')
trainval_imgs_dir1 = Path('aptos2019-blindness-detection/train_images/processed')

trainval_labels2 = pd.read_csv('diabetic-retinopathy-detection/trainLabelsHalf.csv')
trainval_imgs_dir2 = Path('diabetic-retinopathy-detection/train/processed')

train_transform = A.Compose([
    A.Resize(IMAGE_SIZE_VAL, IMAGE_SIZE_VAL),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=360, p=1.0),
    A.RandomCrop(IMAGE_SIZE_TRAIN, IMAGE_SIZE_TRAIN),

    A.RandomBrightnessContrast(
        brightness_limit=0.2,  # ±20% brightness
        contrast_limit=0.2,    # ±20% contrast
        p=0.3
    ),
    A.HueSaturationValue(
        hue_shift_limit=10,        # ±10 degrees
        sat_shift_limit=20,        # ±20%
        val_shift_limit=10,        # ±10%
        p=0.3
    ),
    A.GaussianBlur(blur_limit=(3, 5), p=0.3),
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.3),

    A.Normalize(  # For model pretrained on ImageNet
        mean=[0.485, 0.456, 0.406],
        std =[0.229, 0.224, 0.225]
    ),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(IMAGE_SIZE_VAL, IMAGE_SIZE_VAL),
    A.Normalize(  # For model pretrained on ImageNet
        mean=[0.485, 0.456, 0.406],
        std =[0.229, 0.224, 0.225]
    ),
    ToTensorV2(),
])

In [3]:
from torch.utils.data import ConcatDataset

# First, create your untransformed base datasets
base_dataset1 = TrainDataset(trainval_labels1, trainval_imgs_dir1, 'png')
base_dataset2 = TrainDataset(trainval_labels2, trainval_imgs_dir2, 'jpeg')

# Concatenate just to get total size and allow for consistent indexing
base_dataset = ConcatDataset([base_dataset1, base_dataset2])

total_size = len(base_dataset)

del base_dataset1, base_dataset2, base_dataset

In [9]:
from torch.utils.data import random_split, Subset, DataLoader
import numpy as np

# Split indices (use seed for reproducibility)
indices = np.arange(total_size)
np.random.seed(42)
np.random.shuffle(indices)

# Split indices into train/val
train_size = int(0.8 * total_size)
train_indices = indices[:train_size]
val_indices = indices[train_size:]

# Rebuild the original datasets with their respective transforms
train_dataset1 = TrainDataset(trainval_labels1, trainval_imgs_dir1, 'png', train_transform)
train_dataset2 = TrainDataset(trainval_labels2, trainval_imgs_dir2, 'jpeg', train_transform)
val_dataset1   = TrainDataset(trainval_labels1, trainval_imgs_dir1, 'png', val_transform)
val_dataset2   = TrainDataset(trainval_labels2, trainval_imgs_dir2, 'jpeg', val_transform)

# Concat them separately
train_dataset_full = ConcatDataset([train_dataset1, train_dataset2])
val_dataset_full   = ConcatDataset([val_dataset1, val_dataset2])

# Subset them using the indices
train_dataset = Subset(train_dataset_full, train_indices)
val_dataset   = Subset(val_dataset_full, val_indices)

BATCH_SIZE_TRAIN = 12
BATCH_SIZE_VAL = 10

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE_TRAIN, shuffle=True, num_workers=10)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE_VAL, shuffle=False, num_workers=10)

===================================

In [5]:
import torch
import torch.nn.functional as F

import timm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from torch.optim.lr_scheduler import ReduceLROnPlateau

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [8]:
class EfficientNetClassifier(pl.LightningModule):
    def __init__(self, model_name="efficientnet_b3", lr=1e-3, num_classes=5):
        super().__init__()
        self.save_hyperparameters()
        # create & swap in a new head
        self.net = timm.create_model(
            self.hparams.model_name,
            pretrained=True,
            num_classes=self.hparams.num_classes,
        )

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        loss = F.cross_entropy(logits, labels)
        acc  = (logits.argmax(dim=-1) == labels).float().mean()
        self.log('train_loss', loss)
        self.log('train_acc',  acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        loss = F.cross_entropy(logits, labels)
        acc  = (logits.argmax(dim=-1) == labels).float().mean()
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer,
                mode='min',              # we're watching val_loss (lower is better)
                factor=0.5,              # reduce LR by this factor
                patience=5,              # after N epochs of no improvement
                min_lr=1e-6,             # don’t go below this
                verbose=True
            ),
            'monitor': 'val_loss',      # <- this is important
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}


In [None]:
# pl.seed_everything(42)

checkpoint_cb = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='effnet-b3-{epoch:02d}-{val_loss:.4f}',
    save_top_k=1,
    mode='min',
)
earlystop_cb = EarlyStopping(
    monitor='val_loss',
    patience=15,
    mode='min',
)

trainer = pl.Trainer(
    max_epochs=50,
    callbacks=[earlystop_cb, checkpoint_cb],
    accelerator='auto',  # GPU if available
    devices=1,
)

model = EfficientNetClassifier(lr=1e-3, num_classes=5)
trainer.fit(model, train_dataloader, val_dataloader)

print("✅ Best checkpoint:", checkpoint_cb.best_model_path)


.

In [11]:
class EfficientNetFromScratchClassifier(pl.LightningModule):
    def __init__(self, model_name="efficientnet_b3", lr=1e-3, num_classes=5):
        super().__init__()
        self.save_hyperparameters()
        # create & swap in a new head
        self.net = timm.create_model(
            self.hparams.model_name,
            pretrained=False,
            num_classes=self.hparams.num_classes,
        )

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        loss = F.cross_entropy(logits, labels)
        acc  = (logits.argmax(dim=-1) == labels).float().mean()
        self.log('train_loss', loss)
        self.log('train_acc',  acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        loss = F.cross_entropy(logits, labels)
        acc  = (logits.argmax(dim=-1) == labels).float().mean()
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer,
                mode='min',              # we're watching val_loss (lower is better)
                factor=0.5,              # reduce LR by this factor
                patience=5,              # after N epochs of no improvement
                min_lr=1e-6,             # don’t go below this
                verbose=True
            ),
            'monitor': 'val_loss',      # <- this is important
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}


In [None]:
# pl.seed_everything(42)

checkpoint_cb = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='effnet-b3-scratch-{epoch:02d}-{val_loss:.4f}',
    save_top_k=1,
    mode='min',
)
earlystop_cb = EarlyStopping(
    monitor='val_loss',
    patience=15,
    mode='min',
)

trainer = pl.Trainer(
    max_epochs=50,
    callbacks=[earlystop_cb, checkpoint_cb],
    accelerator='auto',  # GPU if available
    devices=1,
)

model = EfficientNetFromScratchClassifier(lr=1e-3, num_classes=5)
trainer.fit(model, train_dataloader, val_dataloader)

print("✅ Best checkpoint:", checkpoint_cb.best_model_path)


.

In [6]:
from torch import nn

class AdjacentLabelSmoothingLoss(nn.Module):
    def __init__(self, smoothing=0.1, num_classes=5):
        """
        Args:
            smoothing: Total smoothing weight to be distributed to adjacent classes.
            num_classes: Total number of classes (assumed to be 0-indexed and ordinal).
        """
        super().__init__()
        self.smoothing = smoothing
        self.num_classes = num_classes

    def forward(self, logits, target):
        with torch.no_grad():
            true_dist = torch.zeros_like(logits)
            for i in range(logits.size(0)):
                t = target[i]
                if t > 0 and t < self.num_classes - 1:
                    true_dist[i][t]     = 1.0 - self.smoothing
                    true_dist[i][t - 1] = self.smoothing / 2
                    true_dist[i][t + 1] = self.smoothing / 2
                elif t == 0:
                    true_dist[i][t]     = 1.0 - self.smoothing
                    true_dist[i][t + 1] = self.smoothing
                elif t == self.num_classes - 1:
                    true_dist[i][t]     = 1.0 - self.smoothing
                    true_dist[i][t - 1] = self.smoothing

        log_probs = F.log_softmax(logits, dim=-1)
        return -(true_dist * log_probs).sum(dim=-1).mean()

In [7]:
class EfficientNetV2Classifier(pl.LightningModule):
    def __init__(self, model_name="efficientnetv2_rw_m", lr=1e-4, num_classes=5, smoothing=0.1):
        super().__init__()
        self.save_hyperparameters()
        # create & swap in a new head
        self.net = timm.create_model(
            self.hparams.model_name,
            pretrained=True,
            num_classes=self.hparams.num_classes,
            
            drop_rate=0.4,        # 🔥 add stronger dropout (applied before final FC)
            drop_path_rate=0.3,   # 🔥 stochastic depth (helps regularize deep nets)
        )
        
        self.criterion = AdjacentLabelSmoothingLoss(
            smoothing=self.hparams.smoothing,
            num_classes=self.hparams.num_classes
        )

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        loss = self.criterion(logits, labels)
        acc  = (logits.argmax(dim=-1) == labels).float().mean()
        self.log('train_loss', loss)
        self.log('train_acc',  acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        loss = self.criterion(logits, labels)
        acc  = (logits.argmax(dim=-1) == labels).float().mean()
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True, on_epoch=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-4)

        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer,
                mode='min',              # we're watching val_loss (lower is better)
                factor=0.5,              # reduce LR by this factor
                patience=5,              # after N epochs of no improvement
                min_lr=1e-6,             # don’t go below this
                verbose=True
            ),
            'monitor': 'val_loss',
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}

In [None]:
# pl.seed_everything(42)

checkpoint_cb = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='effnet-v2rw-m-dropout2-l2reg2-augs-adjsmooth-{epoch:02d}-{val_loss:.4f}-{val_acc:.4f}',
    save_top_k=1,
    mode='min',
)
earlystop_cb = EarlyStopping(
    monitor='val_loss',
    patience=15,
    mode='min',
)

trainer = pl.Trainer(
    max_epochs=50,
    callbacks=[earlystop_cb, checkpoint_cb],
    accelerator='auto',  # GPU if available
    precision='16-mixed',
    devices=1,
)

model = EfficientNetV2Classifier(lr=1e-4, num_classes=5)
trainer.fit(model, train_dataloader, val_dataloader)

print("✅ Best checkpoint:", checkpoint_cb.best_model_path)


Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/mauribuntu/miniconda3/envs/causal-dl-torch/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /mnt/g/Kaggle-Diabetic-Retinopathy/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                       | Params | Mode 
-----------------------------------------------------------------
0 | net       | EfficientNet               | 51.1 M | train
1 | criterion | AdjacentLabelSmoothingLoss | 0      | train
-----------------------------------------------------------------
51.1 M    Trainable params
0         Non-trainable params
51.1 M    Total params
204.377   Total estimated model params size (MB)
1117      Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

.

In [None]:
from torch import nn

class EfficientNetV2OrdinalClassifier(pl.LightningModule):
    def __init__(self, model_name="efficientnetv2_rw_m", lr=1e-4, num_classes=5):
        super().__init__()
        self.save_hyperparameters()
        self.num_classes = num_classes
        
        # Use timm to load pretrained backbone, remove classifier head
        self.net = timm.create_model(
            self.hparams.model_name,
            pretrained=True,
            num_classes=0,  # remove original head
            
            drop_rate=0.4,
            drop_path_rate=0.3
        )

        in_features = self.net.num_features
        self.head = nn.Linear(in_features, self.num_classes - 1)  # 4 outputs for 5 ordinal classes

    def forward(self, x):
        features = self.net(x)
        logits = self.head(features)
        return logits
    
    def ordinal_targets(self, labels):
        """
        Converts integer class labels (0 to num_classes - 1) into ordinal binary targets.
        For example, label 2 becomes [1, 1, 0, 0] for num_classes = 5
        """
        batch_size = labels.size(0)
        num_thresholds = self.num_classes - 1  # one less than number of classes
        labels_expanded = labels.unsqueeze(1)  # Expand labels to shape (batch_size, 1)
        # Create comparison thresholds: shape (1, num_thresholds) = [0, 1, 2, 3]
        thresholds = torch.arange(num_thresholds, device=labels.device).unsqueeze(0)
        # Compare each label to thresholds: True where label > threshold
        binary_targets = labels_expanded > thresholds  # shape (batch_size, num_thresholds)
        return binary_targets.float()

    def training_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        targets = self.ordinal_targets(labels)
        loss = F.binary_cross_entropy_with_logits(logits, targets)
        
        preds = (logits.sigmoid() > 0.5).sum(dim=1)
        acc = (preds == labels).float().mean()

        self.log('train_loss', loss)
        self.log('train_acc',  acc, prog_bar=True)
        
        return loss

    def validation_step(self, batch, batch_idx):
        imgs, labels = batch
        logits = self(imgs)
        targets = self.ordinal_targets(labels)
        loss = F.binary_cross_entropy_with_logits(logits, targets)

        preds = torch.clamp((logits.sigmoid() > 0.5).sum(dim=1), 0, self.num_classes - 1)
        acc = (preds == labels).float().mean()
        
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True, on_epoch=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-4)

        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer,
                mode='min',              # we're watching val_loss (lower is better)
                factor=0.5,              # reduce LR by this factor
                patience=5,              # after N epochs of no improvement
                min_lr=1e-6,             # don’t go below this
                verbose=True
            ),
            'monitor': 'val_loss',
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}

In [1]:
import mlflow.pytorch

mlflow.pytorch.autolog()

# pl.seed_everything(42)

checkpoint_cb = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='effnet-v2rw-m-ordinal-dropout-l2reg-augs-{epoch:02d}-{val_loss:.4f}-{val_acc:.4f}',
    save_top_k=1,
    mode='min',
)
earlystop_cb = EarlyStopping(
    monitor='val_loss',
    patience=15,
    mode='min',
)

trainer = pl.Trainer(
    max_epochs=50,
    callbacks=[earlystop_cb, checkpoint_cb],
    accelerator='auto',  # GPU if available
    precision='16-mixed',
    devices=1
)

model = EfficientNetV2OrdinalClassifier(lr=1e-4, num_classes=5)
trainer.fit(model, train_dataloader, val_dataloader,
            ckpt_path='checkpoints/effnet-v2rw-m-ordinal-dropout-l2reg-augs-epoch=04-val_loss=0.1859-val_acc=0.8198.ckpt')

print("✅ Best checkpoint:", checkpoint_cb.best_model_path)

ModuleNotFoundError: No module named 'mlflow'