## Data Preparation

In [1]:
from pathlib import Path
import pandas as pd

# Directory holding the processed APTOS-2019 test images (PNG files).
test_imgs_dir1 = Path('aptos2019-blindness-detection/test_images/processed')
# test_imgs_dir2 = Path('diabetic-retinopathy-detection/test/processed')

# Path.glob yields entries in arbitrary, filesystem-dependent order. Sort them so the
# row order of df_test (and of every prediction CSV derived from it) is reproducible.
imgs_list1 = sorted(p for p in test_imgs_dir1.glob("*.png") if p.is_file())
# imgs_list2 = sorted(p for p in test_imgs_dir2.glob("*.jpeg") if p.is_file())
# imgs_list = imgs_list1 + imgs_list2

# One row per image; 'img_dir' holds the path of the image file.
# df_test = pd.DataFrame({'img_dir': imgs_list})
df_test = pd.DataFrame({'img_dir': imgs_list1})
# df_test.to_csv('df_test.csv', index=False)

In [2]:
# Persist the image list; the inference section below reloads it from 'df_test1.csv'.
df_test.to_csv('df_test1.csv', index=False)

===========================================

In [1]:
import pandas as pd

# Reload the image list written by the data-preparation section ('img_dir': one image path per row).
df_test = pd.read_csv('df_test1.csv')

In [2]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

def downscale(img, **kwargs):
    """Shrink `img` so that its longest side is at most IMAGE_SIZE_VAL.

    Images that already fit are returned untouched (this never upscales).
    Extra keyword arguments are accepted and ignored so the function can be
    used as the `image=` callback of `A.Lambda`.

    Args:
        img: image array whose first two dimensions are (height, width).

    Returns:
        The original array, or the output of `A.LongestMaxSize` when the
        longest side exceeds IMAGE_SIZE_VAL.
    """
    height, width = img.shape[:2]
    if max(height, width) <= IMAGE_SIZE_VAL:
        return img

    shrink = A.LongestMaxSize(max_size=IMAGE_SIZE_VAL, p=1.0)
    return shrink(image=img)["image"]

# Target side length used at validation/inference time: `downscale` caps the longest
# image side at this value and PadIfNeeded pads smaller images up to it.
# The commented-out values are alternative settings kept for reference.
# IMAGE_SIZE_TRAIN = 352
# IMAGE_SIZE_VAL = 480
IMAGE_SIZE_VAL = 768
# IMAGE_SIZE_VAL = 512

# train_transform = A.Compose(...)

# Preprocessing applied to every test image, in this order.
val_transform = A.Compose([
    # 1) Shrink only when the longest side exceeds IMAGE_SIZE_VAL (see `downscale`).
    A.Lambda(name="Downscale", image=downscale),
    # A.Resize(IMAGE_SIZE_TRAIN, IMAGE_SIZE_TRAIN),

    # 2) Pad with zeros so both sides are at least IMAGE_SIZE_VAL.
    A.PadIfNeeded(IMAGE_SIZE_VAL, IMAGE_SIZE_VAL, fill=0),

    # 3) Normalise with the ImageNet channel statistics.
    A.Normalize(  # For model pretrained on ImageNet
        mean=[0.485, 0.456, 0.406],
        std =[0.229, 0.224, 0.225]
    ),

    # 4) Convert the array to a torch tensor.
    ToTensorV2(),
])

  data = fetch_version_info()


In [3]:
from torch.utils.data import Dataset
from PIL import Image
import numpy as np

class TestDataset(Dataset):
    """Unlabelled image dataset backed by a DataFrame with an 'img_dir' column.

    Args:
        df: DataFrame with one row per image; 'img_dir' holds the image path.
        transform: optional albumentations-style callable invoked as
            ``transform(image=ndarray)['image']``.

    Each item is the transformed image, or the RGB PIL image when no
    transform is given. No label is returned.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_dir = self.df.iloc[idx]['img_dir']

        # The context manager closes the file handle deterministically, even when
        # decoding fails, instead of leaving it to the garbage collector.
        # convert() returns an independent, fully loaded copy, so it stays valid afterwards.
        with Image.open(img_dir) as raw:
            image = raw.convert('RGB')  # ensure 3 channels

        if self.transform:
            image = self.transform(image=np.array(image))['image']

        return image

In [4]:
from torch.utils.data import DataLoader

# Only the inference loader is built in this notebook; the training counterparts
# (BATCH_SIZE_TRAIN, train_dataset, train_dataloader) are intentionally absent.
BATCH_SIZE_VAL = 12

test_dataset = TestDataset(df_test, transform=val_transform)

# shuffle=False keeps the batches in df_test row order, which the inference loop
# relies on when it writes predictions back into df_test.
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False,
    num_workers=11,
    batch_size=BATCH_SIZE_VAL,
)

In [5]:
import torch
from torch import nn
import torch.nn.functional as F
import timm
import pytorch_lightning as pl

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


## Model Classes

In [6]:
class EfficientNetV2OrdinalClassifier(pl.LightningModule):
    """timm backbone with an ordinal head of ``num_classes - 1`` threshold logits.

    Logit ``k`` scores the binary question "is the label greater than k?".
    Training minimises binary cross-entropy against the targets built by
    :meth:`ordinal_targets`; the predicted class is the number of thresholds
    whose sigmoid probability exceeds 0.5 (see :meth:`predict_class`).

    Args:
        model_name: timm model identifier for the backbone.
        lr: learning rate handed to Adam.
        num_classes: number of ordinal classes.
    """

    def __init__(self, model_name="efficientnetv2_rw_m", lr=1e-4, num_classes=5):
        super().__init__()
        self.save_hyperparameters()
        self.num_classes = num_classes

        # Use timm to load pretrained backbone, remove classifier head
        self.net = timm.create_model(
            self.hparams.model_name,
            pretrained=True,
            num_classes=0,  # remove original head

            drop_rate=0.3,
            drop_path_rate=0.3
        )

        in_features = self.net.num_features
        self.head = nn.Linear(in_features, self.num_classes - 1)  # 4 outputs for 5 ordinal classes

    def forward(self, x):
        """Return the raw threshold logits, one column per ordinal threshold."""
        features = self.net(x)
        return self.head(features)

    def predict_class(self, logits):
        """Count, per sample, the thresholds whose sigmoid probability is above 0.5."""
        probas = logits.sigmoid()
        return (probas > 0.5).sum(dim=1)

    def ordinal_targets(self, labels):
        """
        Converts integer class labels (0 to num_classes - 1) into ordinal binary targets.
        For example, label 2 becomes [1, 1, 0, 0] for num_classes = 5
        """
        num_thresholds = self.num_classes - 1  # one less than number of classes
        # thresholds has shape (1, num_thresholds) = [[0, 1, 2, 3]] for 5 classes
        thresholds = torch.arange(num_thresholds, device=labels.device).unsqueeze(0)
        # Broadcast (batch, 1) > (1, num_thresholds): True where label > threshold
        return (labels.unsqueeze(1) > thresholds).float()

    def _loss_and_acc(self, batch):
        """Shared train/val computation: BCE-with-logits loss and exact-match accuracy."""
        imgs, labels = batch
        logits = self(imgs)
        loss = F.binary_cross_entropy_with_logits(logits, self.ordinal_targets(labels))
        # Reuse predict_class so the logged accuracy cannot drift from the inference rule.
        acc = (self.predict_class(logits) == labels).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        loss, acc = self._loss_and_acc(batch)

        self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log('train_acc',  acc, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._loss_and_acc(batch)

        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True)

    def configure_optimizers(self):
        """Adam plus ReduceLROnPlateau driven by the logged 'val_loss'."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=5e-5)

        scheduler = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode='min',              # we're watching val_loss (lower is better)
                factor=0.5,              # reduce LR by this factor
                patience=5,              # after N epochs of no improvement
                min_lr=1e-6,             # don't go below this
                # NOTE(review): `verbose` is reported as deprecated by recent PyTorch
                # releases; confirm against the pinned torch version.
                verbose=True
            ),
            'monitor': 'val_loss',
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}

In [6]:
class AdjacentLabelSmoothingLoss(nn.Module):
    """Cross-entropy against soft targets that leak mass onto neighbouring ordinal classes.

    For every true class ``t`` a target distribution is precomputed in which

    * class ``t`` keeps ``1 - smoothing`` of the probability mass, and
    * classes within ``window_size`` steps of ``t`` share the remaining mass,
      each weighted by ``d ** distance`` for a per-class decay ``d``.

    Classes further away than ``window_size`` get exactly zero.
    """

    def __init__(self, smoothing: float = 0.1, num_classes: int = 5, window_size: int = 1):
        """
        Args:
          smoothing: total probability mass moved away from the true class (0 <= s < 1).
          num_classes: total number of ordinal classes.
          window_size: number of neighbours considered on each side of the true class
                       (1 = adjacent only, 2 = also distance-2 neighbours, ...).
        """
        super().__init__()
        assert 0 <= smoothing < 1, "smoothing must be in [0,1)."
        self.smoothing = smoothing
        self.num_classes = num_classes
        self.window_size = window_size

        # Neighbour mass relative to a centre weight of 1; normalising afterwards
        # leaves the centre with 1 / (1 + ratio) = 1 - smoothing.
        ratio = smoothing / (1.0 - smoothing)

        per_class = []
        for true_class in range(num_classes):
            # Neighbours that actually exist on each side (clipped at the label range).
            n_left = min(window_size, true_class)
            n_right = min(window_size, num_classes - 1 - true_class)
            decay = self._solve_decay(n_left, n_right, ratio)

            unnormalised = []
            for cls_idx in range(num_classes):
                offset = abs(cls_idx - true_class)
                if offset == 0:
                    unnormalised.append(1.0)
                elif offset <= window_size:
                    unnormalised.append(decay ** offset)
                else:
                    unnormalised.append(0.0)

            # Normalise in float64, store in float32.
            row = torch.tensor(unnormalised, dtype=torch.float64)
            per_class.append((row / row.sum()).float())

        # weight_matrix[t] is the target distribution for true class t; shape (C, C).
        self.register_buffer('weight_matrix', torch.stack(per_class, dim=0))

    @staticmethod
    def _solve_decay(n_left, n_right, ratio):
        """Find d with sum(d^1..d^n_left) + sum(d^1..d^n_right) = ratio via Newton's method."""
        if n_left + n_right == 0:
            return 0.0

        decay = ratio / (n_left + n_right)  # initial guess
        for _ in range(50):
            residual = (sum(decay**k for k in range(1, n_left + 1))
                        + sum(decay**k for k in range(1, n_right + 1)) - ratio)
            slope = (sum(k * decay**(k - 1) for k in range(1, n_left + 1))
                     + sum(k * decay**(k - 1) for k in range(1, n_right + 1)))
            # Small epsilon guards the division; the floor keeps d strictly positive.
            decay = max(decay - residual / (slope + 1e-12), 1e-12)
        return decay

    def forward(self, logits: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        logits: (B, C)
        target: (B,) integer tensor in [0..C-1]

        Returns the batch mean of the soft-target cross-entropy.
        """
        soft_targets = self.weight_matrix[target]  # (B, C) rows looked up by true class
        log_probs = F.log_softmax(logits, dim=-1)
        per_sample = -(soft_targets * log_probs).sum(dim=-1)
        return per_sample.mean()

    def debug_dist(self, true_class: int) -> torch.Tensor:
        """
        Return the precomputed target distribution for one true class.

        Args:
            true_class: int, between 0 and num_classes - 1.

        Returns:
            A tensor of shape (num_classes,).

        Raises:
            ValueError: if `true_class` is outside the valid range.
        """
        if true_class < 0 or true_class >= self.num_classes:
            raise ValueError(f"true_class must be between 0 and {self.num_classes - 1}, got {true_class}.")

        return self.weight_matrix[true_class]

In [7]:
class ConvNeXtClassifier(pl.LightningModule):
    """timm ConvNeXt with a `num_classes`-way head, trained with AdjacentLabelSmoothingLoss.

    Args:
        model_name: timm model identifier.
        lr: learning rate handed to Adam.
        num_classes: size of the classification head.
        smoothing: label-smoothing mass passed to the criterion.
    """

    def __init__(self, model_name="convnext_small.fb_in22k_ft_in1k_384", lr=1e-4, num_classes=5, smoothing=0.1):
        super().__init__()
        self.save_hyperparameters()
        hp = self.hparams

        # timm builds the backbone with a head sized for our label set.
        self.net = timm.create_model(
            hp.model_name,
            pretrained=True,
            num_classes=hp.num_classes,
            drop_rate=0.2,
            drop_path_rate=0.2,
        )

        self.criterion = AdjacentLabelSmoothingLoss(
            smoothing=hp.smoothing,
            num_classes=hp.num_classes
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        return self.net(x)

    def predict_class(self, logits):
        """Index of the largest logit per sample."""
        return logits.argmax(dim=-1)

    def _loss_and_accuracy(self, batch):
        """Run one batch through the network; return (loss, top-1 accuracy)."""
        images, targets = batch
        scores = self(images)
        loss = self.criterion(scores, targets)
        hits = scores.argmax(dim=-1) == targets
        return loss, hits.float().mean()

    def training_step(self, batch, batch_idx):
        loss, acc = self._loss_and_accuracy(batch)
        for metric, value in (('train_loss', loss), ('train_acc', acc)):
            self.log(metric, value, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._loss_and_accuracy(batch)
        for metric, value in (('val_loss', loss), ('val_acc', acc)):
            self.log(metric, value, prog_bar=True, on_step=False, on_epoch=True)

    def configure_optimizers(self):
        """Adam (weight decay 1e-5) with ReduceLROnPlateau monitoring 'val_loss' once per epoch."""
        adam = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-5)

        # Halve the LR after 5 epochs without val_loss improvement, never below 1e-6.
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            adam,
            mode='min',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=True
        )

        return {
            'optimizer': adam,
            'lr_scheduler': {
                'scheduler': plateau,
                'monitor': 'val_loss',
                'interval': 'epoch',
                'frequency': 1
            },
        }

In [8]:
class EfficientNetV2Classifier(pl.LightningModule):
    """timm EfficientNetV2 classifier trained with AdjacentLabelSmoothingLoss.

    Args:
        model_name: timm model identifier.
        lr: learning rate handed to Adam.
        num_classes: size of the classification head.
        smoothing: label-smoothing mass passed to the criterion.
    """

    def __init__(self, model_name="efficientnetv2_rw_m", lr=1e-4, num_classes=5, smoothing=0.1):
        super().__init__()
        self.save_hyperparameters()

        n_out = self.hparams.num_classes
        self.net = timm.create_model(
            self.hparams.model_name,
            pretrained=True,
            num_classes=n_out,
            drop_rate=0.4,        # dropout applied before the final FC layer
            drop_path_rate=0.3,   # stochastic depth
        )
        self.criterion = AdjacentLabelSmoothingLoss(
            smoothing=self.hparams.smoothing,
            num_classes=n_out
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        return self.net(x)

    def predict_class(self, logits):
        """Index of the largest logit per sample."""
        return logits.argmax(dim=-1)

    def _evaluate(self, batch):
        """Forward one batch; return a dict with the loss and the top-1 accuracy."""
        inputs, truth = batch
        outputs = self(inputs)
        correct = (outputs.argmax(dim=-1) == truth).float()
        return {'loss': self.criterion(outputs, truth), 'acc': correct.mean()}

    def training_step(self, batch, batch_idx):
        stats = self._evaluate(batch)

        # Training metrics are aggregated per epoch only.
        self.log('train_loss', stats['loss'], prog_bar=True, on_step=False, on_epoch=True)
        self.log('train_acc',  stats['acc'], prog_bar=True, on_step=False, on_epoch=True)

        return stats['loss']

    def validation_step(self, batch, batch_idx):
        stats = self._evaluate(batch)

        # Validation metrics rely on Lightning's default step/epoch settings.
        self.log('val_loss', stats['loss'], prog_bar=True)
        self.log('val_acc',  stats['acc'], prog_bar=True)

    def configure_optimizers(self):
        """Adam (weight decay 1e-4) with ReduceLROnPlateau monitoring 'val_loss' once per epoch."""
        optim = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-4)

        plateau_kwargs = dict(
            mode='min',      # lower val_loss is better
            factor=0.5,      # multiply LR by this on plateau
            patience=5,      # epochs without improvement before reducing
            min_lr=1e-6,     # lower bound for the LR
            verbose=True,
        )
        lr_config = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optim, **plateau_kwargs),
            'monitor': 'val_loss',
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optim, 'lr_scheduler': lr_config}

In [9]:
class ResNet50Classifier(pl.LightningModule):
    """timm ResNet-50 classifier trained with AdjacentLabelSmoothingLoss.

    Args:
        model_name: timm model identifier.
        lr: learning rate handed to Adam.
        num_classes: size of the classification head.
        smoothing: label-smoothing mass passed to the criterion.
        window_size: neighbour window passed to the criterion.
    """

    def __init__(self, model_name="resnet50.a1_in1k", lr=1e-4, num_classes=5,
                 smoothing=0.1, window_size=1):
        super().__init__()
        self.save_hyperparameters()

        backbone_kwargs = dict(
            pretrained=True,
            num_classes=self.hparams.num_classes,
            drop_rate=0.3,  # dropout applied before the final FC layer
            # drop_path_rate is deliberately left at timm's default for this model
        )
        self.net = timm.create_model(self.hparams.model_name, **backbone_kwargs)

        self.criterion = AdjacentLabelSmoothingLoss(
            smoothing=self.hparams.smoothing,
            window_size=self.hparams.window_size,
            num_classes=self.hparams.num_classes
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        return self.net(x)

    def predict_class(self, logits):
        """Index of the largest logit per sample."""
        return logits.argmax(dim=-1)

    def _run_batch(self, batch):
        """Compute (loss, accuracy) for one (images, labels) batch."""
        images, labels = batch
        logits = self(images)
        loss = self.criterion(logits, labels)
        matches = logits.argmax(dim=-1) == labels
        return loss, matches.float().mean()

    def training_step(self, batch, batch_idx):
        loss, acc = self._run_batch(batch)

        epoch_only = dict(prog_bar=True, on_step=False, on_epoch=True)
        self.log('train_loss', loss, **epoch_only)
        self.log('train_acc',  acc, **epoch_only)

        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._run_batch(batch)

        self.log('val_loss', loss, prog_bar=True, on_epoch=True)
        self.log('val_acc',  acc, prog_bar=True, on_epoch=True)

    def configure_optimizers(self):
        """Adam (weight decay 1e-4) with ReduceLROnPlateau monitoring 'val_loss' once per epoch."""
        opt = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-4)

        # Halve the LR when val_loss has not improved for 5 epochs; floor at 1e-6.
        sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
            opt, mode='min', factor=0.5, patience=5, min_lr=1e-6, verbose=True
        )

        return {
            'optimizer': opt,
            'lr_scheduler': {
                'scheduler': sched,
                'monitor': 'val_loss',
                'interval': 'epoch',
                'frequency': 1
            }
        }

In [11]:
class ConvNeXtRegressor(pl.LightningModule):
    """timm ConvNeXt with a single regression output, trained with MSE on the integer grade.

    Args:
        model_name: timm model identifier.
        lr: learning rate handed to Adam.
        num_classes: number of grades; only used to clamp rounded predictions
            into ``[0, num_classes - 1]``.
    """

    def __init__(self, model_name="convnext_small.fb_in22k_ft_in1k_384", lr=1e-4, num_classes=5):
        super().__init__()
        self.save_hyperparameters()
        self.net = timm.create_model(
            model_name,
            # The network starts from timm's pretrained weights; a checkpoint loaded
            # afterwards (e.g. via load_from_classifier_ckpt) overwrites them.
            pretrained=True,
            num_classes=1,        # One output neuron for regression

            drop_rate=0.2,
            drop_path_rate=0.2,
        )

    def forward(self, x):
        return self.net(x).squeeze(1)  # Output shape [batch_size]

    def predict_class(self, outputs):
        """Round the regression output to the nearest valid grade (integer tensor)."""
        return outputs.round().clamp(0, self.hparams.num_classes - 1).long()

    def _loss_and_acc(self, batch):
        """Shared train/val computation: MSE loss and accuracy of the rounded predictions."""
        imgs, labels = batch
        preds = self(imgs)
        loss = F.mse_loss(preds, labels.float())
        # Reuse predict_class so the logged accuracy matches the inference-time rule.
        acc = (self.predict_class(preds) == labels).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        loss, acc = self._loss_and_acc(batch)

        self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log('train_acc',  acc, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._loss_and_acc(batch)

        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True)

        return loss

    def configure_optimizers(self):
        """Adam (weight decay 1e-5) with ReduceLROnPlateau monitoring 'val_loss' (MSE)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-5)

        scheduler = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode='min',          # still minimizing val_loss (MSE)
                factor=0.5,
                patience=5,
                min_lr=1e-6,
                verbose=True
            ),
            'monitor': 'val_loss',   # watch val_loss (MSE) to reduce LR
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}

    @classmethod
    def load_from_classifier_ckpt(cls, path, model_name="convnext_small.fb_in22k_ft_in1k_384",
                                  lr=1e-4, num_classes=5):
        """
        Create a ConvNeXtRegressor and initialise its backbone from a classification checkpoint.

        Args:
            path: checkpoint file containing a 'state_dict' entry.
            model_name: timm model identifier; must match the checkpoint's architecture.
            lr: learning rate for the new regressor.
            num_classes: number of grades, forwarded to the constructor.

        Returns:
            The regressor with every non-head tensor copied from the checkpoint.
        """
        import warnings

        # Forward num_classes as well; previously it was accepted but silently dropped.
        model = cls(model_name=model_name, lr=lr, num_classes=num_classes)
        checkpoint = torch.load(path, map_location='cpu')

        state_dict = checkpoint['state_dict']

        # Remove classification head weights (they don't match)
        filtered_state_dict = {k: v for k, v in state_dict.items() if 'head' not in k}
        result = model.load_state_dict(filtered_state_dict, strict=False)

        # strict=False hides mismatches; surface the case where backbone tensors were
        # not found in the checkpoint (e.g. a checkpoint of a different architecture).
        missing_backbone = [k for k in result.missing_keys if 'head' not in k]
        if missing_backbone:
            warnings.warn(
                f"{len(missing_backbone)} backbone tensor(s) were not found in {path!r}; "
                f"first missing key: {missing_backbone[0]!r}"
            )
        return model

In [12]:
class ResNet50Regressor(pl.LightningModule):
    """timm ResNet-50 with a single regression output, trained with MSE on the integer grade.

    Args:
        model_name: timm model identifier.
        lr: learning rate handed to Adam.
        num_classes: number of grades; only used to clamp rounded predictions
            into ``[0, num_classes - 1]``.
    """

    def __init__(self, model_name="resnet50.a1_in1k", lr=1e-4, num_classes=5):
        super().__init__()
        self.save_hyperparameters()
        self.net = timm.create_model(
            model_name,
            # The network starts from timm's pretrained weights; a checkpoint loaded
            # afterwards (e.g. via load_from_classifier_ckpt) overwrites them.
            pretrained=True,
            num_classes=1,        # One output neuron for regression

            drop_rate=0.3,
            # drop_path_rate=0.2,
        )

    def forward(self, x):
        return self.net(x).squeeze(1)  # Output shape [batch_size]

    def predict_class(self, outputs):
        """Round the regression output to the nearest valid grade (integer tensor)."""
        return outputs.round().clamp(0, self.hparams.num_classes - 1).long()

    def _loss_and_acc(self, batch):
        """Shared train/val computation: MSE loss and accuracy of the rounded predictions."""
        imgs, labels = batch
        preds = self(imgs)
        loss = F.mse_loss(preds, labels.float())
        # Reuse predict_class so the logged accuracy matches the inference-time rule.
        acc = (self.predict_class(preds) == labels).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        loss, acc = self._loss_and_acc(batch)

        self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log('train_acc',  acc, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._loss_and_acc(batch)

        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc, prog_bar=True)

        return loss

    def configure_optimizers(self):
        """Adam (weight decay 1e-4) with ReduceLROnPlateau monitoring 'val_loss' (MSE)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=1e-4)

        scheduler = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode='min',          # still minimizing val_loss (MSE)
                factor=0.5,
                patience=5,
                min_lr=1e-6,
                verbose=True
            ),
            'monitor': 'val_loss',   # watch val_loss (MSE) to reduce LR
            'interval': 'epoch',
            'frequency': 1
        }

        return {'optimizer': optimizer, 'lr_scheduler': scheduler}

    @classmethod
    def load_from_classifier_ckpt(cls, path, model_name="resnet50.a1_in1k",
                                  lr=1e-4, num_classes=5):
        """
        Create a ResNet50Regressor and initialise its backbone from a classification checkpoint.

        The default `model_name` now matches this class's constructor. It used to be a
        ConvNeXt identifier, which built the wrong architecture, and `strict=False`
        then loaded nothing without any error.

        Args:
            path: checkpoint file containing a 'state_dict' entry.
            model_name: timm model identifier; must match the checkpoint's architecture.
            lr: learning rate for the new regressor.
            num_classes: number of grades, forwarded to the constructor.

        Returns:
            The regressor with every non-'fc' tensor copied from the checkpoint.
        """
        import warnings

        # Forward num_classes as well; previously it was accepted but silently dropped.
        model = cls(model_name=model_name, lr=lr, num_classes=num_classes)
        checkpoint = torch.load(path, map_location='cpu')

        state_dict = checkpoint['state_dict']

        # Remove classification head weights (they don't match)
        filtered_state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
        result = model.load_state_dict(filtered_state_dict, strict=False)

        # strict=False hides mismatches; surface the case where backbone tensors were
        # not found in the checkpoint (e.g. a checkpoint of a different architecture).
        missing_backbone = [k for k in result.missing_keys if 'fc' not in k]
        if missing_backbone:
            warnings.warn(
                f"{len(missing_backbone)} backbone tensor(s) were not found in {path!r}; "
                f"first missing key: {missing_backbone[0]!r}"
            )
        return model

## Inference

In [None]:
# models = {
#     'convnext':   ('checkpoints/convnext-small-dropout-l2reg-augs-adjsmooth-full-refined-epoch=00-val_loss=0.6888-val_acc=0.8396.ckpt',
#                    ConvNeXtClassifier),
#     'convnext-r': ('checkpoints/convnext-small-reg-dropout-l2reg-augs-full-epoch=23-val_loss=0.2984-val_acc=0.8075.ckpt',
#                    ConvNeXtRegressor),
#     'effnet':     ('checkpoints/effnet-v2rw-m-dropout-l2reg-augs-adjsmooth-full-epoch=14-val_loss=0.7004-val_acc=0.8305.ckpt',
#                    EfficientNetV2Classifier),
#     'effnet-p':   ('checkpoints/effnet-v2rw-m-dropout-l2reg-augs-adjsmooth-epoch=12-val_loss=0.6849-val_acc=0.8370.ckpt',
#                    EfficientNetV2Classifier),
#     'effnet-o':   ('checkpoints/effnet-v2rw-m-ordinal-dropout2-l2reg2-augs-full-epoch=22-val_loss=0.1580-val_acc=0.8278.ckpt',
#                    EfficientNetV2OrdinalClassifier),
#     'resnet':     ('checkpoints/resnet-50-dropout-l2reg2-augs-adjsmooth2-full-epoch=39-val_loss=0.8696-val_acc=0.8299.ckpt',
#                    ResNet50Classifier),
#     'resnet-n':   ('checkpoints/resnet-50-dropout-l2reg-augs-adjsmooth-full-refined-epoch=00-val_loss=0.7068-val_acc=0.8268.ckpt',
#                    ResNet50Classifier),
#     'resnet-r':   ('checkpoints/resnet-50-reg-dropout-l2reg-augs-full-epoch=26-val_loss=0.3978-val_acc=0.7151.ckpt',
#                    ResNet50Regressor),
# }

In [None]:
# models = {
#     'convnext':   ('checkpoints/convnext-small-sv768-dropout-l2reg-augs-adjsmooth-part1-epoch=26-val_loss=0.6054-val_acc=0.8661.ckpt',
#                    ConvNeXtClassifier),
#     'convnext-r': ('checkpoints/convnext-small-reg-sv768-dropout-l2reg-augs-part1-epoch=13-val_loss=0.1747-val_acc=0.8462.ckpt',
#                    ConvNeXtRegressor),
#     'effnet':     ('checkpoints/effnet-v2rw-m-sv768-dropout-l2reg-augs-adjsmooth-part1-epoch=48-val_loss=0.6221-val_acc=0.8590.ckpt',
#                    EfficientNetV2Classifier),
#     # 'effnet-p':   ('checkpoints/effnet-v2rw-m-dropout-l2reg-augs-adjsmooth-epoch=12-val_loss=0.6849-val_acc=0.8370.ckpt',
#     #                EfficientNetV2Classifier),
#     'effnet-o':   ('checkpoints/effnet-v2rw-m-ordinal-sv768-dropout-l2reg-augs-part1-epoch=06-val_loss=0.1286-val_acc=0.8319.ckpt',
#                    EfficientNetV2OrdinalClassifier),
#     'resnet':     ('checkpoints/resnet-50-sv768-dropout-l2reg-augs-adjsmooth2-part1-epoch=31-val_loss=0.8662-val_acc=0.8590.ckpt',
#                    ResNet50Classifier),
#     'resnet-n':   ('checkpoints/resnet-50-sv768-dropout-l2reg-augs-adjsmooth-part1-epoch=22-val_loss=0.6740-val_acc=0.8547.ckpt',
#                    ResNet50Classifier),
#     # 'resnet-r':   ('checkpoints/resnet-50-reg-dropout-l2reg-augs-full-epoch=26-val_loss=0.3978-val_acc=0.7151.ckpt',
#     #                ResNet50Regressor),
# }

In [None]:
# models = {
#     'convnext':   ('checkpoints/convnext-small-sv768-dropout-l2reg-augs-adjsmooth-part2-epoch=45-val_loss=0.6401-val_acc=0.8507.ckpt',
#                    ConvNeXtClassifier),
#     'convnext-r': ('checkpoints/convnext-small-reg-dropout-l2reg-augs-full-epoch=23-val_loss=0.2984-val_acc=0.8075.ckpt',
#                    ConvNeXtRegressor),
#     'effnet':     ('checkpoints/effnet-v2rw-m-sv768-dropout-l2reg-augs-adjsmooth-part2-epoch=46-val_loss=0.6480-val_acc=0.8508.ckpt',
#                    EfficientNetV2Classifier),
#     'effnet-p':   ('checkpoints/effnet-v2rw-m-dropout-l2reg-augs-adjsmooth-epoch=12-val_loss=0.6849-val_acc=0.8370.ckpt',
#                    EfficientNetV2Classifier),
#     'effnet-o':   ('checkpoints/effnet-v2rw-m-ordinal-dropout2-l2reg2-augs-full-epoch=22-val_loss=0.1580-val_acc=0.8278.ckpt',
#                    EfficientNetV2OrdinalClassifier),
#     'resnet':     ('checkpoints/resnet-50-sv768-dropout-l2reg-augs-adjsmooth-part2-epoch=33-val_loss=0.8686-val_acc=0.8346.ckpt',
#                    ResNet50Classifier),
#     # 'resnet-n':   ('checkpoints/resnet-50-sv768-dropout-l2reg-augs-adjsmooth-part1-epoch=22-val_loss=0.6740-val_acc=0.8547.ckpt',
#     #                ResNet50Classifier),
#     'resnet-r':   ('checkpoints/resnet-50-reg-dropout-l2reg-augs-full-epoch=26-val_loss=0.3978-val_acc=0.7151.ckpt',
#                    ResNet50Regressor),
# }

In [10]:
# Active ensemble for this run: short model key -> (checkpoint path, LightningModule class).
# The key becomes the column prefix of that model's outputs in df_test (see the inference loop).
# Commented-out entries are earlier checkpoints / models kept for reference.
models = {
    # 'convnext':   ('checkpoints/convnext-small-sv768-dropout-l2reg-augs-adjsmooth-part2-epoch=45-val_loss=0.6401-val_acc=0.8507.ckpt',
    # 'convnext':   ('checkpoints/convnext-small-sv728dyn-rand_shrink-dropout-l2reg-augs-adjsmooth-train2-refine1-epoch=20-val_loss=0.6240-val_acc=0.8580.ckpt',
    'convnext':   ('checkpoints/convnext-small-st448-sv768dyn-rand_shrink-dropout-l2reg-augs2-adjsmooth-train2-refine1-epoch=14-val_loss=0.6224-val_acc=0.8635.ckpt',
                   ConvNeXtClassifier),
    # 'convnext-r': ('checkpoints/convnext-small-reg-dropout-l2reg-augs-full-epoch=23-val_loss=0.2984-val_acc=0.8075.ckpt',
    #                ConvNeXtRegressor),
    # 'effnet':     ('checkpoints/effnet-v2rw-m-sv768-dropout-l2reg-augs-adjsmooth-part2-epoch=46-val_loss=0.6480-val_acc=0.8508.ckpt',
    # 'effnet':     ('checkpoints/effnet-v2rw-m-sv728dyn-rand_shrink-dropout-l2reg-augs-adjsmooth-train2-refine1-epoch=15-val_loss=0.6569-val_acc=0.8460.ckpt',
    'effnet':     ('checkpoints/effnet-v2rw-m-st408-sv768dyn-rand_shrink-dropout-l2reg-augs2-adjsmooth-train2-refine1-epoch=15-val_loss=0.6679-val_acc=0.8454.ckpt',
                   EfficientNetV2Classifier),
    # 'effnet-p':   ('checkpoints/effnet-v2rw-m-dropout-l2reg-augs-adjsmooth-epoch=12-val_loss=0.6849-val_acc=0.8370.ckpt',
    #                EfficientNetV2Classifier),
    # 'effnet-o':   ('checkpoints/effnet-v2rw-m-ordinal-dropout2-l2reg2-augs-full-epoch=22-val_loss=0.1580-val_acc=0.8278.ckpt',
    #                EfficientNetV2OrdinalClassifier),
    # 'resnet':     ('checkpoints/resnet-50-sv768-dropout-l2reg-augs-adjsmooth-part2-epoch=33-val_loss=0.8686-val_acc=0.8346.ckpt',
    # 'resnet':     ('checkpoints/resnet-50-sv728dyn-rand_shrink-dropout-l2reg-augs-adjsmooth-train2-refine1-epoch=23-val_loss=0.9103-val_acc=0.8230.ckpt',
    'resnet':     ('checkpoints/resnet-50-st477-sv768dyn-rand_shrink-dropout-l2reg-augs2-adjsmooth-train2-refine1-epoch=24-val_loss=0.8834-val_acc=0.8389.ckpt',
                   ResNet50Classifier),
    # 'resnet-n':   ('checkpoints/resnet-50-sv768-dropout-l2reg-augs-adjsmooth-part1-epoch=22-val_loss=0.6740-val_acc=0.8547.ckpt',
    #                ResNet50Classifier),
    # 'resnet-r':   ('checkpoints/resnet-50-reg-dropout-l2reg-augs-full-epoch=26-val_loss=0.3978-val_acc=0.7151.ckpt',
    #                ResNet50Regressor),
}

In [11]:
from tqdm.notebook import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Mixed precision only on GPU. CPU autocast defaults to bfloat16, which
# `Tensor.numpy()` cannot convert, so the CPU fallback would fail at the
# concatenation step. On CUDA the behaviour is unchanged.
use_amp = device.type == 'cuda'

# Assume that test_dataloader does NOT shuffle, so order matches df_test

# Run every model of the ensemble over the whole test set and store its raw
# outputs as new columns of df_test (the stacking features).
for model_name, (ckpt_path, model_class) in models.items():
    print(f'Processing {model_name}…')

    # 1) load & move to device
    model = model_class.load_from_checkpoint(ckpt_path, strict=False)
    model = model.to(device).eval()

    # 2) collect raw outputs in a list (one CPU tensor per batch)
    all_out = []
    with torch.no_grad():
        for imgs in tqdm(test_dataloader):
            imgs = imgs.to(device)
            with torch.amp.autocast(device.type, enabled=use_amp):
                out = model(imgs)
            # out: Tensor of shape [B] or [B, D]
            all_out.append(out.cpu())

    # 3) concatenate and convert to numpy
    all_out = torch.cat(all_out, dim=0).numpy()  # shape (N,) or (N, D)

    # 4) turn into stacking columns
    if all_out.ndim == 1:
        # regressor → one column
        df_test[f'{model_name}_pred'] = all_out
    else:
        # multi‐dim output → one column per dim
        D = all_out.shape[1]
        for i in range(D):
            df_test[f'{model_name}_{i}'] = all_out[:, i]

    # cleanup: drop the model before loading the next one to bound GPU memory
    del model
    if device.type == 'cuda':
        torch.cuda.empty_cache()

Processing convnext…


  0%|          | 0/161 [00:00<?, ?it/s]

Processing effnet…


  0%|          | 0/161 [00:00<?, ?it/s]

Processing resnet…


  0%|          | 0/161 [00:00<?, ?it/s]

In [12]:
# Persist df_test together with the per-model output columns added by the inference loop.
# The commented-out names are output files of other runs; only the last line is active.
# df_test.to_csv('df_test_inferred.csv', index=False)
# df_test.to_csv('df_test_inferred-data1.csv', index=False)
# df_test.to_csv('df_test_inferred-data2.csv', index=False)
# df_test.to_csv('df_test_inferred-train2-data1.csv', index=False)
# df_test.to_csv('df_test_inferred-train2-finetune1.csv', index=False)
# df_test.to_csv('df_test_inferred-train2-shrunk1.csv', index=False)
df_test.to_csv('df_test_inferred-train2-dyn1.csv', index=False)

===================================================

In [1]:
import pandas as pd

# Reload the saved base-model outputs; the commented lines are the files used
# by earlier runs.
# df_test21 = pd.read_csv('df_test_inferred-train2-data1.csv')
# df_test21 = pd.read_csv('df_test_inferred-train2-finetune1.csv')
# df_test21 = pd.read_csv('df_test_inferred-train2-shrunk1.csv')
df_test21 = pd.read_csv(filepath_or_buffer='df_test_inferred-train2-dyn1.csv')

# df_test1 = df_test[df_test['img_dir'].str.contains('aptos')]
# df_test2 = df_test[df_test['img_dir'].str.contains('diabetic-retinopathy-detection')]

In [2]:
# Length of the diabetic-retinopathy test-image directory prefix; the slices on
# 'img_dir' elsewhere in this notebook use this value as their start offset.
len('diabetic-retinopathy-detection/test/processed/')

46

In [2]:
from pathlib import Path

# Derive the grouping key from the file name itself rather than from fixed
# character offsets (46 / -5), so it no longer depends on the exact directory
# prefix or on a five-character extension. The key is the part of the file
# stem before the first '_'.
# Built as a new frame instead of mutating in place, which avoids writing into
# a possibly sliced DataFrame; 'img_dir' is dropped as before.
df_test2 = (
    df_test2
    .assign(group=df_test2['img_dir'].map(lambda p: Path(p).stem.split('_')[0]))
    .drop(columns=['img_dir'])
)

In [3]:
feature_cols = [col for col in df_test2.columns if col not in ['group', 'label']]

# Work on a positionally indexed copy: each row's index label is then its row
# position in df_test2, which is used below to restore the original row order.
eyes = df_test2.reset_index(drop=True)

# Group by 'group' (pairs of rows)
grouped = eyes.groupby('group', sort=False)

rows = []
row_positions = []  # position in df_test2 of the focus row of each record
for patient_id, group in grouped:
    # Each group must be exactly one pair; anything else would either crash
    # with an opaque IndexError or silently drop rows.
    if len(group) != 2:
        raise ValueError(
            f'group {patient_id!r} has {len(group)} row(s); expected exactly 2'
        )
    first, second = group.iloc[0], group.iloc[1]

    # One record per row of the pair: *_X holds the focus row's features,
    # *_O holds the features of the other row in the same group.
    for focus, other in ((first, second), (second, first)):
        record = {}
        for col in feature_cols:
            record[f'{col}_X'] = focus[col]
            record[f'{col}_O'] = other[col]
        rows.append(record)
        row_positions.append(focus.name)

# groupby skips rows whose key is missing; make that visible instead of
# returning fewer rows than df_test2 has.
if len(rows) != len(eyes):
    raise ValueError(
        f'paired {len(rows)} rows but df_test2 has {len(eyes)}; '
        "check for missing values in 'group'"
    )

# Final DataFrame, re-sorted so that row i corresponds to row i of df_test2
# even when the two rows of a group are not adjacent in the input.
df_test2_paired = (
    pd.DataFrame(rows, index=row_positions)
    .sort_index()
    .reset_index(drop=True)
)

In [5]:
import joblib
from catboost import CatBoostRegressor

# One pickled stacking model per fold (folds 1..5).
# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
stacking_models_dirs = [
    f'stacking_models/stack-cb_gpu-train2-dyn1-fold{fold}.pkl'
    for fold in range(1, 6)
]
stacking_models = list(map(joblib.load, stacking_models_dirs))

In [6]:
# Inspect which input columns the first loaded stacking model reports as its features.
stacking_models[0].feature_names_

['convnext_0',
 'convnext_1',
 'convnext_2',
 'convnext_3',
 'convnext_4',
 'effnet_0',
 'effnet_1',
 'effnet_2',
 'effnet_3',
 'effnet_4']

In [None]:
import joblib
from tabpfn import TabPFNRegressor

# Alternative stacker set: one pickled model per fold (folds 1..5).
# Running this cell replaces the `stacking_models` loaded earlier.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
stacking_models_dirs = [
    f'stacking_models/stack-tabpfn-train2-finetune1-fold{fold}.pkl'
    for fold in range(1, 6)
]
stacking_models = list(map(joblib.load, stacking_models_dirs))

In [None]:
# # preds = [model.predict(df_test2_paired) for model in stacking_models]
# # preds = [model.predict(df_test2.drop(columns=['img_dir'])) for model in stacking_models]
# preds = [model.predict(df_test2_paired) for model in stacking_models]

In [7]:
# Every stacker gets the same inputs: all columns except the image path.
stack_features = df_test21.drop(columns=['img_dir'])
preds = [stacker.predict(stack_features) for stacker in stacking_models]

In [8]:
# Peek at the predictions of the first stacking model.
preds[0]

array([1.8210509 , 2.80315379, 2.71027262, ..., 2.17839626, 3.23544422,
       1.92815434])

In [9]:
# Peek at the predictions of the second stacking model for comparison.
preds[1]

array([1.64177996, 2.99300709, 2.53380339, ..., 2.14300888, 3.30841184,
       1.68540037])

In [10]:
import numpy as np

num_classes = 5

# Element-wise average of the per-model prediction vectors.
preds_mean = np.asarray(preds).mean(axis=0)
# Round to the nearest integer and clamp into [0, num_classes - 1].
preds_mean_discrete = np.rint(preds_mean).clip(0, num_classes - 1).astype(int)

In [None]:
# preds_convnext_r_discrete = np.clip(np.rint(df_test['convnext-r_pred'].values), 0, num_classes - 1).astype(int)

In [None]:
# # 1) Select only the convnext probability columns
# cols = [f'convnext_{i}' for i in range(5)]
# probs = df_test[cols].values  # shape (n_samples, 5)

# # 2) Argmax along axis=1
# preds_convnext_discrete = np.argmax(probs, axis=1)  # array of ints in [0..4]

.

In [18]:
# Sample submission file shipped with the APTOS data.
df_ss1 = pd.read_csv(filepath_or_buffer='aptos2019-blindness-detection/sample_submission.csv')

In [11]:
len('aptos2019-blindness-detection/test_images/processed/')

52

In [12]:
from pathlib import Path

# id_code is the image file name without directory or extension. Taking the
# path stem replaces the fixed 52 / -4 character offsets, so the result no
# longer depends on the exact directory prefix or extension length.
df_submit1 = pd.DataFrame({
    'id_code': df_test21['img_dir'].map(lambda p: Path(p).stem),
})

In [13]:
# Attach the discretised ensemble prediction as the 'diagnosis' column.
df_submit1 = df_submit1.assign(diagnosis=preds_mean_discrete)

In [14]:
# Write the APTOS submission file (no index column).
df_submit1.to_csv(path_or_buf='aptos2019-blindness-detection/submission.csv', index=False)

.

In [10]:
# Sample submission file shipped with the diabetic-retinopathy data.
df_ss2 = pd.read_csv(filepath_or_buffer='diabetic-retinopathy-detection/sampleSubmission.csv')

In [11]:
# Build the 'image' column from the saved image paths: strip the 46-character
# directory prefix and the trailing 5 characters of the file name.
# df_submit2['img_dir'] = df_test[df_test['img_dir'].str\
#                                 .contains('diabetic-retinopathy-detection')]['img_dir']
img_paths = pd.read_csv('df_test2.csv')['img_dir']
df_submit2 = pd.DataFrame({'image': img_paths.str.slice(start=46, stop=-5)})

In [12]:
# Attach the discretised ensemble prediction as the 'level' column.
df_submit2 = df_submit2.assign(level=preds_mean_discrete)

In [13]:
# Write the diabetic-retinopathy submission file (no index column).
df_submit2.to_csv(path_or_buf='diabetic-retinopathy-detection/submission.csv', index=False)

============================================================

In [60]:
# Length of the APTOS test-image directory prefix; the slice on 'img_dir' in
# the next cell uses this value as its start offset.
len('aptos2019-blindness-detection/test_images/processed/')

52

In [62]:
# Select the APTOS rows, reduce each path to the bare file name (drop the
# 52-character prefix and the last 4 characters), rename to the submission
# column names and write the file.
aptos_rows = df_test['img_dir'].str.contains('aptos2019-blindness-detection')
df_submit1 = df_test.loc[aptos_rows, ['img_dir', 'level']].copy()
df_submit1['img_dir'] = (
    df_submit1['img_dir']
    .str.slice(start=52)
    .str.slice(stop=-4)
)
df_submit1 = df_submit1.rename(columns={'img_dir': 'id_code', 'level': 'diagnosis'})
df_submit1.to_csv('aptos2019-blindness-detection/submission.csv', index=False)

.

In [28]:
# Length of the diabetic-retinopathy test-image directory prefix; the slice on
# 'img_dir' in the next cell uses this value as its start offset.
len('diabetic-retinopathy-detection/test/processed/')

46

In [None]:
# Select the diabetic-retinopathy rows, reduce each path to the bare file name
# (drop the 46-character prefix and the last 5 characters), rename the path
# column to 'image' and write the file.
dr_rows = df_test['img_dir'].str.contains('diabetic-retinopathy-detection')
df_submit2 = df_test.loc[dr_rows, ['img_dir', 'level']].copy()
df_submit2['img_dir'] = (
    df_submit2['img_dir']
    .str.slice(start=46)
    .str.slice(stop=-5)
)
df_submit2 = df_submit2.rename(columns={'img_dir': 'image'})
df_submit2.to_csv('diabetic-retinopathy-detection/submission.csv', index=False)