In [35]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F
import numpy as np

In [36]:
class SEBlock(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)
        
class RetinaMultiLabelDataset(Dataset):
    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_path = os.path.join(self.image_dir, row.iloc[0])
        img = Image.open(img_path).convert("RGB")
        labels = torch.tensor(row[1:].values.astype("float32"))
        if self.transform:
            img = self.transform(img)
        return img, labels

class FocalLoss(nn.Module):
    def __init__(self, alpha=[0.5, 0.85, 0.85], gamma=2.0): #Best:0.5 0.9 0.9
        super().__init__()
        if alpha is not None:
            self.alpha = torch.tensor(alpha, dtype=torch.float)
        else:
            self.alpha = None
        self.gamma = gamma

    def forward(self, inputs, targets):
        bce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        p = torch.sigmoid(inputs)
        p_t = p * targets + (1 - p) * (1 - targets)
        focal_weight = (1 - p_t) ** self.gamma
        loss = focal_weight * bce_loss

        if self.alpha is not None:
            alpha_t = self.alpha.to(inputs.device) * targets + (1 - targets)
            loss = alpha_t * loss

        return loss.mean()

class ClassBalancedBCE(nn.Module):
    def __init__(self, samples_per_class, beta=0.999, reduction='mean'):
        super().__init__()
        effective_num = 1.0 - beta ** samples_per_class
        weights = (1.0 - beta) / effective_num.clamp(min=1e-6)
        weights = weights / weights.sum() * len(samples_per_class)
        self.register_buffer('weights', weights)
        self.reduction = reduction

    def forward(self, inputs, targets):
        bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        weight = self.weights[targets.long().argmax(dim=1)]  # pick weight of positive class
        weight = targets * self.weights[0] + (1 - targets) * self.weights[1]  # if you want per-class
        loss = weight * bce
        return loss.mean() if self.reduction == 'mean' else loss.sum()


class ClassBalancedLoss(nn.Module):
    def __init__(self, num_samples_per_class, beta=0.9):
   
        super().__init__()
        num_samples_per_class = torch.tensor(num_samples_per_class, dtype=torch.float)
        effective_num = 1.0 - beta ** num_samples_per_class
        weights = (1.0 - beta) / torch.clamp(effective_num, min=1e-6)
        weights = weights / weights.sum() * len(num_samples_per_class)
        self.register_buffer('weights', weights)

    def forward(self, inputs, targets):
        weights = self.weights.to(inputs.device)  
        bce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        modulator = targets * weights
        loss = modulator * bce_loss
        return loss.mean()

class FocalCBLoss(nn.Module):
    def __init__(self, num_samples_per_class, beta=0.5, gamma=2.0):
        super().__init__()
        # CB weights
        num_samples = torch.tensor(num_samples_per_class, dtype=torch.float)
        effective_num = 1.0 - beta ** num_samples
        weights = (1.0 - beta) / torch.clamp(effective_num, min=1e-6)
        weights = weights / weights.sum() * len(num_samples_per_class)
        self.register_buffer('weights', weights)
        self.gamma = gamma

    def forward(self, inputs, targets):
        weights = self.weights.to(inputs.device)
        bce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        pt = torch.exp(-bce_loss)
        focal_loss = (1 - pt) ** self.gamma * bce_loss  
        modulator = targets * weights  
        loss = modulator * focal_loss
        return loss.mean()

In [37]:
def build_model(backbone="efficientnet", num_classes=3, pretrained=True, attention_type=None):
    model = None

    if backbone == "resnet18":
        model = models.resnet18(pretrained=pretrained)
        model.fc = nn.Linear(model.fc.in_features, num_classes)

    elif backbone == "efficientnet":
        model = models.efficientnet_b0(pretrained=pretrained)

        if attention_type == 'se':
            channels = [16, 24, 40, 80, 112, 192, 320]
            reduction = 16  

            se_blocks = nn.ModuleList([SEBlock(ch, reduction=reduction) for ch in channels])

            new_features = []
            se_idx = 0
            for i, module in enumerate(model.features):
                if i in [1, 2, 3, 4, 5, 6, 7]:  
                    if se_idx < len(se_blocks):
                        wrapped = nn.Sequential(module, se_blocks[se_idx])
                        new_features.append(wrapped)
                        se_idx += 1
                    else:
                        new_features.append(module)
                else:
                    new_features.append(module)

            model.features = nn.Sequential(*new_features)

            model.global_pool = nn.AdaptiveAvgPool2d(1)
            model.classifier = nn.Sequential(
                nn.Flatten(),
                nn.Dropout(p=0.2),
                nn.Linear(1280, num_classes)
            )

        elif attention_type == 'mha':
            embed_dim = 1280
            num_heads = 4

            class GlobalMHABlock(nn.Module):
                def __init__(self, embed_dim, num_heads):
                    super().__init__()
                    self.mha = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
                    self.norm = nn.LayerNorm(embed_dim)

                def forward(self, x):
                    B, C, H, W = x.shape
                    x_tokens = x.flatten(2).transpose(1, 2)
                    attn_out, _ = self.mha(x_tokens, x_tokens, x_tokens)
                    attn_out = self.norm(attn_out)
                    return attn_out.mean(dim=1)

            model.global_pool = nn.Identity()
            model.mha_block = GlobalMHABlock(embed_dim, num_heads)
            model.head = nn.Linear(embed_dim, num_classes)

            def forward(x):
                x = model.features(x)
                x = model.mha_block(x)
                x = model.head(x)
                return x

            model.forward = forward

        else:
            # Default
            model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

    elif backbone == "swin_tiny":
        import timm
        model = timm.create_model('swin_tiny_patch4_window7_224', pretrained=pretrained, num_classes=num_classes)
        
    else:
        raise ValueError("Unsupported backbone")

    return model

def set_finetune_mode(model, mode, backbone):
    """
    mode:
    1 = no finetuning
    2 = frozen backbone, train classifier
    3 = full finetuning
    """
    if mode == 1:
        for p in model.parameters():
            p.requires_grad = False

    elif mode == 2:
        for p in model.parameters():
            p.requires_grad = False
        if backbone == "resnet18":
            for p in model.fc.parameters():
                p.requires_grad = True
        elif backbone == "efficientnet":
            for p in model.classifier.parameters():
                p.requires_grad = True

    elif mode == 3:
        for p in model.parameters():
            p.requires_grad = True

def finetune_mode_name(mode):
    return {
        1: "no_ft",
        2: "frozen",
        3: "full"
    }[mode]



def find_best_thresholds(y_true, y_prob, num_classes=3):
    """
    Grid search for per-class thresholds that maximize average F1 on offsite test set.
    y_true: numpy array (N, 3) binary labels
    y_prob: numpy array (N, 3) probabilities from model
    Returns: list of 3 best thresholds [thresh_DR, thresh_G, thresh_A]
    """
    best_thresh = [0.5] * num_classes
    best_avg_f1 = 0.0

    thresholds = np.arange(0.2, 0.8, 0.05)  

    for thresh_dr in thresholds:
        for thresh_g in thresholds:
            for thresh_a in thresholds:
                thresh = np.array([thresh_dr, thresh_g, thresh_a])
                preds = (y_prob > thresh).astype(int)

                f1_dr = f1_score(y_true[:, 0], preds[:, 0])
                f1_g = f1_score(y_true[:, 1], preds[:, 1])
                f1_a = f1_score(y_true[:, 2], preds[:, 2])
                avg_f1 = (f1_dr + f1_g + f1_a) / 3

                if avg_f1 > best_avg_f1:
                    best_avg_f1 = avg_f1
                    best_thresh = [thresh_dr, thresh_g, thresh_a]

    print(f"Best thresholds: DR={best_thresh[0]:.3f}, G={best_thresh[1]:.3f}, A={best_thresh[2]:.3f}")
    print(f"Best average F1 on offsite: {best_avg_f1:.4f}")
    return best_thresh

In [38]:

def train_one_backbone(backbone, finetune_mode, train_csv, val_csv, test_csv, train_image_dir, val_image_dir, test_image_dir,
                       epochs=50, batch_size=32, lr=1e-4, img_size=256, save_dir="/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints", pretrained_backbone=None,
                       patience=5,focal_gamma=None, samples_per_class=None,loss_type='bce',attention_type=None):  # New: patience for early stopping
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # transforms
    train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
    val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
    train_ds = RetinaMultiLabelDataset(train_csv, train_image_dir, train_transform)
    val_ds   = RetinaMultiLabelDataset(val_csv, val_image_dir, val_test_transform)
    test_ds  = RetinaMultiLabelDataset(test_csv, test_image_dir, val_test_transform)




    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0)

    model = build_model(backbone, num_classes=3, pretrained=True, attention_type=attention_type).to(device)
  
    best_val_loss = float("inf")
    os.makedirs(save_dir, exist_ok=True)
    mode_name = finetune_mode_name(finetune_mode)
    ckpt_path = os.path.join(save_dir, f"{backbone}_{mode_name}.pt")

    # load pretrained backbone
    if pretrained_backbone is not None:
        state_dict = torch.load(pretrained_backbone, map_location="cpu")
        model.load_state_dict(state_dict)

    set_finetune_mode(model, finetune_mode, backbone)

    if finetune_mode == 1:
        torch.save(model.state_dict(), ckpt_path)
        print(f"Saved pretrained model for {backbone} (no fine-tuning) at {ckpt_path}")
        return  


    if loss_type == "bce":
        criterion = nn.BCEWithLogitsLoss()
    elif loss_type == "focal":
        criterion = FocalLoss(alpha=1, gamma=focal_gamma)
    elif loss_type == "cb":
        criterion = ClassBalancedLoss(samples_per_class)
    elif loss_type == 'focal_cb':
        criterion = FocalCBLoss(samples_per_class, beta=0.9, gamma=focal_gamma)
    else:
        raise ValueError("Unknown loss_type")
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

    epochs_no_improve = 0
    early_stop = False

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * imgs.size(0)

        train_loss /= len(train_loader.dataset)

        model.eval()
        val_loss = 0

        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * imgs.size(0)
        val_loss /= len(val_loader.dataset)


        print(f"[{backbone}] Epoch {epoch+1}/{epochs} Train Loss: {train_loss:.8f} Val Loss: {val_loss:.8f}")
        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), ckpt_path)
            print(f"Saved best model for {backbone} at {ckpt_path}")
            epochs_no_improve = 0  
        else:
            epochs_no_improve += 1
            print(f"No improvement in val loss for {epochs_no_improve} epochs")

    
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            early_stop = True
            break


def evaluate_model(
    ckpt_path,
    backbone,
    test_csv,
    test_image_dir,
    batch_size=32,
    img_size=224,
    attention_type=None  
):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    test_ds = RetinaMultiLabelDataset(test_csv, test_image_dir, transform)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0)

  
    model = build_model(backbone, num_classes=3, pretrained=False, attention_type=attention_type)
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.to(device)
    model.eval()

    y_true_list = []
    y_prob_list = []

    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            y_prob_list.append(probs)
            y_true_list.append(labels.numpy())

    y_true = np.concatenate(y_true_list)
    y_prob = np.concatenate(y_prob_list)

    disease_names = ["DR", "Glaucoma", "AMD"]
    results = {}
    preds = (y_prob > 0.5).astype(int)
    for i, disease in enumerate(disease_names):
        y_t = y_true[:, i]
        y_p = preds[:, i]
        results[disease] = {
            "precision": precision_score(y_t, y_p, zero_division=0),
            "recall": recall_score(y_t, y_p, zero_division=0),
            "f1": f1_score(y_t, y_p, zero_division=0),
        }
    results["average_f1"] = np.mean([results[d]["f1"] for d in disease_names])

    return results

class OnsiteDataset(Dataset):
    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_name = row['id']
        img_path = os.path.join(self.image_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, img_name

# Now define predict_onsite
def predict_onsite(
    ckpt_path,
    backbone,
    onsite_csv,
    onsite_image_dir,
    output_csv='submission.csv',
    batch_size=32,
    img_size=256,
    best_thresholds=[0.50, 0.5, 0.55],
    use_tta=True,
    attention_type=None  # 
):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    base_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    tta_transforms = [base_transform]
    if use_tta:
        tta_transforms.append(
            transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.RandomHorizontalFlip(p=1.0),  
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        )

    model = build_model(
        backbone=backbone,
        num_classes=3,
        pretrained=False, 
        attention_type=attention_type
    )
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.to(device)
    model.eval()

    all_probs = []
    image_ids = None

    with torch.no_grad():
        for transform in tta_transforms:
            dataset = OnsiteDataset(onsite_csv, onsite_image_dir, transform=transform)
            loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0)

            probs_batch = []
            ids_batch = []

            for imgs, batch_ids in loader:
                imgs = imgs.to(device)
                outputs = model(imgs)
                prob = torch.sigmoid(outputs).cpu().numpy()
                probs_batch.append(prob)
                ids_batch.extend(batch_ids)

            all_probs.append(np.concatenate(probs_batch, axis=0))
            if image_ids is None:
                image_ids = ids_batch

    final_probs = np.mean(all_probs, axis=0)

    if best_thresholds is None:
        best_thresholds = [0.50, 0.5, 0.55]
    preds = (final_probs > np.array(best_thresholds)).astype(int)

    df = pd.DataFrame({
        'id': image_ids,
        'D': preds[:, 0],
        'G': preds[:, 1],
        'A': preds[:, 2]
    })

    df = df.sort_values('id').reset_index(drop=True)
    df.to_csv(output_csv, index=False)
    print(f"Saved onsite predictions to {output_csv}")

In [39]:
if __name__ == "__main__":
  samples_per_class = [517,163,142]

  experiments = [
    ("resnet18", 1),
    ("resnet18", 2),
    ("resnet18", 3),
    ("efficientnet", 1),
    ("efficientnet", 2),
    ("efficientnet", 3),
  ]
  pretrained_paths = {
        "resnet18": 'final_project_resources/pretrained_backbone/ckpt_resnet18_ep50.pt',  
        "efficientnet": 'final_project_resources/pretrained_backbone/ckpt_efficientnet_ep50.pt'  
  }
  train_csv = "final_project_resources/train.csv" # replace with your own train label file path
  val_csv   = "final_project_resources/val.csv" # replace with your own validation label file path
  test_csv  = "final_project_resources/offsite_test.csv"  # replace with your own test label file path
  train_image_dir ="final_project_resources/images/train"   # replace with your own train image floder path
  val_image_dir = "final_project_resources/images/val"  # replace with your own validation image floder path
  test_image_dir = "final_project_resources/images/offsite_test" # replace with your own test image floder path


  '''for backbone, mode in experiments:
      train_one_backbone(
          backbone=backbone,
          finetune_mode=mode,
          train_csv=train_csv,
          val_csv=val_csv,
          test_csv=test_csv,  # Offsite for evaluation
          train_image_dir=train_image_dir,
          val_image_dir=val_image_dir,
          test_image_dir=test_image_dir,
          epochs=200,
          lr=1e-3 if mode != 3 else 1e-4,
          patience=10,
          pretrained_backbone=pretrained_paths.get(backbone)  # None if not found, but you need them
      )'''

  train_one_backbone(
    backbone="swin_tiny",          
    finetune_mode=3,
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=test_csv,
    train_image_dir=train_image_dir,
    val_image_dir=val_image_dir,
    test_image_dir=test_image_dir,
    epochs=100,
    batch_size=16,                 
    lr=1e-4,                       
    img_size=224,                  
    save_dir="final_project_resources/results/checkpoints_task4",
    pretrained_backbone=None,       
    patience=5,
    loss_type='focal',              
    focal_gamma=1.3,
    attention_type=None             
)





[swin_tiny] Epoch 1/100 Train Loss: 0.20187182 Val Loss: 0.22807651
Saved best model for swin_tiny at final_project_resources/results/checkpoints_task4\swin_tiny_full.pt
[swin_tiny] Epoch 2/100 Train Loss: 0.19273028 Val Loss: 0.20975644
Saved best model for swin_tiny at final_project_resources/results/checkpoints_task4\swin_tiny_full.pt
[swin_tiny] Epoch 3/100 Train Loss: 0.16557607 Val Loss: 0.19626350
Saved best model for swin_tiny at final_project_resources/results/checkpoints_task4\swin_tiny_full.pt
[swin_tiny] Epoch 4/100 Train Loss: 0.15297746 Val Loss: 0.18066468
Saved best model for swin_tiny at final_project_resources/results/checkpoints_task4\swin_tiny_full.pt
[swin_tiny] Epoch 5/100 Train Loss: 0.14586738 Val Loss: 0.16349436
Saved best model for swin_tiny at final_project_resources/results/checkpoints_task4\swin_tiny_full.pt
[swin_tiny] Epoch 6/100 Train Loss: 0.12949314 Val Loss: 0.14923015
Saved best model for swin_tiny at final_project_resources/results/checkpoints_task

In [40]:

'''experiments = [
    {
        "name": "ResNet18 - No Fine-tuning",
        "backbone": "resnet18",
        "ckpt_path": "/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints/resnet18_no_ft.pt",
        "mode": "no_ft"
    },
    {
        "name": "ResNet18 - Frozen Backbone",
        "backbone": "resnet18",
        "ckpt_path": "/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints/resnet18_frozen.pt",
        "mode": "frozen"
    },
    {
        "name": "ResNet18 - Full Fine-tuning",
        "backbone": "resnet18",
        "ckpt_path": "/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints/resnet18_full.pt",
        "mode": "full"
    },
    {
        "name": "EfficientNet - No Fine-tuning",
        "backbone": "efficientnet",
        "ckpt_path": "/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints/efficientnet_no_ft.pt",
        "mode": "no_ft"
    },
    {
        "name": "EfficientNet - Frozen Backbone",
        "backbone": "efficientnet",
        "ckpt_path": "/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints/efficientnet_frozen.pt",
        "mode": "frozen"
    },
    {
        "name": "EfficientNet - Full Fine-tuning",
        "backbone": "efficientnet",
        "ckpt_path": "/content/drive/MyDrive/Deep Learning P2 Oulu/final-project-deep-learning-fall-2025/final_project_resources/results/checkpoints/efficientnet_full.pt",
        "mode": "full"
    },
]'''

experiments = [

    {
        "name": "Swin",
        "backbone": "swin_tiny",
        "ckpt_path": "final_project_resources/results/checkpoints_task4/swin_tiny_full.pt",
        "mode": "full"
    },
]


offsite_test_csv = 'final_project_resources/offsite_test.csv'
offsite_image_dir = 'final_project_resources/images/offsite_test'

onsite_csv = 'final_project_resources/onsite_test_submission.csv'
onsite_image_dir = 'final_project_resources/images/onsite_test'
predictions_dir = 'final_project_resources/predictions'

os.makedirs(predictions_dir, exist_ok=True)


all_offsite_results = {}

print("=== EVALUATING ON OFFSITE TEST SET ===\n")
for exp in experiments:
    name = exp["name"]
    backbone = exp["backbone"]
    ckpt = exp["ckpt_path"]

    print(f"Evaluating: {name}")
    results = evaluate_model(
        ckpt_path=ckpt,
        backbone=backbone,
        test_csv=offsite_test_csv,
        test_image_dir=offsite_image_dir, attention_type=None
    )

    all_offsite_results[name] = results

    # Print nicely

    for disease in ["DR", "Glaucoma", "AMD"]:
        print(f"   {disease}: Precision={results[disease]['precision']:.4f}, "
              f"Recall={results[disease]['recall']:.4f}, "
              f"F1={results[disease]['f1']:.4f}")
    print("-" * 50)



=== EVALUATING ON OFFSITE TEST SET ===

Evaluating: Swin
   DR: Precision=0.9044, Recall=0.8786, F1=0.8913
   Glaucoma: Precision=0.8889, Recall=0.6531, F1=0.7529
   AMD: Precision=0.6250, Recall=0.6818, F1=0.6522
--------------------------------------------------


In [41]:
print("\n=== GENERATING ONSITE PREDICTIONS FOR KAGGLE SUBMISSION ===\n")
for exp in experiments:
    name = exp["name"]
    backbone = exp["backbone"]
    ckpt = exp["ckpt_path"]
    mode = exp["mode"]

    # Clean filename (replace spaces and special chars)
    safe_name = name.replace(" ", "_").replace("-", "_").lower()
    output_csv = os.path.join(predictions_dir, f"submission_Task2efficient_focal_81{backbone}_{mode}.csv")

    print(f"Generating submission for: {name} → {output_csv}")

    predict_onsite(
        ckpt_path="final_project_resources/results/checkpoints_task4/swin_tiny_full.pt",
        backbone="swin_tiny",
        onsite_csv=onsite_csv,
        onsite_image_dir=onsite_image_dir,
        output_csv="submission_task4_swin_final4.csv",
        img_size=224,                   
        use_tta=True,
        best_thresholds=[0.5,0.5,0.5]  
    )

    print(f"Saved: {output_csv}\n")


=== GENERATING ONSITE PREDICTIONS FOR KAGGLE SUBMISSION ===

Generating submission for: Swin → final_project_resources/predictions\submission_Task2efficient_focal_81swin_tiny_full.csv
Saved onsite predictions to submission_task4_swin_final4.csv
Saved: final_project_resources/predictions\submission_Task2efficient_focal_81swin_tiny_full.csv



What more to try? Treshholds on onsite inference, Increase Lr