In [None]:
# Colab-specific: mount Google Drive at /content/drive so that the CSV files,
# images and checkpoints referenced by the cells below can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Task 1

In [None]:
import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score


In [None]:
# ========================
# Dataset preparation
# ========================
class RetinaMultiLabelDataset(Dataset):
    """Multi-label image dataset described by a CSV annotation file.

    The first CSV column is used as the image file name (joined onto
    ``image_dir``); every remaining column is converted to a float32 label.
    Each item is an ``(image, labels)`` pair, where the image is an RGB PIL
    image passed through ``transform`` when one was supplied.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        # One sample per CSV row.
        return self.data.shape[0]

    def __getitem__(self, idx):
        record = self.data.iloc[idx]
        file_name, label_values = record.iloc[0], record.iloc[1:]

        image = Image.open(os.path.join(self.image_dir, file_name)).convert("RGB")
        targets = torch.tensor(label_values.values.astype("float32"))

        if self.transform:
            image = self.transform(image)
        return image, targets





In [None]:
# ========================
# build model
# ========================
def build_model(backbone="resnet18", num_classes=3, pretrained=True):
    """Create a torchvision classifier whose final layer has ``num_classes`` outputs.

    Args:
        backbone: ``"resnet18"`` or ``"efficientnet"`` (EfficientNet-B0).
        num_classes: number of output logits of the replaced head.
        pretrained: when true, start from the ImageNet weights shipped with
            torchvision; when false, use random initialisation.

    Returns:
        The model with its classification layer replaced by a fresh
        ``nn.Linear``.

    Raises:
        ValueError: if ``backbone`` is not one of the supported names.
    """
    # The boolean ``pretrained=`` argument of torchvision is deprecated in favour
    # of ``weights=``; IMAGENET1K_V1 is what ``pretrained=True`` used to select.
    if backbone == "resnet18":
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.resnet18(weights=weights)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif backbone == "efficientnet":
        weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.efficientnet_b0(weights=weights)
        # classifier is (Dropout, Linear); only the Linear layer is swapped.
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    else:
        raise ValueError(
            f"Unsupported backbone: {backbone!r} (expected 'resnet18' or 'efficientnet')"
        )
    return model




In [None]:
# ========================
# model training and val
# ========================
def train_one_backbone(backbone, train_csv, val_csv, test_csv, train_image_dir, val_image_dir, test_image_dir,
                       epochs=10, batch_size=32, lr=1e-4, img_size=256, save_dir="checkpoints",pretrained_backbone=None, mode="full_fine_tuning"):
    """Train (or only evaluate) one backbone, then test it and export predictions.

    Pipeline: build the train/val/test loaders, create the model and optionally
    load ``pretrained_backbone`` into it, apply the freezing policy selected by
    ``mode``, train with ``BCEWithLogitsLoss`` while keeping the checkpoint with
    the lowest validation loss, reload that checkpoint, print per-disease test
    metrics and write the thresholded (0.5) test predictions to a CSV file.

    Args:
        backbone: ``"resnet18"`` or ``"efficientnet"``; forwarded to ``build_model``.
        train_csv, val_csv, test_csv: label files read by ``RetinaMultiLabelDataset``.
            ``test_csv`` must also contain an ``id`` column, which is copied into
            the prediction file.
        train_image_dir, val_image_dir, test_image_dir: image folders for each split.
        epochs: number of training epochs (ignored for ``"no_fine_tuning"``).
        batch_size: batch size used by all three loaders.
        lr: Adam learning rate.
        img_size: images are resized to ``(img_size, img_size)``.
        save_dir: directory (created if missing) for ``best_{backbone}.pt``.
        pretrained_backbone: optional path to a state dict loaded into the model.
        mode: ``"no_fine_tuning"`` (no training, evaluate the loaded weights),
            ``"frozen_backbone"`` (train only the classification head) or
            ``"full_fine_tuning"`` (train every parameter).

    Raises:
        ValueError: if ``mode`` is not one of the three supported values.
    """
    # Fail fast on a misspelled mode instead of silently training every parameter.
    valid_modes = ("no_fine_tuning", "frozen_backbone", "full_fine_tuning")
    if mode not in valid_modes:
        raise ValueError(f"Unsupported mode: {mode!r}; expected one of {valid_modes}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # transforms
    train_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        # Augmentations:
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    ])

    # Validation and test use the deterministic pipeline only (no augmentation).
    val_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    ])

    # dataset & dataloader
    train_ds = RetinaMultiLabelDataset(train_csv, train_image_dir, train_transform)
    val_ds   = RetinaMultiLabelDataset(val_csv, val_image_dir, val_transform)
    test_ds  = RetinaMultiLabelDataset(test_csv, test_image_dir, val_transform)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=4)

    # model (weights come from ``pretrained_backbone`` below, not from torchvision)
    model = build_model(backbone, num_classes=3, pretrained=False).to(device)

    # load pretrained backbone
    if pretrained_backbone is not None:
        state_dict = torch.load(pretrained_backbone, map_location="cpu")
        model.load_state_dict(state_dict)

    # freezing policy
    if mode == "no_fine_tuning":
        for p in model.parameters():
            p.requires_grad = False
    elif mode == "frozen_backbone":
        for p in model.parameters():
            p.requires_grad = False
        # Re-enable gradients for the classification head only.
        if backbone == "resnet18":
            for p in model.fc.parameters():
                p.requires_grad = True
        elif backbone == "efficientnet":
            for p in model.classifier.parameters():
                p.requires_grad = True
    elif mode == "full_fine_tuning":
        for p in model.parameters():
            p.requires_grad = True

    # loss
    criterion = nn.BCEWithLogitsLoss()

    # training
    best_val_loss = float("inf")
    os.makedirs(save_dir, exist_ok=True)
    ckpt_path = os.path.join(save_dir, f"best_{backbone}.pt")

    if mode == "no_fine_tuning":
        # Save the untouched weights so the testing section can reload them uniformly.
        print("skip training!")
        torch.save(model.state_dict(), ckpt_path)
    else:
        # optimizer: only parameters left trainable by the freezing policy
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
        for epoch in range(epochs):
            model.train()
            train_loss = 0
            for imgs, labels in train_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch value is a per-sample mean.
                train_loss += loss.item() * imgs.size(0)

            train_loss /= len(train_loader.dataset)

            # validation
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for imgs, labels in val_loader:
                    imgs, labels = imgs.to(device), labels.to(device)
                    outputs = model(imgs)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item() * imgs.size(0)
            val_loss /= len(val_loader.dataset)

            print(f"[{backbone}] Epoch {epoch+1}/{epochs} Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}")

            # save best
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), ckpt_path)
                print(f"Saved best model for {backbone} at {ckpt_path}")

    # ========================
    # testing
    # ========================
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for imgs, labels in test_loader:
            outputs = model(imgs.to(device))
            # Threshold the sigmoid probabilities at 0.5 to get 0/1 predictions.
            y_pred.append((torch.sigmoid(outputs) > 0.5).long().cpu())
            y_true.append(labels)

    # Concatenate per-batch tensors once instead of converting a list of arrays.
    y_true = torch.cat(y_true).numpy()
    y_pred = torch.cat(y_pred).numpy()

    disease_names = ["DR", "Glaucoma", "AMD"]

    for i, disease in enumerate(disease_names):  # compute metrics for every disease
        y_t = y_true[:, i]
        y_p = y_pred[:, i]

        acc = accuracy_score(y_t, y_p)
        precision = precision_score(y_t, y_p, average="macro",zero_division=0)
        recall = recall_score(y_t, y_p, average="macro",zero_division=0)
        f1 = f1_score(y_t, y_p, average="macro",zero_division=0)
        kappa = cohen_kappa_score(y_t, y_p)

        print(f"{disease} Results [{backbone}]")
        print(f"Accuracy : {acc:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall   : {recall:.4f}")
        print(f"F1-score : {f1:.4f}")
        print(f"Kappa    : {kappa:.4f}")

    # ========================
    # prediction generating
    # ========================
    # NOTE(review): the ids and predictions come from ``test_csv`` (whatever split
    # the caller passed), although the file name says "onsite" -- confirm intent.
    print("Saving predictions to final_onsite_prediction.csv")
    ids = pd.read_csv(test_csv)['id']
    onsite_prediction = pd.DataFrame(y_pred, columns=['D', 'G', 'A'])
    onsite_prediction.insert(0, 'id', ids)
    onsite_prediction.to_csv("/content/drive/MyDrive/Colab Notebooks/final_project_resources/predictions/final_onsite_prediction.csv", index=False) # replace with your own prediction output path

In [None]:
# ========================
# main Task1.1, 1.2, 1.3
# ========================
if __name__ == "__main__":
    # Label files (replace with your own paths).
    train_csv = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/train.csv"
    val_csv   = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/val.csv"
    test_csv  = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/offsite_test.csv"

    # Image folders, one per split (replace with your own paths).
    train_image_dir = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/images/train"
    val_image_dir = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/images/val"
    test_image_dir = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/images/offsite_test"

    # Where the best checkpoint is written, and the weights to start from.
    save_dir = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints"
    pretrained_backbone = '/content/drive/MyDrive/Colab Notebooks/final_project_resources/pretrained_backbone/ckpt_resnet18_ep50.pt'

    # backbone choices: ["resnet18", "efficientnet"]
    backbone = 'resnet18'

    # mode options: ["no_fine_tuning", "frozen_backbone", "full_fine_tuning"]
    train_one_backbone(
        backbone=backbone,
        train_csv=train_csv,
        val_csv=val_csv,
        test_csv=test_csv,
        train_image_dir=train_image_dir,
        val_image_dir=val_image_dir,
        test_image_dir=test_image_dir,
        epochs=50,
        batch_size=32,
        lr=1e-5,
        img_size=256,
        save_dir=save_dir,
        pretrained_backbone=pretrained_backbone,
        mode="full_fine_tuning",
    )



[resnet18] Epoch 1/50 Train Loss: 1.4598 Val Loss: 1.1865
Saved best model for resnet18 at /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/best_resnet18.pt
[resnet18] Epoch 2/50 Train Loss: 1.0537 Val Loss: 1.0278
Saved best model for resnet18 at /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/best_resnet18.pt
[resnet18] Epoch 3/50 Train Loss: 0.7984 Val Loss: 0.8737
Saved best model for resnet18 at /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/best_resnet18.pt
[resnet18] Epoch 4/50 Train Loss: 0.6190 Val Loss: 0.7798
Saved best model for resnet18 at /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/best_resnet18.pt
[resnet18] Epoch 5/50 Train Loss: 0.5253 Val Loss: 0.7400
Saved best model for resnet18 at /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/best_resnet18.pt
[resnet18] Epoch 6/50 Train Loss: 0.4534 Val Loss: 0.6833
Saved best model for resnet

# Task 2

In [None]:
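# Task 2 setup: imports, loss functions (focal and class-balanced), dataset, model builder, and train/eval/submission helpers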
# Colab-specific: make sure Google Drive is mounted before any path under
# /content/drive is used by the helpers defined in this cell.
from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score
import warnings
# NOTE(review): this hides every warning for the rest of the session (not only
# the noisy ones) -- consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Module-level device, used here only for the log line; the helper functions
# below each detect the device again for themselves.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" Device: {device}")

# ====================================================================
# FOCAL LOSS ONLY
# ====================================================================
class FocalLoss(nn.Module):
    """Element-wise binary focal loss computed on raw logits.

    Each element's BCE term is scaled by ``(1 - p_t) ** gamma``, where ``p_t``
    is the probability given to the target value, and by ``alpha`` for
    positive targets / ``1 - alpha`` for negative targets. The result is the
    mean over all elements.
    """

    def __init__(self, alpha=0.75, gamma=2.5):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        neg_targets = 1 - targets
        per_element_bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')

        prob_positive = torch.sigmoid(inputs)
        # Probability the model assigns to the value the target actually has.
        prob_of_target = prob_positive * targets + (1 - prob_positive) * neg_targets

        modulating = (1 - prob_of_target) ** self.gamma
        balance = self.alpha * targets + (1 - self.alpha) * neg_targets
        return (balance * modulating * per_element_bce).mean()

# ====================================================================
# CLASS-BALANCED LOSS ONLY
# ====================================================================
class ClassBalancedLoss(nn.Module):
    """BCE-with-logits loss with one fixed weight per class.

    The weight of a class with ``n`` samples is ``(1 - beta) / (1 - beta ** n)``;
    the weights are then rescaled so that they sum to the number of classes.
    ``forward`` multiplies the element-wise BCE by these weights (broadcast
    over the batch dimension) and returns the mean.
    """

    def __init__(self, samples_per_class, beta=0.9999):
        super().__init__()
        self.samples_per_class = torch.tensor(samples_per_class, dtype=torch.float32)
        self.beta = beta

        effective_num = 1.0 - torch.pow(self.beta, self.samples_per_class)
        raw_weights = (1.0 - self.beta) / effective_num
        # Normalise so the weights sum to the number of classes.
        self.weights = raw_weights / raw_weights.sum() * len(raw_weights)

    def forward(self, inputs, targets):
        # The weights are a plain attribute, so move them to the logits' device here.
        class_weights = self.weights.to(inputs.device).unsqueeze(0)
        per_element_bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        return (per_element_bce * class_weights).mean()

# ====================================================================
# DATASET
# ====================================================================
class RetinaDataset(Dataset):
    """CSV-backed multi-label image dataset.

    Column 0 of the CSV is the image file name inside ``image_dir``; all other
    columns are returned as a float32 label tensor. Items are
    ``(image, labels)`` with the image converted to RGB and passed through
    ``transform`` when one is set.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.transform = transform
        self.image_dir = image_dir
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        sample = self.data.iloc[idx]
        path = os.path.join(self.image_dir, sample.iloc[0])
        picture = Image.open(path).convert("RGB")
        label_tensor = torch.tensor(sample.iloc[1:].values.astype("float32"))
        if self.transform:
            picture = self.transform(picture)
        return picture, label_tensor

# ====================================================================
# MODEL
# ====================================================================
def build_model(backbone="resnet18", num_classes=3, dropout=0.5):
    """Create a randomly initialised backbone with a dropout + linear head.

    Args:
        backbone: ``"resnet18"`` or ``"efficientnet"`` (EfficientNet-B0).
        num_classes: number of output logits.
        dropout: dropout probability applied in front of the final linear layer.

    Returns:
        The torchvision model with its classification head replaced by
        ``nn.Sequential(nn.Dropout(dropout), nn.Linear(features, num_classes))``.

    Raises:
        ValueError: if ``backbone`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError`` on return).
    """
    if backbone == "resnet18":
        model = models.resnet18(weights=None)
        num_features = model.fc.in_features
        model.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(num_features, num_classes)
        )
    elif backbone == "efficientnet":
        model = models.efficientnet_b0(weights=None)
        # classifier is (Dropout, Linear); read the width before replacing it.
        num_features = model.classifier[1].in_features
        model.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(num_features, num_classes)
        )
    else:
        raise ValueError(
            f"Unsupported backbone: {backbone!r} (expected 'resnet18' or 'efficientnet')"
        )
    return model

# ====================================================================
# TRAINING FUNCTION
# ====================================================================
def train_task2(backbone, loss_fn, loss_name, train_csv, val_csv, test_csv,
               train_image_dir, val_image_dir, test_image_dir,
               pretrained_backbone, epochs=60, batch_size=12, lr=2e-5):
    """Fine-tune a backbone with a custom loss and keep the best-validation-F1 weights.

    The model is built with ``build_model``, initialised from every tensor of
    ``pretrained_backbone`` whose name and shape match, and trained end-to-end
    with AdamW, a one-cycle learning-rate schedule, mixed precision and
    gradient clipping. After each epoch the mean of the three per-label binary
    F1 scores on the validation set is computed (threshold 0.5); the weights
    are saved whenever it improves, and training stops after 20 epochs
    without improvement.

    Args:
        backbone: ``"resnet18"`` or ``"efficientnet"``.
        loss_fn: callable ``loss_fn(logits, targets)`` returning a scalar loss.
        loss_name: tag used in the checkpoint file name and in the log header.
        train_csv, val_csv, test_csv: label files read by ``RetinaDataset``.
        train_image_dir, val_image_dir, test_image_dir: image folders per split.
        pretrained_backbone: path to the state dict used for initialisation.
        epochs: maximum number of epochs.
        batch_size: batch size of all three loaders.
        lr: base learning rate; the one-cycle schedule peaks at ``3 * lr``.

    Returns:
        ``(ckpt_path, test_loader)``: the path of the saved best checkpoint and
        a loader over the test split (no augmentation, no shuffling).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_transform = transforms.Compose([
        transforms.Resize((288, 288)),
        transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(30),
        transforms.RandomAffine(degrees=0, translate=(0.15, 0.15)),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.15),
        transforms.RandomGrayscale(p=0.1),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # RandomErasing works on tensors, so it has to come after ToTensor.
        transforms.RandomErasing(p=0.4, scale=(0.02, 0.2)),
    ])

    val_test_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_ds = RetinaDataset(train_csv, train_image_dir, train_transform)
    val_ds = RetinaDataset(val_csv, val_image_dir, val_test_transform)
    test_ds = RetinaDataset(test_csv, test_image_dir, val_test_transform)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                             num_workers=2, pin_memory=True, drop_last=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False,
                           num_workers=2, pin_memory=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False,
                            num_workers=2, pin_memory=True)

    print(f" Train: {len(train_ds)}, Val: {len(val_ds)}, Test: {len(test_ds)}")

    model = build_model(backbone, num_classes=3, dropout=0.5).to(device)

    # Copy only the tensors whose key and shape match the freshly built model;
    # anything else in the checkpoint is ignored and the corresponding layers
    # keep their random initialisation.
    pretrained_dict = torch.load(pretrained_backbone, map_location=device)
    model_dict = model.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                      if k in model_dict and v.shape == model_dict[k].shape}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict, strict=False)

    for p in model.parameters():
        p.requires_grad = True

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-3)
    # The schedule is stepped once per batch, hence steps_per_epoch.
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr*3, epochs=epochs,
                                             steps_per_epoch=len(train_loader), pct_start=0.1)
    scaler = torch.cuda.amp.GradScaler()

    # Start below any reachable F1 so the first epoch always writes a checkpoint;
    # otherwise a run stuck at F1 == 0 would return a path that is missing or
    # left over from an earlier run.
    best_val_f1 = float("-inf")
    patience_counter = 0
    max_patience = 20

    save_dir = "/content/drive/MyDrive/Colab Notebooks/final_project_resources/task2"
    os.makedirs(save_dir, exist_ok=True)
    ckpt_path = os.path.join(save_dir, f"{backbone}_{loss_name}.pt")

    print(f"\n{'='*70}")
    print(f" TASK 2: {backbone} + {loss_name}")
    print(f"{'='*70}\n")

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        samples_seen = 0

        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                outputs = model(imgs)
                loss = loss_fn(outputs, labels)

            scaler.scale(loss).backward()
            # Unscale first so the clipping threshold applies to the true gradients.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            train_loss += loss.item() * imgs.size(0)
            samples_seen += imgs.size(0)

        # drop_last=True discards the final partial batch, so normalise by the
        # samples actually processed rather than by the dataset size.
        train_loss /= max(samples_seen, 1)

        model.eval()
        val_loss = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                with torch.cuda.amp.autocast():
                    outputs = model(imgs)
                    loss = loss_fn(outputs, labels)
                val_loss += loss.item() * imgs.size(0)
                probs = torch.sigmoid(outputs).cpu().numpy()
                preds = (probs > 0.5).astype(int)
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())

        val_loss /= len(val_loader.dataset)
        all_preds = np.array(all_preds)
        all_labels = np.array(all_labels)
        # Model selection metric: mean of the per-label binary F1 scores.
        val_f1s = [f1_score(all_labels[:, i], all_preds[:, i], average='binary', zero_division=0) for i in range(3)]
        val_f1 = np.mean(val_f1s)

        print(f"E{epoch+1:02d}/{epochs} | Train:{train_loss:.4f} | Val:{val_loss:.4f} | F1:{val_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), ckpt_path)
            print(f"   BEST! F1: {val_f1:.4f}")
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= max_patience:
            print(f"\n Early stop at epoch {epoch+1}")
            break

    print(f"\n Complete! Best Val F1: {best_val_f1:.4f}\n")
    return ckpt_path, test_loader

def evaluate_task2(model_path, backbone, test_loader):
    """Evaluate a saved Task 2 checkpoint on ``test_loader`` and print a report.

    The checkpoint is loaded into a model built by ``build_model``; sigmoid
    outputs are thresholded at 0.5 and binary precision, recall and F1 are
    computed per disease.

    Returns:
        A dict with one ``{'precision', 'recall', 'f1_score'}`` entry per
        disease ("DR", "Glaucoma", "AMD") plus ``'average_f1'``, the mean of
        the three F1 scores.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = build_model(backbone, num_classes=3, dropout=0.5).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    # Collect ground truth and thresholded predictions row by row.
    truth_rows, pred_rows = [], []
    with torch.no_grad():
        for imgs, labels in test_loader:
            with torch.cuda.amp.autocast():
                logits = model(imgs.to(device))
            batch_probs = torch.sigmoid(logits).cpu().numpy()
            truth_rows.extend(labels.numpy())
            pred_rows.extend((batch_probs > 0.5).astype(int))

    y_true = np.array(truth_rows)
    y_pred = np.array(pred_rows)

    banner = "=" * 70
    rule = "-" * 70
    print(f"\n{banner}")
    print(" OFFSITE TEST EVALUATION")
    print(banner)
    print(f"{'Disease':<15} {'Precision':<12} {'Recall':<12} {'F1-Score':<12}")
    print(rule)

    results = {}
    f1_scores = []
    for col, disease in enumerate(["DR", "Glaucoma", "AMD"]):
        truth_col, pred_col = y_true[:, col], y_pred[:, col]
        scores = {
            'precision': precision_score(truth_col, pred_col, average='binary', zero_division=0),
            'recall': recall_score(truth_col, pred_col, average='binary', zero_division=0),
            'f1_score': f1_score(truth_col, pred_col, average='binary', zero_division=0),
        }
        results[disease] = scores
        f1_scores.append(scores['f1_score'])
        print(f"{disease:<15} {scores['precision']:<12.4f} {scores['recall']:<12.4f} {scores['f1_score']:<12.4f}")

    avg_f1 = np.mean(f1_scores)
    results['average_f1'] = avg_f1
    print(rule)
    print(f"{'AVG F1:':<15} {avg_f1:.4f} ({avg_f1*100:.2f}%)")
    print(f"{banner}\n")
    return results

def generate_submission(model_path, backbone, onsite_csv, onsite_image_dir, output_name):
    """Predict the three labels for every image listed in ``onsite_csv`` and save them.

    Images are named by the ``id`` column, loaded from ``onsite_image_dir``,
    resized to 256x256, normalised and run through the checkpoint one at a
    time. Sigmoid outputs above 0.5 become 1, the rest 0. The predictions are
    written to the ``D``, ``G`` and ``A`` columns of the loaded table, which
    is saved to ``output_name`` (without the index) and returned.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = build_model(backbone, num_classes=3, dropout=0.5).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    submission_df = pd.read_csv(onsite_csv)
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    def predict_one(img_name):
        # Single-image batch: add the batch dimension, then strip it again.
        picture = Image.open(os.path.join(onsite_image_dir, img_name)).convert("RGB")
        batch = preprocess(picture).unsqueeze(0).to(device)
        with torch.cuda.amp.autocast():
            logits = model(batch)
        return (torch.sigmoid(logits).cpu().numpy()[0] > 0.5).astype(int)

    with torch.no_grad():
        predictions = np.array([predict_one(name) for name in submission_df['id'].values])

    for col, label in enumerate(('D', 'G', 'A')):
        submission_df[label] = predictions[:, col]

    submission_df.to_csv(output_name, index=False)
    print(f" Saved: {output_name}\n")
    return submission_df

# Marker in the cell output showing that the definitions above ran without error.
print(" All functions loaded!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 Device: cuda
 All functions loaded!


In [None]:
# Shared inputs for the Task 2 experiments. These module-level names
# (base_path, the *_csv / *_image_dir paths and pretrained_resnet) are reused
# by the later Task 2 cells, so this cell has to run first.
base_path = "/content/drive/MyDrive/Colab Notebooks/final_project_resources"
train_csv = f"{base_path}/train.csv"
val_csv = f"{base_path}/val.csv"
test_csv = f"{base_path}/offsite_test.csv"
train_image_dir = f"{base_path}/images/train"
val_image_dir = f"{base_path}/images/val"
test_image_dir = f"{base_path}/images/offsite_test"
# Weights used to initialise ResNet-18 (presumably the Task 1.3 result, judging
# by the file name -- confirm).
pretrained_resnet = f"{base_path}/checkpoints/resnet18_DLsns_task1-3.pt"

# ====================================================================
# TASK 2.1: FOCAL LOSS (ResNet-18)
# ====================================================================
print("\n TASK 2.1: Focal Loss\n")

focal_loss = FocalLoss(alpha=0.75, gamma=2.5)

# Returns the best-checkpoint path and a loader over the offsite test split.
ckpt_focal, test_loader = train_task2(
    backbone='resnet18',
    loss_fn=focal_loss,
    loss_name='focal_loss',
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=test_csv,
    train_image_dir=train_image_dir,
    val_image_dir=val_image_dir,
    test_image_dir=test_image_dir,
    pretrained_backbone=pretrained_resnet,
    epochs=60,
    batch_size=12,
    lr=2e-5
)

results_focal = evaluate_task2(ckpt_focal, 'resnet18', test_loader)


# Onsite submission inputs; also reused by the later Task 2 cells.
onsite_csv = f"{base_path}/onsite_test_submission.csv"
onsite_image_dir = f"{base_path}/images/onsite_test"

# The output name is relative, so the CSV lands in the current working
# directory rather than under base_path.
submission_focal = generate_submission(ckpt_focal, 'resnet18', onsite_csv,
                                      onsite_image_dir, "task2_1_focal_loss.csv")

print(f" Task 2.1 (Focal Loss) - Offsite: {results_focal['average_f1']*100:.2f}%")
print(" Download: task2_1_focal_loss.csv")


 TASK 2.1: Focal Loss

 Train: 800, Val: 200, Test: 200

 TASK 2: resnet18 + focal_loss

E01/60 | Train:0.0987 | Val:0.0865 | F1:0.3505
   BEST! F1: 0.3505
E02/60 | Train:0.0765 | Val:0.0525 | F1:0.6173
   BEST! F1: 0.6173
E03/60 | Train:0.0523 | Val:0.0333 | F1:0.6936
   BEST! F1: 0.6936
E04/60 | Train:0.0395 | Val:0.0355 | F1:0.7294
   BEST! F1: 0.7294
E05/60 | Train:0.0408 | Val:0.0436 | F1:0.7271
E06/60 | Train:0.0382 | Val:0.0370 | F1:0.7479
   BEST! F1: 0.7479
E07/60 | Train:0.0373 | Val:0.0355 | F1:0.7571
   BEST! F1: 0.7571
E08/60 | Train:0.0381 | Val:0.0350 | F1:0.7602
   BEST! F1: 0.7602
E09/60 | Train:0.0366 | Val:0.0351 | F1:0.7556
E10/60 | Train:0.0325 | Val:0.0337 | F1:0.7749
   BEST! F1: 0.7749
E11/60 | Train:0.0328 | Val:0.0420 | F1:0.7461
E12/60 | Train:0.0354 | Val:0.0360 | F1:0.7662
E13/60 | Train:0.0330 | Val:0.0383 | F1:0.7490
E14/60 | Train:0.0330 | Val:0.0334 | F1:0.7888
   BEST! F1: 0.7888
E15/60 | Train:0.0307 | Val:0.0348 | F1:0.7467
E16/60 | Train:0.0295 | V

In [None]:
# ====================================================================
# TASK 2.2: CLASS-BALANCED LOSS (ResNet-18)
# Relies on the paths, pretrained_resnet, onsite_csv and onsite_image_dir
# defined in the Task 2.1 cell.
# ====================================================================
print("\n TASK 2.2: Class-Balanced Loss\n")

# Hard-coded per-class sample counts fed to the class-balanced weights
# (presumably the positive counts for DR / Glaucoma / AMD in train.csv -- TODO
# confirm, and consider computing them from the CSV).
samples_per_class = [537, 163, 142]
cb_loss = ClassBalancedLoss(samples_per_class=samples_per_class, beta=0.9999)

# loss_name becomes part of the checkpoint file name, so this run does not
# overwrite the focal-loss checkpoint of the same backbone.
ckpt_cb, test_loader = train_task2(
    backbone='resnet18',
    loss_fn=cb_loss,
    loss_name='class_balanced',
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=test_csv,
    train_image_dir=train_image_dir,
    val_image_dir=val_image_dir,
    test_image_dir=test_image_dir,
    pretrained_backbone=pretrained_resnet,
    epochs=60,
    batch_size=12,
    lr=2e-5
)

results_cb = evaluate_task2(ckpt_cb, 'resnet18', test_loader)

# Relative output name: the CSV is written to the current working directory.
submission_cb = generate_submission(ckpt_cb, 'resnet18', onsite_csv,
                                   onsite_image_dir, "task2_2_class_balanced.csv")

print(f" Task 2.2 (Class-Balanced) - Offsite: {results_cb['average_f1']*100:.2f}%")
print(" Download: task2_2_class_balanced.csv")


 TASK 2.2: Class-Balanced Loss

 Train: 800, Val: 200, Test: 200

 TASK 2: resnet18 + class_balanced

E01/60 | Train:0.7265 | Val:0.6862 | F1:0.2034
   BEST! F1: 0.2034
E02/60 | Train:0.6336 | Val:0.5406 | F1:0.4162
   BEST! F1: 0.4162
E03/60 | Train:0.5011 | Val:0.4278 | F1:0.6763
   BEST! F1: 0.6763
E04/60 | Train:0.3782 | Val:0.3493 | F1:0.7296
   BEST! F1: 0.7296
E05/60 | Train:0.3265 | Val:0.3374 | F1:0.7356
   BEST! F1: 0.7356
E06/60 | Train:0.3190 | Val:0.3266 | F1:0.7492
   BEST! F1: 0.7492
E07/60 | Train:0.3212 | Val:0.4176 | F1:0.6682
E08/60 | Train:0.3208 | Val:0.3523 | F1:0.7205
E09/60 | Train:0.2969 | Val:0.4221 | F1:0.6783
E10/60 | Train:0.3040 | Val:0.3635 | F1:0.7309
E11/60 | Train:0.2821 | Val:0.3245 | F1:0.7461
E12/60 | Train:0.2681 | Val:0.3601 | F1:0.7220
E13/60 | Train:0.2799 | Val:0.3866 | F1:0.7301
E14/60 | Train:0.2774 | Val:0.3475 | F1:0.7763
   BEST! F1: 0.7763
E15/60 | Train:0.2645 | Val:0.3713 | F1:0.7575
E16/60 | Train:0.2336 | Val:0.3251 | F1:0.8020
   BE

In [None]:
# ====================================================================
# TASK 2.1: EFFICIENTNET + FOCAL LOSS
# Relies on base_path, the split paths, onsite_csv and onsite_image_dir
# defined in the ResNet Task 2.1 cell.
# ====================================================================
print("\n TASK 2.1: EfficientNet + Focal Loss\n")

# Weights used to initialise EfficientNet; this name is reused by the
# EfficientNet class-balanced cell.
pretrained_eff = f"{base_path}/checkpoints/efficientnet_DLsns_task1-3.pt"
# Rebinds focal_loss with the same hyper-parameters as the ResNet run.
focal_loss = FocalLoss(alpha=0.75, gamma=2.5)

ckpt_focal_eff, test_loader_eff = train_task2(
    backbone='efficientnet',
    loss_fn=focal_loss,
    loss_name='focal_loss',
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=test_csv,
    train_image_dir=train_image_dir,
    val_image_dir=val_image_dir,
    test_image_dir=test_image_dir,
    pretrained_backbone=pretrained_eff,
    epochs=60,
    batch_size=12,
    lr=2e-5  # Same LR as ResNet
)

results_focal_eff = evaluate_task2(ckpt_focal_eff, 'efficientnet', test_loader_eff)

# Relative output name: the CSV is written to the current working directory.
submission_focal_eff = generate_submission(ckpt_focal_eff, 'efficientnet', onsite_csv,
                                          onsite_image_dir, "task2_1_focal_efficientnet.csv")

print(f" Task 2.1 EfficientNet (Focal) - Offsite: {results_focal_eff['average_f1']*100:.2f}%")
print(" Download: task2_1_focal_efficientnet.csv")


 TASK 2.1: EfficientNet + Focal Loss

 Train: 800, Val: 200, Test: 200

 TASK 2: efficientnet + focal_loss

E01/60 | Train:0.2212 | Val:0.5144 | F1:0.7298
   BEST! F1: 0.7298
E02/60 | Train:0.2226 | Val:0.5690 | F1:0.7265
E03/60 | Train:0.2049 | Val:0.4882 | F1:0.7269
E04/60 | Train:0.1800 | Val:0.4734 | F1:0.7564
   BEST! F1: 0.7564
E05/60 | Train:0.1161 | Val:0.4285 | F1:0.7564
   BEST! F1: 0.7564
E06/60 | Train:0.1062 | Val:0.3472 | F1:0.7485
E07/60 | Train:0.0852 | Val:0.3180 | F1:0.7659
   BEST! F1: 0.7659
E08/60 | Train:0.0690 | Val:0.2718 | F1:0.7731
   BEST! F1: 0.7731
E09/60 | Train:0.0492 | Val:0.2545 | F1:0.7633
E10/60 | Train:0.0507 | Val:0.2061 | F1:0.7858
   BEST! F1: 0.7858
E11/60 | Train:0.0501 | Val:0.1556 | F1:0.7873
   BEST! F1: 0.7873
E12/60 | Train:0.0328 | Val:0.1923 | F1:0.7796
E13/60 | Train:0.0376 | Val:0.1767 | F1:0.7840
E14/60 | Train:0.0272 | Val:0.1745 | F1:0.7796
E15/60 | Train:0.0326 | Val:0.1346 | F1:0.7946
   BEST! F1: 0.7946
E16/60 | Train:0.0248 | Va

In [None]:
# ====================================================================
# TASK 2.2: EFFICIENTNET + CLASS-BALANCED LOSS
# Relies on pretrained_eff from the EfficientNet focal-loss cell and on the
# paths, onsite_csv and onsite_image_dir from the ResNet Task 2.1 cell.
# ====================================================================
print("\n TASK 2.2: EfficientNet + Class-Balanced Loss\n")

# Same hard-coded per-class counts as the ResNet class-balanced run
# (presumably positives per disease in train.csv -- TODO confirm).
samples_per_class = [537, 163, 142]
cb_loss = ClassBalancedLoss(samples_per_class=samples_per_class, beta=0.9999)

ckpt_cb_eff, test_loader_eff = train_task2(
    backbone='efficientnet',
    loss_fn=cb_loss,
    loss_name='class_balanced',
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=test_csv,
    train_image_dir=train_image_dir,
    val_image_dir=val_image_dir,
    test_image_dir=test_image_dir,
    pretrained_backbone=pretrained_eff,
    epochs=60,
    batch_size=12,
    lr=2e-5
)

results_cb_eff = evaluate_task2(ckpt_cb_eff, 'efficientnet', test_loader_eff)

# Relative output name: the CSV is written to the current working directory.
submission_cb_eff = generate_submission(ckpt_cb_eff, 'efficientnet', onsite_csv,
                                       onsite_image_dir, "task2_2_class_balanced_efficientnet.csv")

print(f" Task 2.2 EfficientNet (Class-Balanced) - Offsite: {results_cb_eff['average_f1']*100:.2f}%")
print(" Download: task2_2_class_balanced_efficientnet.csv")


 TASK 2.2: EfficientNet + Class-Balanced Loss

 Train: 800, Val: 200, Test: 200

 TASK 2: efficientnet + class_balanced

E01/60 | Train:0.5113 | Val:0.8627 | F1:0.7241
   BEST! F1: 0.7241
E02/60 | Train:0.4337 | Val:0.9031 | F1:0.7371
   BEST! F1: 0.7371
E03/60 | Train:0.4019 | Val:0.8470 | F1:0.7431
   BEST! F1: 0.7431
E04/60 | Train:0.3765 | Val:0.7965 | F1:0.7423
E05/60 | Train:0.3476 | Val:0.7527 | F1:0.7530
   BEST! F1: 0.7530
E06/60 | Train:0.3234 | Val:0.6977 | F1:0.7637
   BEST! F1: 0.7637
E07/60 | Train:0.2805 | Val:0.6638 | F1:0.7607
E08/60 | Train:0.2727 | Val:0.6121 | F1:0.7795
   BEST! F1: 0.7795
E09/60 | Train:0.2354 | Val:0.6310 | F1:0.7761
E10/60 | Train:0.2580 | Val:0.5861 | F1:0.7578
E11/60 | Train:0.2323 | Val:0.5551 | F1:0.7632
E12/60 | Train:0.2435 | Val:0.5015 | F1:0.7942
   BEST! F1: 0.7942
E13/60 | Train:0.2303 | Val:0.5482 | F1:0.7793
E14/60 | Train:0.2094 | Val:0.4660 | F1:0.7863
E15/60 | Train:0.1981 | Val:0.5261 | F1:0.7762
E16/60 | Train:0.1912 | Val:0.482

In [None]:
# Summary of the average offsite F1 for all four Task 2 runs. Needs the
# results_focal, results_cb, results_focal_eff and results_cb_eff dicts
# produced by the four experiment cells above.
print("\n" + "="*80)
print(" COMPLETE TASK 2 RESULTS - ALL CONFIGURATIONS")
print("="*80)
print("\nRESNET18:")
print(f"  Task 2.1 - Focal Loss:        Offsite: {results_focal['average_f1']*100:.2f}%")
print(f"  Task 2.2 - Class-Balanced:    Offsite: {results_cb['average_f1']*100:.2f}%")
print("\nEFFICIENTNET:")
print(f"  Task 2.1 - Focal Loss:        Offsite: {results_focal_eff['average_f1']*100:.2f}%")
print(f"  Task 2.2 - Class-Balanced:    Offsite: {results_cb_eff['average_f1']*100:.2f}%")
print("="*80)



 COMPLETE TASK 2 RESULTS - ALL CONFIGURATIONS

RESNET18:
  Task 2.1 - Focal Loss:        Offsite: 73.84%
  Task 2.2 - Class-Balanced:    Offsite: 79.06%

EFFICIENTNET:
  Task 2.1 - Focal Loss:        Offsite: 74.76%
  Task 2.2 - Class-Balanced:    Offsite: 73.05%


# Task 3

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score


In [None]:
# TASK 3 - FINAL VERSION

import os
import pandas as pd
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# ============================================================================
# DATASET CLASS
# ============================================================================
class RetinaMultiLabelDataset(Dataset):
    """Multi-label image dataset described by a CSV file.

    Column 0 of the CSV is used as the image file name (joined onto
    ``image_dir``); every remaining column is read as a float32 label.
    Rows whose image file does not exist on disk are dropped when the
    dataset is constructed, and the remaining rows are re-indexed from 0.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

        # Keep only the row positions whose image file is present on disk.
        total_rows = len(self.data)
        present = [
            pos
            for pos in range(total_rows)
            if os.path.exists(os.path.join(self.image_dir, self.data.iloc[pos, 0]))
        ]
        self.data = self.data.iloc[present].reset_index(drop=True)

        n_missing = total_rows - len(self.data)
        if n_missing > 0:
            print(f" Warning: {n_missing} images missing")
        print(f"Dataset ready: {len(self.data)} valid images")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``; the image is RGB."""
        record = self.data.iloc[idx]
        image = Image.open(os.path.join(self.image_dir, record.iloc[0])).convert("RGB")
        targets = torch.tensor(record.iloc[1:].to_numpy().astype("float32"))
        if self.transform:
            image = self.transform(image)
        return image, targets


# ============================================================================
# TASK 3.1: SE BLOCK
# ============================================================================
class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation channel gate.
    Reference: Hu et al., "Squeeze-and-Excitation Networks" (CVPR 2018)

    Each channel of the input is rescaled by a value in (0, 1) computed from
    the globally average-pooled feature map through a two-layer bottleneck
    (Linear -> ReLU -> Linear -> Sigmoid, both linear layers without bias).

    Args:
        channels: number of channels of the 4-D input ``(batch, channels, H, W)``.
        reduction: bottleneck ratio; the hidden width is
            ``max(channels // reduction, 1)``.
    """
    def __init__(self, channels, reduction=16):
        super().__init__()
        # Hidden width of the bottleneck, never smaller than one unit.
        hidden = max(channels // reduction, 1)

        self.fc1 = nn.Linear(channels, hidden, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(hidden, channels, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        n, c, _, _ = x.size()

        # Squeeze: one scalar per (sample, channel).
        pooled = F.adaptive_avg_pool2d(x, 1).reshape(n, c)

        # Excitation: bottleneck MLP followed by a sigmoid gate.
        gate = self.sigmoid(self.fc2(self.relu(self.fc1(pooled))))

        # Scale: broadcast the per-channel gate over the spatial dimensions.
        return x * gate.reshape(n, c, 1, 1)


# ============================================================================
# TASK 3.2: MHA BLOCK
# ============================================================================
class MultiHeadAttention(nn.Module):
    """
    Multi-head self-attention over the spatial positions of a feature map.
    Reference: Vaswani et al., "Attention is All You Need" (NeurIPS 2017)

    The input ``(batch, channels, H, W)`` is optionally average-pooled by
    ``reduction``, flattened into a token sequence, passed through scaled
    dot-product attention, projected, layer-normalised, upsampled back to
    ``(H, W)`` and added to the input (residual connection).

    The same dropout module is applied to the attention weights and to the
    output projection.

    Args:
        channels: channel count of the input; must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        reduction: spatial pooling factor applied before attention
            (values <= 1 disable pooling).
        dropout: dropout probability.
    """
    def __init__(self, channels, num_heads=8, reduction=4, dropout=0.4):
        super().__init__()
        assert channels % num_heads == 0, "channels must be divisible by num_heads"

        self.num_heads = num_heads
        self.head_dim = channels // num_heads
        self.scale = self.head_dim ** -0.5
        self.reduction = reduction

        # Pooling shrinks the token count before the quadratic attention step.
        self.pool = (
            nn.AvgPool2d(kernel_size=reduction, stride=reduction)
            if reduction > 1
            else nn.Identity()
        )

        self.qkv = nn.Linear(channels, channels * 3, bias=False)
        self.proj_out = nn.Linear(channels, channels, bias=False)
        self.norm = nn.LayerNorm(channels)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        n, c, full_h, full_w = x.size()

        # Spatial reduction, then (batch, channels, h, w) -> (batch, h*w, channels).
        pooled = self.pool(x)
        _, _, grid_h, grid_w = pooled.size()
        n_tokens = grid_h * grid_w
        tokens = pooled.flatten(2).transpose(1, 2)

        # One projection yields Q, K and V; split into
        # (3, batch, heads, tokens, head_dim) and unpack along the first axis.
        packed = self.qkv(tokens).reshape(n, n_tokens, 3, self.num_heads, self.head_dim)
        q, k, v = packed.permute(2, 0, 3, 1, 4).unbind(0)

        # Scaled dot-product attention with dropout on the attention weights.
        weights = F.softmax((q @ k.transpose(-2, -1)) * self.scale, dim=-1)
        weights = self.dropout(weights)
        attended = weights @ v

        # Merge heads back into the channel dimension.
        merged = attended.transpose(1, 2).contiguous().reshape(n, n_tokens, c)

        # Output projection -> dropout -> layer norm.
        out = self.norm(self.dropout(self.proj_out(merged)))

        # Back to a spatial map, upsampled to the input resolution if pooled.
        out = out.transpose(1, 2).reshape(n, c, grid_h, grid_w)
        if self.reduction > 1:
            out = F.interpolate(out, size=(full_h, full_w), mode='bilinear', align_corners=False)

        # Residual connection.
        return x + out


# ============================================================================
# BUILD MODEL FUNCTION
# ============================================================================
def build_model_with_attention(backbone="resnet18", num_classes=3, attention_type=None):
    """
    Build a classification backbone with an optional attention block.

    The attention block is appended after the last convolutional stage by
    wrapping that stage in ``nn.Sequential(original_stage, attention)``:
    ``layer4`` for ResNet18 and ``features`` for EfficientNet-B0. The backbone
    is created without pretrained weights (``weights=None``).

    Note: because of the wrapping, parameters of the original stage move one
    level down in the state dict (e.g. ``layer4.0.conv1.weight`` becomes
    ``layer4.0.0.conv1.weight``), so a checkpoint saved from the plain
    backbone does not match these keys without renaming.

    Args:
        backbone: 'resnet18' or 'efficientnet'
        num_classes: size of the final linear layer (3 = DR, Glaucoma, AMD)
        attention_type: 'se', 'mha', or None for no attention block

    Returns:
        The constructed ``nn.Module``.

    Raises:
        ValueError: if ``backbone`` or ``attention_type`` is not supported.
    """
    # Fail loudly on typos such as 'SE' instead of silently returning a model
    # with no attention block.
    if attention_type not in (None, 'se', 'mha'):
        raise ValueError(
            f"Unsupported attention_type: {attention_type!r} (expected 'se', 'mha' or None)"
        )

    if backbone == "resnet18":
        model = models.resnet18(weights=None)
        # Width of the final feature map = input width of the classifier head
        # (512 for ResNet18).
        num_channels = model.fc.in_features

        if attention_type == 'se':
            model.layer4 = nn.Sequential(
                model.layer4,
                SEBlock(num_channels, reduction=16)
            )
            print(f" Inserted SE Block after layer4 ({num_channels} channels)")

        elif attention_type == 'mha':
            model.layer4 = nn.Sequential(
                model.layer4,
                MultiHeadAttention(num_channels, num_heads=8, reduction=2, dropout=0.4)
            )
            print(f"✓ Inserted MHA Block (IMPROVED with dropout) after layer4")

        model.fc = nn.Linear(num_channels, num_classes)

    elif backbone == "efficientnet":
        model = models.efficientnet_b0(weights=None)
        # Input width of the classifier head (1280 for EfficientNet-B0).
        num_channels = model.classifier[1].in_features

        if attention_type == 'se':
            model.features = nn.Sequential(
                model.features,
                SEBlock(num_channels, reduction=16)
            )
            print(f"✓ Inserted SE Block (ORIGINAL)")

        elif attention_type == 'mha':
            model.features = nn.Sequential(
                model.features,
                MultiHeadAttention(num_channels, num_heads=8, reduction=4, dropout=0.4)
            )
            print(f"✓ Inserted MHA Block (IMPROVED with dropout)")

        model.classifier[1] = nn.Linear(num_channels, num_classes)

    else:
        raise ValueError("Unsupported backbone")

    return model


# ============================================================================
# TEST-TIME AUGMENTATION
# ============================================================================
def predict_with_tta(model, img, device):
    """Average sigmoid probabilities over simple flip augmentations.

    The model is put in eval mode and run, without gradients, on three views
    of the batch: the input itself, a flip along dim 3 and a flip along dim 2
    (width and height for ``(batch, channels, H, W)`` input).

    Mixed precision is enabled only when ``device`` is a CUDA device, so the
    function also runs cleanly on CPU. Logits are cast to float32 before the
    sigmoid so the averaged probabilities are not rounded to half precision.

    Args:
        model: module mapping an image batch to per-class logits.
        img: image batch tensor, already on ``device``.
        device: ``torch.device`` or device string the batch lives on.

    Returns:
        float32 tensor of mean probabilities, same shape as the model output.
    """
    model.eval()
    use_amp = torch.device(device).type == "cuda"

    views = (
        lambda t: t,                          # original
        lambda t: torch.flip(t, dims=[3]),    # flip along the last axis
        lambda t: torch.flip(t, dims=[2]),    # flip along the second-to-last axis
    )

    probs_per_view = []
    with torch.no_grad():
        for make_view in views:
            with torch.amp.autocast('cuda', enabled=use_amp):
                logits = model(make_view(img))
            # Leave autocast before the sigmoid and accumulate in float32.
            probs_per_view.append(torch.sigmoid(logits.float()))

    return torch.stack(probs_per_view).mean(dim=0)


# ============================================================================
# TRAINING FUNCTION
# ============================================================================
def _remap_plain_backbone_keys(state_dict, model):
    """Rename plain-backbone checkpoint keys to fit an attention-wrapped stage.

    build_model_with_attention wraps ``layer4`` (ResNet18) or ``features``
    (EfficientNet) in ``nn.Sequential(original_stage, attention)``. A tensor
    saved as ``layer4.0.conv1.weight`` therefore lives at
    ``layer4.0.0.conv1.weight`` in the wrapped model, and a non-strict load
    would silently skip the whole stage. Keys that already exist in the model
    are left unchanged.

    Returns:
        (remapped_state_dict, number_of_renamed_keys)
    """
    model_keys = set(model.state_dict().keys())
    remapped = {}
    n_renamed = 0
    for key, tensor in state_dict.items():
        target = key
        if key not in model_keys:
            for prefix in ("layer4.", "features."):
                if key.startswith(prefix):
                    candidate = f"{prefix}0.{key[len(prefix):]}"
                    if candidate in model_keys:
                        target = candidate
                        n_renamed += 1
                    break
        remapped[target] = tensor
    return remapped, n_renamed


def _predict_probs(model, imgs, device, use_tta):
    """Return float32 numpy probabilities for one batch (call under no_grad)."""
    if use_tta:
        return predict_with_tta(model, imgs, device).float().cpu().numpy()
    with torch.amp.autocast('cuda', enabled=(device.type == "cuda")):
        outputs = model(imgs)
    return torch.sigmoid(outputs.float()).cpu().numpy()


def train_task3(backbone, train_csv, val_csv,
                offsite_test_csv, offsite_test_dir,
                onsite_test_csv, onsite_test_dir,
                train_image_dir, val_image_dir,
                epochs=50, batch_size=12, lr=2e-5, img_size=256,
                save_dir="./checkpoints",
                pretrained_task1_path="/content/drive/MyDrive/Colab Notebooks/final_project_resources/best_resnet18.pt",
                attention_type='se',
                use_tta=True):
    """
    Fine-tune an attention-augmented backbone and evaluate it (Task 3).

    Pipeline:
      1. Build train/val/offsite/onsite datasets and loaders.
      2. Build the model via build_model_with_attention and try to initialise
         it from the Task 1 checkpoint (keys of the wrapped stage are renamed
         so the pretrained weights of that stage are actually loaded). If the
         checkpoint cannot be loaded, training continues from random init.
      3. Train all parameters with BCEWithLogitsLoss, AdamW, OneCycleLR,
         gradient clipping (max norm 1.0) and, on CUDA only, mixed precision.
         For ``attention_type == 'mha'`` the learning rate is halved.
      4. After every epoch compute the mean per-class F1 on the validation
         set (threshold 0.5); the best epoch is checkpointed and training
         stops early after 20 epochs without improvement.
      5. Reload the best checkpoint, report per-class metrics on the offsite
         test set and write offsite/onsite predictions to ``./predictions``.

    Args:
        backbone: 'resnet18' or 'efficientnet'.
        train_csv, val_csv, offsite_test_csv, onsite_test_csv: label CSVs;
            the test CSVs must have an ``id`` column.
        train_image_dir, val_image_dir, offsite_test_dir, onsite_test_dir:
            image folders matching the CSVs.
        epochs, batch_size, lr, img_size: training hyper-parameters.
        save_dir: directory for the best-model checkpoint.
        pretrained_task1_path: Task 1 state-dict file used for initialisation.
        attention_type: 'se', 'mha' or None.
        use_tta: average predictions over flips at test time.

    Returns:
        (model, avg_f1_offsite): the model with the best checkpoint loaded and
        the mean per-class F1 on the offsite test set.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    use_amp = device.type == "cuda"  # mixed precision / pinned memory only help on GPU
    # attention_type may be None (plain backbone); avoid calling .upper() on it.
    attn_label = attention_type.upper() if attention_type else "NONE"

    print(f"\n{'='*70}")
    print(f"TASK 3: Training {attn_label} Model")
    print(f"  SE: ORIGINAL | MHA: IMPROVED (dropout)")
    print(f"  Training: IMPROVED (for both)")
    print(f"{'='*70}")
    print(f"Backbone: {backbone}")
    print(f"Epochs: {epochs} | Batch: {batch_size} | LR: {lr}")
    print(f"TTA: {'Enabled' if use_tta else 'Disabled'}")
    print(f"{'='*70}\n")

    # Strong augmentation
    train_transform = transforms.Compose([
        transforms.Resize((288, 288)),
        transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(30),
        transforms.RandomAffine(degrees=0, translate=(0.15, 0.15)),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.15),
        transforms.RandomGrayscale(p=0.1),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
        transforms.RandomErasing(p=0.4, scale=(0.02, 0.2)),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    ])

    # Datasets
    print("Loading datasets...")
    train_ds = RetinaMultiLabelDataset(train_csv, train_image_dir, train_transform)
    val_ds = RetinaMultiLabelDataset(val_csv, val_image_dir, val_transform)
    offsite_test_ds = RetinaMultiLabelDataset(offsite_test_csv, offsite_test_dir, val_transform)
    onsite_test_ds = RetinaMultiLabelDataset(onsite_test_csv, onsite_test_dir, val_transform)

    # DataLoaders
    loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=use_amp)
    train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)
    offsite_test_loader = DataLoader(offsite_test_ds, shuffle=False, **loader_kwargs)
    onsite_test_loader = DataLoader(onsite_test_ds, shuffle=False, **loader_kwargs)

    # Build model
    print("\nBuilding model...")
    model = build_model_with_attention(
        backbone=backbone,
        num_classes=3,
        attention_type=attention_type
    ).to(device)

    # Load Task 1 weights (best effort: fall back to random init on failure)
    print(f"\nLoading Task 1 pretrained weights...")
    try:
        state_dict = torch.load(pretrained_task1_path, map_location="cpu")
        state_dict, n_remapped = _remap_plain_backbone_keys(state_dict, model)
        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)

        if n_remapped:
            print(f" Remapped {n_remapped} checkpoint keys onto the attention-wrapped stage")
        if missing_keys:
            print(f" New layers initialized: {len(missing_keys)} layers")
        if unexpected_keys:
            print(f" Warning: {len(unexpected_keys)} checkpoint keys were not used")
        print(f"✓ Loaded pretrained backbone from Task 1")

    except Exception as e:
        print(f"Warning: Could not load pretrained weights: {e}")
        print("  Training from scratch...")

    # All parameters trainable
    for p in model.parameters():
        p.requires_grad = True

    # Loss function
    criterion = nn.BCEWithLogitsLoss()

    # MHA-specific: Lower learning rate
    if attention_type == 'mha':
        lr = lr * 0.5
        print(f"\n MHA-specific adjustment: Using lower LR = {lr}")

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=lr * 3,
        epochs=epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.1
    )

    # With enabled=False the scaler is a transparent pass-through (CPU runs).
    scaler = torch.amp.GradScaler('cuda', enabled=use_amp)
    # Start below any reachable F1 so the first epoch always writes a
    # checkpoint; otherwise an all-zero F1 run would leave nothing (or a stale
    # file from an earlier run) to reload after training.
    best_val_f1 = -1.0
    patience = 0
    max_patience = 20

    os.makedirs(save_dir, exist_ok=True)
    attn_suffix = f"_{attention_type}" if attention_type else ""
    ckpt_path = os.path.join(save_dir, f"best_{backbone}_task3{attn_suffix}.pt")

    print(f"\n{'='*70}")
    print("Starting Training...")
    print(f"{'='*70}\n")

    # Training loop
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Mixed precision (no-op on CPU)
            with torch.amp.autocast('cuda', enabled=use_amp):
                outputs = model(imgs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()

            # Gradient clipping must see unscaled gradients
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            train_loss += loss.item() * imgs.size(0)

        train_loss /= len(train_loader.dataset)

        # Validation with F1 tracking
        model.eval()
        val_loss = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)

                with torch.amp.autocast('cuda', enabled=use_amp):
                    outputs = model(imgs)
                    loss = criterion(outputs, labels)

                val_loss += loss.item() * imgs.size(0)

                probs = torch.sigmoid(outputs.float()).cpu().numpy()
                all_preds.append((probs > 0.5).astype(int))
                all_labels.append(labels.cpu().numpy())

        val_loss /= len(val_loader.dataset)

        all_preds = np.concatenate(all_preds, axis=0)
        all_labels = np.concatenate(all_labels, axis=0)

        # Mean of the per-class binary F1 scores
        f1s = [f1_score(all_labels[:, i], all_preds[:, i], zero_division=0) for i in range(3)]
        val_f1 = np.mean(f1s)

        print(f"Epoch {epoch+1}/{epochs} | Train {train_loss:.4f} | Val {val_loss:.4f} | F1 {val_f1:.4f}")

        # Save best based on F1
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), ckpt_path)
            print(f"  ✓ Saved best model (F1={best_val_f1:.4f})")
            patience = 0
        else:
            patience += 1
            if patience >= max_patience:
                print(f"  Early stopping at epoch {epoch+1}")
                break

    print(f"\nBest Validation F1: {best_val_f1:.4f}")

    # OFFSITE Test Evaluation
    print(f"\n{'='*70}")
    print("OFFSITE Test Set Evaluation")
    print(f"{'='*70}\n")

    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for imgs, labels in offsite_test_loader:
            imgs = imgs.to(device)
            probs = _predict_probs(model, imgs, device, use_tta)
            y_true.extend(labels.numpy())
            y_pred.extend((probs > 0.5).astype(int))

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    disease_names = ["DR", "Glaucoma", "AMD"]

    print(f"OFFSITE Test Results: {backbone.upper()} + {attn_label}")
    print(f"{'='*70}\n")

    f1_scores = []
    for i, disease in enumerate(disease_names):
        y_t = y_true[:, i]
        y_p = y_pred[:, i]

        acc = accuracy_score(y_t, y_p)
        precision = precision_score(y_t, y_p, average="binary", zero_division=0)
        recall = recall_score(y_t, y_p, average="binary", zero_division=0)
        f1 = f1_score(y_t, y_p, average="binary", zero_division=0)
        f1_scores.append(f1)

        print(f"{disease:12s} | Acc: {acc:.4f} | Prec: {precision:.4f} | Rec: {recall:.4f} | F1: {f1:.4f}")

    avg_f1_offsite = np.mean(f1_scores)
    print(f"\n{'='*70}")
    print(f"OFFSITE Average F1: {avg_f1_offsite:.4f}")
    print(f"{'='*70}\n")

    # ONSITE Test Predictions
    print(f"\n{'='*70}")
    print("ONSITE Test Predictions (for Kaggle)")
    print(f"{'='*70}\n")

    onsite_preds = []
    with torch.no_grad():
        for imgs, _ in onsite_test_loader:
            imgs = imgs.to(device)
            probs = _predict_probs(model, imgs, device, use_tta)
            onsite_preds.extend((probs > 0.5).astype(int))

    onsite_preds = np.array(onsite_preds)

    # Save predictions. The ids are taken from the dataset's own table rather
    # than re-reading the CSV: the dataset may have dropped rows with missing
    # images, and only its table is guaranteed to line up with the predictions.
    os.makedirs("./predictions", exist_ok=True)
    onsite_pred_filename = f"task3_{backbone}{attn_suffix}_onsite_kaggle.csv"
    onsite_prediction = pd.DataFrame(onsite_preds, columns=['D', 'G', 'A'])
    onsite_prediction.insert(0, 'id', onsite_test_ds.data['id'].to_numpy())
    onsite_prediction.to_csv(f"./predictions/{onsite_pred_filename}", index=False)
    print(f"✓ Saved ONSITE predictions: ./predictions/{onsite_pred_filename}")

    offsite_pred_filename = f"task3_{backbone}{attn_suffix}_offsite_results.csv"
    offsite_prediction = pd.DataFrame(y_pred, columns=['D', 'G', 'A'])
    offsite_prediction.insert(0, 'id', offsite_test_ds.data['id'].to_numpy())
    offsite_prediction.to_csv(f"./predictions/{offsite_pred_filename}", index=False)
    print(f"✓ Saved OFFSITE predictions: ./predictions/{offsite_pred_filename}")

    return model, avg_f1_offsite


# ============================================================================
# MAIN EXECUTION
# ============================================================================
if __name__ == "__main__":
    base_path = "/content/drive/MyDrive/Colab Notebooks/final_project_resources"

    # Label files and image folders for every split.
    train_csv = f"{base_path}/train.csv"
    val_csv = f"{base_path}/val.csv"
    offsite_test_csv = f"{base_path}/offsite_test.csv"
    offsite_test_dir = f"{base_path}/images/offsite_test"
    onsite_test_csv = f"{base_path}/onsite_test_submission.csv"
    onsite_test_dir = f"{base_path}/images/onsite_test"
    train_image_dir = f"{base_path}/images/train"
    val_image_dir = f"{base_path}/images/val"
    pretrained_task1 = f"{base_path}/best_resnet18.pt"

    backbone = 'resnet18'

    # Everything the SE and MHA runs have in common; only batch size,
    # learning rate and attention type differ between the two calls below.
    shared_run_args = dict(
        backbone=backbone,
        train_csv=train_csv,
        val_csv=val_csv,
        offsite_test_csv=offsite_test_csv,
        offsite_test_dir=offsite_test_dir,
        onsite_test_csv=onsite_test_csv,
        onsite_test_dir=onsite_test_dir,
        train_image_dir=train_image_dir,
        val_image_dir=val_image_dir,
        epochs=50,
        img_size=256,
        save_dir=f"{base_path}/checkpoints",
        pretrained_task1_path=pretrained_task1,
        use_tta=True,
    )
    rule_70 = "=" * 70

    # ------------------------------------------------------------------
    # Task 3.1: Squeeze-and-Excitation model
    # ------------------------------------------------------------------
    print("\n" + rule_70)
    print("TASK 3.1: SE Block Model")
    print("  Architecture: ORIGINAL SE Block")
    print("  Training: IMPROVED")
    print(rule_70)

    model_se, f1_se = train_task3(
        batch_size=32,
        lr=1e-5,
        attention_type='se',
        **shared_run_args,
    )

    # ------------------------------------------------------------------
    # Task 3.2: Multi-head attention model
    # ------------------------------------------------------------------
    print("\n" + rule_70)
    print("TASK 3.2: MHA Model")
    print("  Architecture: IMPROVED MHA (with dropout)")
    print("  Training: IMPROVED")
    print(rule_70)

    model_mha, f1_mha = train_task3(
        batch_size=12,
        lr=2e-5,
        attention_type='mha',
        **shared_run_args,
    )

    # ------------------------------------------------------------------
    # Final summary of both runs
    # ------------------------------------------------------------------
    print("\n" + rule_70)
    print("TASK 3 COMPLETE - FINAL SUMMARY")
    print(rule_70)
    print(f"SE Model  (ORIGINAL arch + IMPROVED training): Offsite F1 = {f1_se:.4f}")
    print(f"MHA Model (IMPROVED arch + IMPROVED training): Offsite F1 = {f1_mha:.4f}")
    print(rule_70)
    print("\nWhat was changed:")
    for change_note in (
        "  SE Block:  NO changes (original architecture)",
        "  MHA Block: IMPROVED with dropout (0.4)",
        "  Training:  IMPROVED for both (augmentation, scheduler, etc.)",
    ):
        print(change_note)
    print(rule_70)



TASK 3.1: Squeeze-and-Excitation (SE) Block

TASK 3: Training with SE Attention
Backbone: resnet18
Loading Task 1 weights from: /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/resnet18_DLsns_task1-3.pt


Loading datasets...
Dataset ready: 800 valid images
Dataset ready: 200 valid images
Dataset ready: 200 valid images
Dataset ready: 250 valid images

Building model...
 Inserted SE Block after layer4 (512 channels)

Loading Task 1.3 weights...
✓ New layers (randomly initialized): 27 layers
✓ Loaded pretrained backbone and classifier from Task 1.3

Starting Training...

Epoch 1/30 | Train Loss: 0.5295 | Val Loss: 0.5806
  ✓ Saved best model
Epoch 2/30 | Train Loss: 0.4794 | Val Loss: 0.5127
  ✓ Saved best model
Epoch 3/30 | Train Loss: 0.4296 | Val Loss: 0.4592
  ✓ Saved best model
Epoch 4/30 | Train Loss: 0.3928 | Val Loss: 0.4292
  ✓ Saved best model
Epoch 5/30 | Train Loss: 0.3565 | Val Loss: 0.4158
  ✓ Saved best model
Epoch 6/30 | Train Loss: 0.3364 | Val

# Task 4

In [None]:
import torch
import pandas as pd
import os
from torch.utils.data import DataLoader

# ========================
# build model
# ========================
def build_model(backbone="resnet18", num_classes=3, pretrained=True):
    """Build a ResNet18 or EfficientNet-B0 classifier with a fresh linear head.

    Uses torchvision's ``weights=`` argument instead of the deprecated
    ``pretrained=`` flag. ``pretrained=True`` selects the ``IMAGENET1K_V1``
    weights, which is what the legacy flag mapped to.

    Args:
        backbone: 'resnet18' or 'efficientnet'.
        num_classes: output size of the replaced final linear layer.
        pretrained: load ImageNet weights into the backbone when True.

    Returns:
        The constructed ``nn.Module``.

    Raises:
        ValueError: if ``backbone`` is not supported.
    """
    if backbone == "resnet18":
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.resnet18(weights=weights)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif backbone == "efficientnet":
        weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.efficientnet_b0(weights=weights)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    else:
        raise ValueError("Unsupported backbone")
    return model


# ==========================================
# Task 4: Ensemble Learning (Weighted Average)
# ==========================================
def generate_ensemble_submission(resnet_ckpt, effnet_ckpt, test_csv, image_dir,
                                 output_file="ensemble_submission.csv", weights=(0.5, 0.5)):
    """Write a submission CSV from a weighted average of two models.

    A ResNet18 and an EfficientNet-B0 (both built with ``build_model`` and a
    3-class head) are loaded from their checkpoints. Their sigmoid
    probabilities are combined as a weighted average and thresholded at 0.5.

    Args:
        resnet_ckpt: path to the ResNet18 state dict.
        effnet_ckpt: path to the EfficientNet-B0 state dict.
        test_csv: CSV listing the test images (first column = image id).
        image_dir: folder containing the test images.
        output_file: path of the CSV to write (columns: id, D, G, A).
        weights: ``(resnet_weight, efficientnet_weight)``; normalised to sum
            to 1. The default gives the plain mean of the two models.

    Raises:
        ValueError: if a weight is negative or the weights sum to zero.
    """
    w_resnet, w_effnet = (float(w) for w in weights)
    if w_resnet < 0 or w_effnet < 0 or (w_resnet + w_effnet) <= 0:
        raise ValueError("weights must be non-negative and sum to a positive value")
    weight_sum = w_resnet + w_effnet
    w_resnet, w_effnet = w_resnet / weight_sum, w_effnet / weight_sum

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Generating Ensemble from:\n 1. {resnet_ckpt}\n 2. {effnet_ckpt}")

    # Setup data (same preprocessing as validation)
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    ])

    dataset = RetinaMultiLabelDataset(test_csv, image_dir, transform)
    loader = DataLoader(dataset, batch_size=32, shuffle=False)

    # Load both models
    # Model 1: ResNet18, Model 2: EfficientNet
    model_a = build_model("resnet18", num_classes=3, pretrained=False).to(device)
    model_a.load_state_dict(torch.load(resnet_ckpt, map_location=device))
    model_a.eval()

    model_b = build_model("efficientnet", num_classes=3, pretrained=False).to(device)
    model_b.load_state_dict(torch.load(effnet_ckpt, map_location=device))
    model_b.eval()

    # Predict and combine
    all_preds = []

    with torch.no_grad():
        for imgs, _ in loader:
            imgs = imgs.to(device)

            prob_a = torch.sigmoid(model_a(imgs))
            prob_b = torch.sigmoid(model_b(imgs))

            avg_prob = w_resnet * prob_a + w_effnet * prob_b

            preds = (avg_prob > 0.5).int().cpu().numpy()
            all_preds.extend(preds)

    # Save submission. Ids come from the dataset's own table so they stay
    # aligned with the predictions even if the dataset dropped rows
    # (e.g. for missing image files).
    submit_df = pd.DataFrame(all_preds, columns=['D', 'G', 'A'])
    submit_df.insert(0, 'id', dataset.data.iloc[:, 0].to_numpy())
    submit_df.to_csv(output_file, index=False)
    print(f"✅ Ensemble submission saved to: {output_file}")

# ==========================================
# Execution
# ==========================================
if __name__ == "__main__":
    # Adjust these locations to your own Drive layout.
    base_path = "/content/drive/MyDrive/Colab Notebooks/final_project_resources"
    # Best checkpoint of each backbone.
    resnet_pt = f"{base_path}/checkpoints/resnet18_DLsns_task1-3.pt"
    effnet_pt = f"{base_path}/checkpoints/efficientnet_DLsns_task1-3.pt"
    # CSV listing the onsite test images, and the folder that holds them.
    onsite_csv = f"{base_path}/onsite_test_submission.csv"
    onsite_imgs = f"{base_path}/images/onsite_test"

    # Only run the ensemble when both checkpoints are present.
    checkpoints_missing = not (os.path.exists(resnet_pt) and os.path.exists(effnet_pt))
    if checkpoints_missing:
        print("Error: resnet_pt or effnet_pt files does not exist!")
    else:
        generate_ensemble_submission(resnet_pt, effnet_pt, onsite_csv, onsite_imgs)

Generating Ensemble from:
 1. /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/resnet18_DLsns_task1-3.pt
 2. /content/drive/MyDrive/Colab Notebooks/final_project_resources/checkpoints/efficientnet_DLsns_task1-3.pt
Dataset ready: 250 valid images
✅ Ensemble submission saved to: ensemble_submission.csv


In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# ===================================================================
# Dataset Class
# ===================================================================
class RetinaMultiLabelDataset(Dataset):
    """Multi-label image dataset described by a CSV file.

    Column 0 of the CSV is used as the image file name (joined onto
    ``image_dir``); every remaining column is read as a float32 label.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``; the image is RGB."""
        record = self.data.iloc[idx]
        image = Image.open(os.path.join(self.image_dir, record.iloc[0])).convert("RGB")
        targets = torch.tensor(record.iloc[1:].to_numpy().astype("float32"))
        if self.transform:
            image = self.transform(image)
        return image, targets


# ===================================================================
# FIXED Model Loading Functions for Task 2 Models
# ===================================================================
def load_resnet18_task2(ckpt_path, device):
    """Load a ResNet18 from a Task 2 checkpoint and return it in eval mode.

    The classifier head is rebuilt as ``Sequential(Dropout(0.5), Linear(.., 3))``
    so that its parameter names match the checkpoint being loaded.

    Args:
        ckpt_path: path to the saved state dict.
        device: device to map the checkpoint to and move the model onto.

    Returns:
        The model on ``device`` in eval mode.
    """
    # weights=None replaces the deprecated pretrained=False; all weights come
    # from the checkpoint anyway.
    model = models.resnet18(weights=None)
    num_features = model.fc.in_features
    # Task 2 models use Sequential with Dropout
    model.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 3)
    )
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model = model.to(device)
    model.eval()
    return model


def load_efficientnet_task2(ckpt_path, device):
    """Load an EfficientNet-B0 from a Task 2 checkpoint and return it in eval mode.

    The whole classifier is replaced by ``Sequential(Dropout(0.5), Linear(.., 3))``
    so that its parameter names match the checkpoint being loaded.

    Args:
        ckpt_path: path to the saved state dict.
        device: device to map the checkpoint to and move the model onto.

    Returns:
        The model on ``device`` in eval mode.
    """
    # weights=None replaces the deprecated pretrained=False; all weights come
    # from the checkpoint anyway.
    model = models.efficientnet_b0(weights=None)
    num_features = model.classifier[1].in_features
    # Task 2 models use Sequential with Dropout
    model.classifier = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 3)
    )
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model = model.to(device)
    model.eval()
    return model


# ===================================================================
# TASK 4.1: Weighted Average Ensemble
# ===================================================================
def _weighted_ensemble_predict(nets, weights, loader, device):
    """Run the weighted ensemble over ``loader`` and return ``(labels, preds)`` arrays."""
    labels_out, preds_out = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            combined = None
            for net, weight in zip(nets, weights):
                probs = torch.sigmoid(net(imgs)) * weight
                combined = probs if combined is None else combined + probs
            preds_out.extend((combined > 0.5).int().cpu().numpy())
            labels_out.extend(labels.numpy())
    return np.array(labels_out), np.array(preds_out)


def weighted_average_ensemble(model_paths, model_types, weights, test_csv, test_image_dir,
                               onsite_csv, onsite_image_dir, output_file="task4_weighted_ensemble.csv"):
    """Weighted-average ensemble of Task 2 models.

    Per-model sigmoid probabilities are combined with ``weights`` and
    thresholded at 0.5. The ensemble is scored on the offsite test set and its
    onsite predictions are written to ``output_file`` (columns D, G, A).

    Args:
        model_paths: Checkpoint paths, one per model.
        model_types: ``'resnet18'`` or ``'efficientnet'`` for each checkpoint.
        weights: One non-negative weight per model. They are rescaled to sum
            to 1 so the 0.5 threshold always applies to a true average.
        test_csv, test_image_dir: Labelled offsite test set.
        onsite_csv, onsite_image_dir: Onsite set; the CSV is also reused as the
            submission template.
        output_file: Destination CSV for the onsite predictions.

    Returns:
        Mean of the three per-disease F1 scores on the offsite test set.

    Raises:
        ValueError: If the argument lengths differ, no model is given, a model
            type is unsupported, or the weights do not sum to a positive value.
    """
    # Validate up front: zip() would otherwise silently drop extra entries and
    # an unknown type would re-append the previously loaded model.
    if not (len(model_paths) == len(model_types) == len(weights)):
        raise ValueError(
            f"model_paths ({len(model_paths)}), model_types ({len(model_types)}) and "
            f"weights ({len(weights)}) must have the same length"
        )
    if len(model_paths) == 0:
        raise ValueError("At least one model is required")
    unsupported = [t for t in model_types if t not in ('resnet18', 'efficientnet')]
    if unsupported:
        raise ValueError(f"Unsupported model type(s): {unsupported}")
    weight_sum = float(sum(weights))
    if not weight_sum > 0:
        raise ValueError("weights must sum to a positive value")
    norm_weights = [float(w) / weight_sum for w in weights]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\n{'='*70}")
    print("TASK 4.1: Weighted Average Ensemble")
    print(f"{'='*70}")
    print(f"Number of models: {len(model_paths)}")
    print(f"Weights: {weights}")
    if abs(weight_sum - 1.0) > 1e-8:
        print(f"Normalized weights: {norm_weights}")
    print(f"{'='*70}\n")

    # Load all models (named `nets` so the torchvision `models` module is not shadowed)
    nets = []
    for path, mtype in zip(model_paths, model_types):
        print(f"Loading {mtype} from {os.path.basename(path)}...")
        if mtype == 'resnet18':
            nets.append(load_resnet18_task2(path, device))
        else:
            nets.append(load_efficientnet_task2(path, device))

    # Setup data
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Offsite test evaluation
    test_ds = RetinaMultiLabelDataset(test_csv, test_image_dir, transform)
    test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=2)

    print("\nEvaluating on offsite test set...")
    y_true, y_pred = _weighted_ensemble_predict(nets, norm_weights, test_loader, device)

    # Calculate metrics
    disease_names = ["DR", "Glaucoma", "AMD"]
    f1_scores = []

    print(f"\n{'='*70}")
    print("Offsite Test Results")
    print(f"{'='*70}\n")

    for i, disease in enumerate(disease_names):
        p = precision_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        r = recall_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1 = f1_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1_scores.append(f1)
        print(f"{disease}: Precision={p:.4f}, Recall={r:.4f}, F1={f1:.4f}")

    avg_f1 = np.mean(f1_scores)
    print(f"\nAverage F1-score: {avg_f1:.4f} ({avg_f1*100:.2f}%)")

    # Generate onsite predictions
    print(f"\n{'='*70}")
    print("Generating onsite test predictions...")
    print(f"{'='*70}\n")

    onsite_ds = RetinaMultiLabelDataset(onsite_csv, onsite_image_dir, transform)
    onsite_loader = DataLoader(onsite_ds, batch_size=32, shuffle=False, num_workers=2)
    _, onsite_preds = _weighted_ensemble_predict(nets, norm_weights, onsite_loader, device)

    # Save predictions
    submission_df = pd.read_csv(onsite_csv)
    submission_df['D'] = onsite_preds[:, 0]
    submission_df['G'] = onsite_preds[:, 1]
    submission_df['A'] = onsite_preds[:, 2]
    submission_df.to_csv(output_file, index=False)

    print(f"✓ Saved predictions: {output_file}")
    print(f"\n{'='*70}")

    return avg_f1


# ===================================================================
# TASK 4.2: Max Voting Ensemble
# ===================================================================
def _majority_vote_predict(nets, loader, device):
    """Run the voting ensemble over ``loader`` and return ``(labels, preds)`` arrays."""
    labels_out, preds_out = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            # One 0/1 vote per model, label and sample.
            votes = torch.stack([(torch.sigmoid(net(imgs)) > 0.5).int() for net in nets])
            # Strict majority: a tie (possible with an even model count) is negative.
            majority = (votes.sum(dim=0) > len(nets) / 2).int().cpu().numpy()
            labels_out.extend(labels.numpy())
            preds_out.extend(majority)
    return np.array(labels_out), np.array(preds_out)


def max_voting_ensemble(model_paths, model_types, test_csv, test_image_dir,
                        onsite_csv, onsite_image_dir, output_file="task4_max_voting.csv"):
    """Majority-vote ensemble of Task 2 models.

    Each model votes positive for a label when its sigmoid probability exceeds
    0.5; a label is predicted when strictly more than half of the models vote
    for it. The ensemble is scored on the offsite test set and its onsite
    predictions are written to ``output_file`` (columns D, G, A).

    Args:
        model_paths: Checkpoint paths, one per model.
        model_types: ``'resnet18'`` or ``'efficientnet'`` for each checkpoint.
        test_csv, test_image_dir: Labelled offsite test set.
        onsite_csv, onsite_image_dir: Onsite set; the CSV is also reused as the
            submission template.
        output_file: Destination CSV for the onsite predictions.

    Returns:
        Mean of the three per-disease F1 scores on the offsite test set.

    Raises:
        ValueError: If the argument lengths differ, no model is given, or a
            model type is unsupported.
    """
    # Validate up front: zip() would otherwise silently drop extra entries and
    # an unknown type would re-append the previously loaded model.
    if len(model_paths) != len(model_types):
        raise ValueError(
            f"model_paths ({len(model_paths)}) and model_types ({len(model_types)}) "
            "must have the same length"
        )
    if len(model_paths) == 0:
        raise ValueError("At least one model is required")
    unsupported = [t for t in model_types if t not in ('resnet18', 'efficientnet')]
    if unsupported:
        raise ValueError(f"Unsupported model type(s): {unsupported}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\n{'='*70}")
    print("TASK 4.2: Max Voting Ensemble")
    print(f"{'='*70}")
    print(f"Number of models: {len(model_paths)}")
    print(f"{'='*70}\n")

    # Load models (named `nets` so the torchvision `models` module is not shadowed)
    nets = []
    for path, mtype in zip(model_paths, model_types):
        print(f"Loading {mtype} from {os.path.basename(path)}...")
        if mtype == 'resnet18':
            nets.append(load_resnet18_task2(path, device))
        else:
            nets.append(load_efficientnet_task2(path, device))

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Offsite test
    test_ds = RetinaMultiLabelDataset(test_csv, test_image_dir, transform)
    test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=2)

    print("\nEvaluating on offsite test set...")
    y_true, y_pred = _majority_vote_predict(nets, test_loader, device)

    # Calculate metrics
    disease_names = ["DR", "Glaucoma", "AMD"]
    f1_scores = []

    print(f"\n{'='*70}")
    print("Offsite Test Results")
    print(f"{'='*70}\n")

    for i, disease in enumerate(disease_names):
        p = precision_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        r = recall_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1 = f1_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1_scores.append(f1)
        print(f"{disease}: Precision={p:.4f}, Recall={r:.4f}, F1={f1:.4f}")

    avg_f1 = np.mean(f1_scores)
    print(f"\nAverage F1-score: {avg_f1:.4f} ({avg_f1*100:.2f}%)")

    # Onsite predictions
    print(f"\n{'='*70}")
    print("Generating onsite test predictions...")
    print(f"{'='*70}\n")

    onsite_ds = RetinaMultiLabelDataset(onsite_csv, onsite_image_dir, transform)
    onsite_loader = DataLoader(onsite_ds, batch_size=32, shuffle=False, num_workers=2)
    _, onsite_preds = _majority_vote_predict(nets, onsite_loader, device)

    # Save
    submission_df = pd.read_csv(onsite_csv)
    submission_df['D'] = onsite_preds[:, 0]
    submission_df['G'] = onsite_preds[:, 1]
    submission_df['A'] = onsite_preds[:, 2]
    submission_df.to_csv(output_file, index=False)

    print(f"✓ Saved predictions: {output_file}")
    print(f"\n{'='*70}")

    return avg_f1


# ===================================================================
# TASK 4.3: Stacking Ensemble
# ===================================================================
def stacking_ensemble(model_paths, model_types, train_csv, train_image_dir,
                     val_csv, val_image_dir, test_csv, test_image_dir,
                     onsite_csv, onsite_image_dir, output_file="task4_stacking.csv"):
    """Stacking ensemble: one logistic-regression meta-learner per disease.

    The sigmoid probabilities of every base model are concatenated into one
    feature row per image. A ``LogisticRegression`` is fitted per label on the
    training-set features, reported on the validation set, scored on the
    offsite test set, and used to write onsite predictions to ``output_file``
    (columns D, G, A).

    NOTE(review): the meta-learners are fitted on base-model outputs for
    ``train_csv``. If the base models were trained on that same data their
    outputs there may be over-confident; consider fitting on held-out
    predictions instead. TODO confirm how the checkpoints were trained.

    Args:
        model_paths: Checkpoint paths, one per base model.
        model_types: ``'resnet18'`` or ``'efficientnet'`` for each checkpoint.
        train_csv, train_image_dir: Data used to fit the meta-learners.
        val_csv, val_image_dir: Data used only for the printed validation F1.
        test_csv, test_image_dir: Labelled offsite test set.
        onsite_csv, onsite_image_dir: Onsite set; the CSV is also reused as the
            submission template.
        output_file: Destination CSV for the onsite predictions.

    Returns:
        Mean of the three per-disease F1 scores on the offsite test set.

    Raises:
        ValueError: If the argument lengths differ, no model is given, or a
            model type is unsupported.
    """
    # Validate up front: zip() would otherwise silently drop extra entries and
    # an unknown type would re-append the previously loaded model.
    if len(model_paths) != len(model_types):
        raise ValueError(
            f"model_paths ({len(model_paths)}) and model_types ({len(model_types)}) "
            "must have the same length"
        )
    if len(model_paths) == 0:
        raise ValueError("At least one model is required")
    unsupported = [t for t in model_types if t not in ('resnet18', 'efficientnet')]
    if unsupported:
        raise ValueError(f"Unsupported model type(s): {unsupported}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\n{'='*70}")
    print("TASK 4.3: Stacking Ensemble")
    print(f"{'='*70}")
    print(f"Number of base models: {len(model_paths)}")
    print(f"Meta-learner: Logistic Regression")
    print(f"{'='*70}\n")

    # Load models (named `nets` so the torchvision `models` module is not shadowed)
    nets = []
    for path, mtype in zip(model_paths, model_types):
        print(f"Loading {mtype} from {os.path.basename(path)}...")
        if mtype == 'resnet18':
            nets.append(load_resnet18_task2(path, device))
        else:
            nets.append(load_efficientnet_task2(path, device))

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Load datasets
    train_ds = RetinaMultiLabelDataset(train_csv, train_image_dir, transform)
    val_ds = RetinaMultiLabelDataset(val_csv, val_image_dir, transform)
    test_ds = RetinaMultiLabelDataset(test_csv, test_image_dir, transform)

    # shuffle=False keeps rows aligned across models and with the CSV order.
    train_loader = DataLoader(train_ds, batch_size=32, shuffle=False, num_workers=2)
    val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=2)

    # Get base model predictions
    def get_predictions(loader):
        """Return ``(X, y)``: per-model probabilities concatenated column-wise, and labels."""
        all_model_preds = [[] for _ in range(len(nets))]
        all_labels = []

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device)
                for i, net in enumerate(nets):
                    outputs = net(imgs)
                    probs = torch.sigmoid(outputs).cpu().numpy()
                    all_model_preds[i].extend(probs)
                all_labels.extend(labels.numpy())

        # Concatenate predictions from all models
        X = np.concatenate([np.array(preds) for preds in all_model_preds], axis=1)
        y = np.array(all_labels)
        return X, y

    print("\nGenerating base model predictions for training...")
    X_train, y_train = get_predictions(train_loader)

    print("Generating base model predictions for validation...")
    X_val, y_val = get_predictions(val_loader)

    print("Generating base model predictions for testing...")
    X_test, y_test = get_predictions(test_loader)

    # Train meta-learners (one per disease)
    meta_learners = []
    disease_names = ["DR", "Glaucoma", "AMD"]

    print(f"\n{'='*70}")
    print("Training Meta-Learners")
    print(f"{'='*70}\n")

    for i, disease in enumerate(disease_names):
        print(f"Training meta-learner for {disease}...")
        clf = LogisticRegression(max_iter=1000, random_state=42)
        clf.fit(X_train, y_train[:, i])
        meta_learners.append(clf)

        # Validation
        val_preds = clf.predict(X_val)
        val_f1 = f1_score(y_val[:, i], val_preds, average='binary', zero_division=0)
        print(f"  Validation F1: {val_f1:.4f}")

    # Test evaluation
    print(f"\n{'='*70}")
    print("Offsite Test Results")
    print(f"{'='*70}\n")

    test_preds = np.zeros((len(X_test), 3), dtype=int)
    f1_scores = []

    for i, (disease, clf) in enumerate(zip(disease_names, meta_learners)):
        test_preds[:, i] = clf.predict(X_test)

        p = precision_score(y_test[:, i], test_preds[:, i], average='binary', zero_division=0)
        r = recall_score(y_test[:, i], test_preds[:, i], average='binary', zero_division=0)
        f1 = f1_score(y_test[:, i], test_preds[:, i], average='binary', zero_division=0)
        f1_scores.append(f1)

        print(f"{disease}: Precision={p:.4f}, Recall={r:.4f}, F1={f1:.4f}")

    avg_f1 = np.mean(f1_scores)
    print(f"\nAverage F1-score: {avg_f1:.4f} ({avg_f1*100:.2f}%)")

    # Onsite predictions
    print(f"\n{'='*70}")
    print("Generating onsite test predictions...")
    print(f"{'='*70}\n")

    onsite_ds = RetinaMultiLabelDataset(onsite_csv, onsite_image_dir, transform)
    onsite_loader = DataLoader(onsite_ds, batch_size=32, shuffle=False, num_workers=2)

    X_onsite, _ = get_predictions(onsite_loader)

    onsite_preds = np.zeros((len(X_onsite), 3), dtype=int)
    for i, clf in enumerate(meta_learners):
        onsite_preds[:, i] = clf.predict(X_onsite)

    # Save
    submission_df = pd.read_csv(onsite_csv)
    submission_df['D'] = onsite_preds[:, 0]
    submission_df['G'] = onsite_preds[:, 1]
    submission_df['A'] = onsite_preds[:, 2]
    submission_df.to_csv(output_file, index=False)

    print(f"✓ Saved predictions: {output_file}")
    print(f"\n{'='*70}")

    return avg_f1


# ===================================================================
# MAIN EXECUTION - USING ONLY TASK 2 MODELS
# ===================================================================
if __name__ == "__main__":
    # Base paths
    base_path = "/content/drive/MyDrive/deep learning project/final_project_resources"
    task2_path = "/content/drive/MyDrive/deep learning project/task2_final"

    # ONLY TASK 2 MODELS (4 models total)
    model_paths = [
        f"{task2_path}/resnet18_focal_loss.pt",
        f"{task2_path}/resnet18_class_balanced.pt",
        f"{task2_path}/efficientnet_focal_loss.pt",
        f"{task2_path}/efficientnet_class_balanced.pt",
    ]

    model_types = [
        'resnet18',
        'resnet18',
        'efficientnet',
        'efficientnet',
    ]

    # Data paths
    train_csv = f"{base_path}/train.csv"
    train_image_dir = f"{base_path}/images/train"
    val_csv = f"{base_path}/val.csv"
    val_image_dir = f"{base_path}/images/val"
    test_csv = f"{base_path}/offsite_test.csv"
    test_image_dir = f"{base_path}/images/offsite_test"
    onsite_csv = f"{base_path}/onsite_test_submission.csv"
    onsite_image_dir = f"{base_path}/images/onsite_test"

    # Single source of truth for the submission file of each method, so the
    # final recommendation always names a file that was actually written.
    output_files = {
        'weighted': "task4_weighted_ensemble.csv",
        'voting': "task4_max_voting.csv",
        'stacking': "task4_stacking.csv",
    }

    # Check if all Task 2 models exist
    print("="*70)
    print("CHECKING TASK 2 MODEL FILES")
    print("="*70)
    all_exist = True
    for path in model_paths:
        if os.path.exists(path):
            print(f"✓ Found: {os.path.basename(path)}")
        else:
            print(f"✗ NOT FOUND: {path}")
            all_exist = False

    if not all_exist:
        print("\n⚠️  Some Task 2 model files are missing!")
        print("Please make sure all .pt files are in:")
        print(f"  {task2_path}/")
        print("\nExpected files:")
        for path in model_paths:
            print(f"  - {os.path.basename(path)}")
        # exit() is the interactive-shell helper; raise SystemExit directly so
        # the run stops without depending on it.
        raise SystemExit(1)

    print("\n" + "="*70)
    print("ALL TASK 2 MODELS FOUND - STARTING ENSEMBLE METHODS")
    print("="*70 + "\n")

    # Run all ensemble methods; each returns its average offsite-test F1
    results = {}

    # Task 4.1: Weighted Average (equal weights)
    weights = [0.25, 0.25, 0.25, 0.25]
    results['weighted'] = weighted_average_ensemble(
        model_paths, model_types, weights,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_files['weighted']
    )

    # Task 4.2: Max Voting
    results['voting'] = max_voting_ensemble(
        model_paths, model_types,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_files['voting']
    )

    # Task 4.3: Stacking
    results['stacking'] = stacking_ensemble(
        model_paths, model_types,
        train_csv, train_image_dir,
        val_csv, val_image_dir,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_files['stacking']
    )

    # Final Summary
    print("\n" + "="*70)
    print("TASK 4 COMPLETE - FINAL SUMMARY (Using Task 2 Models Only)")
    print("="*70)
    print("\nOffsite Test Results:")
    for method, f1 in results.items():
        print(f"  {method.capitalize():20s}: F1 = {f1:.4f} ({f1*100:.2f}%)")

    # The best method is picked by offsite-test F1.
    best_method = max(results, key=results.get)
    best_f1 = results[best_method]

    print(f"\n✓ Best Method: {best_method.upper()} with F1 = {best_f1:.4f}")
    print("="*70)
    print("\nGenerated submission files:")
    for file_name in output_files.values():
        print(f"  - {file_name}")
    print(f"\n✓ Submit {output_files[best_method]} to Kaggle for best results!")
    print("="*70)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Device: cuda
CHECKING TASK 2 MODEL FILES
✓ Found: resnet18_focal_loss.pt
✓ Found: resnet18_class_balanced.pt
✓ Found: efficientnet_focal_loss.pt
✓ Found: efficientnet_class_balanced.pt

ALL TASK 2 MODELS FOUND - STARTING ENSEMBLE METHODS


TASK 4.1: Weighted Average Ensemble
Number of models: 4
Weights: [0.25, 0.25, 0.25, 0.25]

Loading resnet18 from resnet18_focal_loss.pt...
Loading resnet18 from resnet18_class_balanced.pt...
Loading efficientnet from efficientnet_focal_loss.pt...
Loading efficientnet from efficientnet_class_balanced.pt...

Evaluating on offsite test set...

Offsite Test Results

DR: Precision=0.8859, Recall=0.9429, F1=0.9135
Glaucoma: Precision=0.7872, Recall=0.7551, F1=0.7708
AMD: Precision=0.7368, Recall=0.6364, F1=0.6829

Average F1-score: 0.7891 (78.91%)

Generating onsite test predictions...

✓ Saved predictions: task4_weighted_ensembl

In [None]:
# Task 4: ensembles built from the Task 1, 2 and 3 models
from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# ===================================================================
# Dataset Class
# ===================================================================
class RetinaMultiLabelDataset(Dataset):
    """Multi-label image dataset described by a CSV file.

    Column 0 of the CSV is the image file name (joined onto ``image_dir``);
    all remaining columns are cast to ``float32`` and returned as labels.

    Args:
        csv_file: Path of the CSV file, read with ``pd.read_csv``.
        image_dir: Directory that the file name from column 0 is joined onto.
        transform: Optional callable applied to the RGB image before it is
            returned.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for the row at position ``idx``."""
        row = self.data.iloc[idx]
        img_path = os.path.join(self.image_dir, row.iloc[0])
        # Close the file handle deterministically; convert() hands back a
        # separate, fully loaded image that stays valid after the close.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        # Explicit positional slice, consistent with row.iloc[0] above.
        labels = torch.tensor(row.iloc[1:].values.astype("float32"))
        if self.transform:
            img = self.transform(img)
        return img, labels


# ===================================================================
# Model Loading Functions
# ===================================================================
def load_resnet18_task1(ckpt_path, device):
    """Rebuild the Task 1 ResNet18 (plain 3-way linear head) and restore its weights.

    Args:
        ckpt_path: File holding a ``state_dict`` saved with ``torch.save``.
        device: Device used both for ``map_location`` and for the model.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    net = models.resnet18(pretrained=False)
    in_dim = net.fc.in_features
    net.fc = nn.Linear(in_dim, 3)
    state = torch.load(ckpt_path, map_location=device)
    net.load_state_dict(state)
    return net.to(device).eval()


def load_efficientnet_task1(ckpt_path, device):
    """Rebuild the Task 1 EfficientNet-B0 (3-way final layer) and restore its weights.

    Only ``classifier[1]`` is swapped out; the rest of the stock classifier is
    kept as built by torchvision.

    Args:
        ckpt_path: File holding a ``state_dict`` saved with ``torch.save``.
        device: Device used both for ``map_location`` and for the model.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    net = models.efficientnet_b0(pretrained=False)
    head = net.classifier
    head[1] = nn.Linear(head[1].in_features, 3)
    state = torch.load(ckpt_path, map_location=device)
    net.load_state_dict(state)
    return net.to(device).eval()


def load_resnet18_task2(ckpt_path, device):
    """Rebuild the Task 2 ResNet18 (Dropout + linear head) and restore its weights.

    Args:
        ckpt_path: File holding a ``state_dict`` saved with ``torch.save``.
        device: Device used both for ``map_location`` and for the model.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    net = models.resnet18(pretrained=False)
    in_dim = net.fc.in_features
    dropout_head = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(in_dim, 3)
    )
    net.fc = dropout_head
    state = torch.load(ckpt_path, map_location=device)
    net.load_state_dict(state)
    return net.to(device).eval()


def load_efficientnet_task2(ckpt_path, device):
    """Rebuild the Task 2 EfficientNet-B0 (Dropout + linear head) and restore its weights.

    Args:
        ckpt_path: File holding a ``state_dict`` saved with ``torch.save``.
        device: Device used both for ``map_location`` and for the model.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    net = models.efficientnet_b0(pretrained=False)
    in_dim = net.classifier[1].in_features
    dropout_head = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(in_dim, 3)
    )
    net.classifier = dropout_head
    state = torch.load(ckpt_path, map_location=device)
    net.load_state_dict(state)
    return net.to(device).eval()


def load_resnet18_task3_se(ckpt_path, device):
    """Load ResNet18 from Task 3 with SE attention.

    The model is a ResNet18 whose ``layer4`` is wrapped as
    ``nn.Sequential(layer4, SEBlock(512))`` and whose ``fc`` is a 3-way linear
    layer. The checkpoint is loaded with ``strict=False``; any key mismatch is
    printed instead of being silently ignored, because unmatched layers keep
    their random initialisation.

    NOTE(review): wrapping ``layer4`` in ``nn.Sequential`` renames its
    parameters to ``layer4.0.*`` / ``layer4.1.*``. Confirm the Task 3
    checkpoint was saved from the same structure.

    Args:
        ckpt_path: File holding a ``state_dict`` saved with ``torch.save``.
        device: Device used both for ``map_location`` and for the model.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    # SE Block
    class SEBlock(nn.Module):
        """Squeeze-and-excitation: rescale each channel by a learned gate in (0, 1)."""

        def __init__(self, channels, reduction=16):
            super(SEBlock, self).__init__()
            reduced_channels = max(channels // reduction, 1)
            self.fc1 = nn.Linear(channels, reduced_channels, bias=False)
            self.relu = nn.ReLU(inplace=True)
            self.fc2 = nn.Linear(reduced_channels, channels, bias=False)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            batch, channels, _, _ = x.size()
            # Squeeze: global average pool to one value per channel.
            squeeze = torch.nn.functional.adaptive_avg_pool2d(x, 1).view(batch, channels)
            # Excite: bottleneck MLP followed by a sigmoid gate.
            excitation = self.fc1(squeeze)
            excitation = self.relu(excitation)
            excitation = self.fc2(excitation)
            excitation = self.sigmoid(excitation)
            excitation = excitation.view(batch, channels, 1, 1)
            return x * excitation.expand_as(x)

    model = models.resnet18(pretrained=False)
    original_layer4 = model.layer4
    model.layer4 = nn.Sequential(
        original_layer4,
        SEBlock(512, reduction=16)
    )
    model.fc = nn.Linear(model.fc.in_features, 3)
    load_result = model.load_state_dict(torch.load(ckpt_path, map_location=device), strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        # print() rather than warnings.warn(): this notebook silences warnings globally.
        print(f"  WARNING: {os.path.basename(ckpt_path)} only partially matches the SE model "
              f"({len(load_result.missing_keys)} missing, "
              f"{len(load_result.unexpected_keys)} unexpected keys); "
              "unmatched layers keep their random initialisation.")
    model = model.to(device)
    model.eval()
    return model


def load_resnet18_task3_mha(ckpt_path, device):
    """Load ResNet18 from Task 3 with MHA attention.

    The model is a ResNet18 whose ``layer4`` is wrapped as
    ``nn.Sequential(layer4, MultiHeadAttention(512, num_heads=8, reduction=2))``
    and whose ``fc`` is a 3-way linear layer. The checkpoint is loaded with
    ``strict=False``; any key mismatch is printed instead of being silently
    ignored, because unmatched layers keep their random initialisation.

    NOTE(review): wrapping ``layer4`` in ``nn.Sequential`` renames its
    parameters to ``layer4.0.*`` / ``layer4.1.*``. Confirm the Task 3
    checkpoint was saved from the same structure.

    Args:
        ckpt_path: File holding a ``state_dict`` saved with ``torch.save``.
        device: Device used both for ``map_location`` and for the model.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    # MHA Block
    class MultiHeadAttention(nn.Module):
        """Residual multi-head self-attention over the positions of a feature map."""

        def __init__(self, channels, num_heads=8, reduction=4):
            super(MultiHeadAttention, self).__init__()
            self.num_heads = num_heads
            self.head_dim = channels // num_heads
            self.scale = self.head_dim ** -0.5
            self.reduction = reduction

            # Optional average pooling shortens the attended sequence.
            if reduction > 1:
                self.pool = nn.AvgPool2d(kernel_size=reduction, stride=reduction)
            else:
                self.pool = nn.Identity()

            self.qkv = nn.Linear(channels, channels * 3, bias=False)
            self.proj_out = nn.Linear(channels, channels, bias=False)
            self.norm = nn.LayerNorm(channels)

        def forward(self, x):
            batch, channels, H, W = x.size()
            identity = x
            x_reduced = self.pool(x)
            _, _, h, w = x_reduced.size()
            seq_len = h * w
            # (batch, channels, h, w) -> (batch, seq_len, channels)
            x_flat = x_reduced.flatten(2).transpose(1, 2)
            qkv = self.qkv(x_flat)
            qkv = qkv.reshape(batch, seq_len, 3, self.num_heads, self.head_dim)
            # -> (3, batch, num_heads, seq_len, head_dim)
            qkv = qkv.permute(2, 0, 3, 1, 4)
            q, k, v = qkv[0], qkv[1], qkv[2]
            attn_scores = (q @ k.transpose(-2, -1)) * self.scale
            attn_weights = torch.nn.functional.softmax(attn_scores, dim=-1)
            attn_out = attn_weights @ v
            # Merge the heads back into the channel dimension.
            attn_out = attn_out.transpose(1, 2).contiguous()
            attn_out = attn_out.reshape(batch, seq_len, channels)
            out = self.proj_out(attn_out)
            out = self.norm(out)
            out = out.transpose(1, 2).reshape(batch, channels, h, w)
            # Upsample to the input resolution so the residual add lines up.
            if self.reduction > 1:
                out = torch.nn.functional.interpolate(out, size=(H, W), mode='bilinear', align_corners=False)
            return identity + out

    model = models.resnet18(pretrained=False)
    original_layer4 = model.layer4
    model.layer4 = nn.Sequential(
        original_layer4,
        MultiHeadAttention(512, num_heads=8, reduction=2)
    )
    model.fc = nn.Linear(model.fc.in_features, 3)
    load_result = model.load_state_dict(torch.load(ckpt_path, map_location=device), strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        # print() rather than warnings.warn(): this notebook silences warnings globally.
        print(f"  WARNING: {os.path.basename(ckpt_path)} only partially matches the MHA model "
              f"({len(load_result.missing_keys)} missing, "
              f"{len(load_result.unexpected_keys)} unexpected keys); "
              "unmatched layers keep their random initialisation.")
    model = model.to(device)
    model.eval()
    return model


def load_model_auto(ckpt_path, model_type, task, device):
    """Automatically load model based on type and task.

    Tasks 1 and 2 dispatch on ``model_type`` (``'resnet18'`` or
    ``'efficientnet'``). Task 3 ignores ``model_type`` and picks the attention
    variant from the checkpoint name: ``'mha'`` or ``'se'``, case-insensitive.

    The file name is inspected before the directory part, and ``'mha'`` is
    tested before the much shorter ``'se'``. Otherwise a folder such as
    ``ensemble/`` or a suffix such as ``_seed42`` would send an MHA checkpoint
    to the SE loader. The full path is still used as a fallback when the file
    name contains neither marker.

    Args:
        ckpt_path: Path of the checkpoint file.
        model_type: Backbone name, used for tasks 1 and 2.
        task: Task number the checkpoint came from (1, 2 or 3).
        device: Device passed through to the specific loader.

    Returns:
        The model returned by the selected loader.

    Raises:
        ValueError: If no loader matches the given configuration.
    """
    if task == 1:
        if model_type == 'resnet18':
            return load_resnet18_task1(ckpt_path, device)
        elif model_type == 'efficientnet':
            return load_efficientnet_task1(ckpt_path, device)
    elif task == 2:
        if model_type == 'resnet18':
            return load_resnet18_task2(ckpt_path, device)
        elif model_type == 'efficientnet':
            return load_efficientnet_task2(ckpt_path, device)
    elif task == 3:
        file_name = os.path.basename(ckpt_path).lower()
        full_path = ckpt_path.lower()
        # File name first, then the whole path as a backward-compatible fallback.
        for candidate in (file_name, full_path):
            if 'mha' in candidate:
                return load_resnet18_task3_mha(ckpt_path, device)
            if 'se' in candidate:
                return load_resnet18_task3_se(ckpt_path, device)

    raise ValueError(f"Unknown model configuration: {model_type}, task {task}")


# ===================================================================
# METHOD 1: Super Ensemble with TTA + Optimized Weights
# ===================================================================
def _tta_weighted_predict(nets, weights, loader, device, use_tta):
    """Run the weighted (optionally TTA) ensemble over ``loader``; return ``(labels, preds)``."""
    labels_out, preds_out = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            # TTA views: original, flip along dim 3 (H-flip), flip along dim 2 (V-flip).
            views = [imgs]
            if use_tta:
                views += [torch.flip(imgs, dims=[3]), torch.flip(imgs, dims=[2])]

            per_view_probs = []
            for view in views:
                combined = None
                for net, weight in zip(nets, weights):
                    probs = torch.sigmoid(net(view)) * float(weight)
                    combined = probs if combined is None else combined + probs
                per_view_probs.append(combined)

            # Average over the views (a single view is returned unchanged).
            final_probs = torch.stack(per_view_probs).mean(dim=0)
            preds_out.extend((final_probs > 0.5).int().cpu().numpy())
            labels_out.extend(labels.numpy())
    return np.array(labels_out), np.array(preds_out)


def ultimate_weighted_ensemble(model_configs, test_csv, test_image_dir,
                               onsite_csv, onsite_image_dir,
                               output_file="task4_ultimate_weighted.csv",
                               use_tta=True):
    """
    Ultimate weighted ensemble using ALL models with optimized weights

    Each config's ``weight`` is normalised so the weights sum to 1. Per-model
    sigmoid probabilities are combined with those weights, optionally averaged
    over three test-time-augmentation views, and thresholded at 0.5. The
    ensemble is scored on the offsite test set and its onsite predictions are
    written to ``output_file`` (columns D, G, A).

    Args:
        model_configs: List of dicts with keys ``name``, ``path``, ``type``,
            ``task`` and ``weight``.
        test_csv, test_image_dir: Labelled offsite test set.
        onsite_csv, onsite_image_dir: Onsite set; the CSV is also reused as the
            submission template.
        output_file: Destination CSV for the onsite predictions.
        use_tta: If true, average predictions over the original image and its
            two flips.

    Returns:
        Mean of the three per-disease F1 scores on the offsite test set.

    Raises:
        ValueError: If ``model_configs`` is empty or the weights do not sum to
            a positive finite value (normalising would otherwise give NaN
            weights and silently all-negative predictions).
    """
    if len(model_configs) == 0:
        raise ValueError("model_configs must contain at least one model")

    # Normalize weights (validated before any checkpoint is loaded)
    weights = np.array([config['weight'] for config in model_configs], dtype=float)
    weight_total = weights.sum()
    if not np.isfinite(weight_total) or weight_total <= 0:
        raise ValueError("model weights must sum to a positive finite value")
    weights = weights / weight_total

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\n{'='*70}")
    print(" ULTIMATE METHOD 1: Super Weighted Ensemble")
    print(f"{'='*70}")
    print(f"Total models: {len(model_configs)}")
    print(f"Test-Time Augmentation: {use_tta}")
    print(f"{'='*70}\n")

    # Load all models (named `nets` so the torchvision `models` module is not shadowed)
    nets = []
    for config in model_configs:
        print(f"Loading {config['name']}...")
        nets.append(load_model_auto(config['path'], config['type'], config['task'], device))

    print(f"\n Model Weights:")
    for config, w in zip(model_configs, weights):
        print(f"  {config['name']:40s}: {w:.4f}")

    # Setup data
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Offsite test evaluation
    test_ds = RetinaMultiLabelDataset(test_csv, test_image_dir, transform)
    test_loader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=2)

    print("\n Evaluating on offsite test set...")
    y_true, y_pred = _tta_weighted_predict(nets, weights, test_loader, device, use_tta)

    # Calculate metrics
    disease_names = ["DR", "Glaucoma", "AMD"]
    f1_scores = []

    print(f"\n{'='*70}")
    print(" Offsite Test Results")
    print(f"{'='*70}\n")

    for i, disease in enumerate(disease_names):
        p = precision_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        r = recall_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1 = f1_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1_scores.append(f1)
        print(f"{disease:10s}: Precision={p:.4f}, Recall={r:.4f}, F1={f1:.4f}")

    avg_f1 = np.mean(f1_scores)
    print(f"\n{'='*70}")
    print(f" Average F1-score: {avg_f1:.4f} ({avg_f1*100:.2f}%)")
    print(f"{'='*70}\n")

    # Generate onsite predictions
    print(" Generating onsite test predictions...")

    onsite_ds = RetinaMultiLabelDataset(onsite_csv, onsite_image_dir, transform)
    onsite_loader = DataLoader(onsite_ds, batch_size=16, shuffle=False, num_workers=2)
    _, onsite_preds = _tta_weighted_predict(nets, weights, onsite_loader, device, use_tta)

    # Save predictions
    submission_df = pd.read_csv(onsite_csv)
    submission_df['D'] = onsite_preds[:, 0]
    submission_df['G'] = onsite_preds[:, 1]
    submission_df['A'] = onsite_preds[:, 2]
    submission_df.to_csv(output_file, index=False)

    print(f" Saved predictions: {output_file}\n")

    return avg_f1


# ===================================================================
# METHOD 2: Deep Stacking with All Models
# ===================================================================
def ultimate_stacking_ensemble(model_configs, train_csv, train_image_dir,
                               val_csv, val_image_dir, test_csv, test_image_dir,
                               onsite_csv, onsite_image_dir,
                               output_file="task4_ultimate_stacking.csv"):
    """
    Deep stacking with Random Forest using ALL models.

    Every base model produces sigmoid probabilities for each image; the
    per-model probability vectors are concatenated column-wise into one
    meta-feature row per image. One RandomForestClassifier per disease is
    fitted on the train-set meta-features, scored on the validation and
    offsite test sets, and finally used to write onsite predictions.

    Args:
        model_configs: list of dicts with keys 'name', 'path', 'type', 'task'
            (forwarded to load_model_auto). Must not be empty.
        train_csv, train_image_dir: data used to fit the meta-learners.
        val_csv, val_image_dir: data used only to report validation F1.
        test_csv, test_image_dir: offsite test data used for the returned score.
        onsite_csv, onsite_image_dir: submission template CSV and its images.
        output_file: path of the CSV written with predicted 'D', 'G', 'A' columns.

    Returns:
        Mean of the three per-disease binary F1 scores on the offsite test set.

    Raises:
        ValueError: if model_configs is empty.
    """
    # Fail fast: without base models there are no meta-features to stack.
    if len(model_configs) == 0:
        raise ValueError("model_configs is empty: stacking needs at least one base model")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\n{'='*70}")
    print(" ULTIMATE METHOD 2: Deep Stacking Ensemble")
    print(f"{'='*70}")
    print(f"Base models: {len(model_configs)}")
    print("Meta-learner: Random Forest")
    print(f"{'='*70}\n")

    # Load models
    models = []
    for config in model_configs:
        print(f"Loading {config['name']}...")
        model = load_model_auto(config['path'], config['type'], config['task'], device)
        # Defensive: inference needs dropout/BatchNorm in eval mode.
        # Harmless no-op if load_model_auto already switched the model to eval.
        model.eval()
        models.append(model)

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    def make_loader(csv_file, image_dir):
        """Build a non-shuffled loader so rows stay aligned with the CSV order."""
        dataset = RetinaMultiLabelDataset(csv_file, image_dir, transform)
        return DataLoader(dataset, batch_size=32, shuffle=False, num_workers=2)

    # Load datasets
    train_loader = make_loader(train_csv, train_image_dir)
    val_loader = make_loader(val_csv, val_image_dir)
    test_loader = make_loader(test_csv, test_image_dir)

    # Get base model predictions
    def get_predictions(loader, desc=""):
        """Return (X, y): X stacks every model's probabilities column-wise, y holds the labels."""
        print(f"  Generating predictions {desc}...")
        per_model_batches = [[] for _ in models]
        label_batches = []

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device)
                for model_idx, model in enumerate(models):
                    probs = torch.sigmoid(model(imgs)).cpu().numpy()
                    per_model_batches[model_idx].append(probs)
                label_batches.append(labels.numpy())

        # Join batches per model (rows), then join models side by side (columns).
        X = np.concatenate(
            [np.concatenate(batches, axis=0) for batches in per_model_batches],
            axis=1,
        )
        y = np.concatenate(label_batches, axis=0)
        return X, y

    print("\n Generating meta-features...")
    X_train, y_train = get_predictions(train_loader, "for train")
    X_val, y_val = get_predictions(val_loader, "for validation")
    X_test, y_test = get_predictions(test_loader, "for test")

    print(f"\n  Meta-feature shape: {X_train.shape}")

    # Train meta-learners
    # NOTE(review): if the base models were themselves fitted on train_csv, these
    # meta-features are in-sample and the meta-learner may over-trust them;
    # consider fitting on held-out (e.g. validation / out-of-fold) predictions — confirm.
    meta_learners = []
    disease_names = ["DR", "Glaucoma", "AMD"]

    print(f"\n Training Meta-Learners...")

    for i, disease in enumerate(disease_names):
        print(f"\n  {disease}:")

        clf = RandomForestClassifier(
            n_estimators=300,
            max_depth=15,
            min_samples_split=5,
            min_samples_leaf=2,
            max_features='sqrt',
            random_state=42,
            n_jobs=-1
        )

        # One binary classifier per disease, all sharing the same meta-features.
        clf.fit(X_train, y_train[:, i])
        meta_learners.append(clf)

        # Validation
        val_preds = clf.predict(X_val)
        val_f1 = f1_score(y_val[:, i], val_preds, average='binary', zero_division=0)
        print(f"    Validation F1: {val_f1:.4f}")

    # Test evaluation
    print(f"\n{'='*70}")
    print(" Offsite Test Results")
    print(f"{'='*70}\n")

    test_preds = np.zeros((len(X_test), len(disease_names)), dtype=int)
    f1_scores = []

    for i, (disease, clf) in enumerate(zip(disease_names, meta_learners)):
        test_preds[:, i] = clf.predict(X_test)

        p = precision_score(y_test[:, i], test_preds[:, i], average='binary', zero_division=0)
        r = recall_score(y_test[:, i], test_preds[:, i], average='binary', zero_division=0)
        f1 = f1_score(y_test[:, i], test_preds[:, i], average='binary', zero_division=0)
        f1_scores.append(f1)

        print(f"{disease:10s}: Precision={p:.4f}, Recall={r:.4f}, F1={f1:.4f}")

    avg_f1 = np.mean(f1_scores)
    print(f"\n{'='*70}")
    print(f" Average F1-score: {avg_f1:.4f} ({avg_f1*100:.2f}%)")
    print(f"{'='*70}\n")

    # Onsite predictions
    print(" Generating onsite test predictions...")

    onsite_loader = make_loader(onsite_csv, onsite_image_dir)

    # Labels read from the onsite CSV are ignored; only the features are used.
    X_onsite, _ = get_predictions(onsite_loader, "for onsite")

    onsite_preds = np.zeros((len(X_onsite), len(disease_names)), dtype=int)
    for i, clf in enumerate(meta_learners):
        onsite_preds[:, i] = clf.predict(X_onsite)

    # Save: overwrite the label columns of the submission template in CSV row order.
    submission_df = pd.read_csv(onsite_csv)
    submission_df['D'] = onsite_preds[:, 0]
    submission_df['G'] = onsite_preds[:, 1]
    submission_df['A'] = onsite_preds[:, 2]
    submission_df.to_csv(output_file, index=False)

    print(f" Saved predictions: {output_file}\n")

    return avg_f1


# ===================================================================
#  METHOD 3: Adaptive Threshold with All Models
# ===================================================================
def ultimate_adaptive_threshold(model_configs, val_csv, val_image_dir,
                                test_csv, test_image_dir,
                                onsite_csv, onsite_image_dir,
                                output_file="task4_ultimate_adaptive.csv"):
    """
    Adaptive threshold optimization using ALL models.

    The ensemble probability of each image is the weighted average of every
    model's sigmoid output (weights normalized to sum to 1). For each disease
    a decision threshold is picked on the validation set by scanning
    np.arange(0.25, 0.75, 0.025) and keeping the first threshold with the
    highest binary F1 (0.5 if no threshold scores above 0). Those thresholds
    are then applied to the offsite test set and to the onsite images.

    Args:
        model_configs: list of dicts with keys 'name', 'path', 'type', 'task'
            (forwarded to load_model_auto) and 'weight'. Must not be empty.
        val_csv, val_image_dir: data used to select the thresholds.
        test_csv, test_image_dir: offsite test data used for the returned score.
        onsite_csv, onsite_image_dir: submission template CSV and its images.
        output_file: path of the CSV written with predicted 'D', 'G', 'A' columns.

    Returns:
        Mean of the three per-disease binary F1 scores on the offsite test set.

    Raises:
        ValueError: if model_configs is empty or the weights sum to zero.
    """
    if len(model_configs) == 0:
        raise ValueError("model_configs is empty: the ensemble needs at least one model")

    # Validate and normalize the weights before the (expensive) model loading;
    # a zero sum would otherwise turn every probability into NaN silently.
    weights = np.array([config['weight'] for config in model_configs])
    weight_total = weights.sum()
    if weight_total == 0:
        raise ValueError("model weights sum to zero and cannot be normalized")
    weights = weights / weight_total

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\n{'='*70}")
    print(" ULTIMATE METHOD 3: Adaptive Threshold Optimization")
    print(f"{'='*70}")
    print(f"Total models: {len(model_configs)}")
    print(f"{'='*70}\n")

    # Load models
    models = []
    for config in model_configs:
        print(f"Loading {config['name']}...")
        model = load_model_auto(config['path'], config['type'], config['task'], device)
        # Defensive: inference needs dropout/BatchNorm in eval mode.
        # Harmless no-op if load_model_auto already switched the model to eval.
        model.eval()
        models.append(model)

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    def make_loader(csv_file, image_dir):
        """Build a non-shuffled loader so rows stay aligned with the CSV order."""
        dataset = RetinaMultiLabelDataset(csv_file, image_dir, transform)
        return DataLoader(dataset, batch_size=32, shuffle=False, num_workers=2)

    def ensemble_probs(imgs):
        """Weighted sum of every model's sigmoid probabilities for one batch."""
        weighted_probs = None
        for model, weight in zip(models, weights):
            probs = torch.sigmoid(model(imgs))
            # Keep the tensor on the left so the product stays a torch tensor.
            if weighted_probs is None:
                weighted_probs = probs * weight
            else:
                weighted_probs += probs * weight
        return weighted_probs

    def predict_loader(loader):
        """Run the ensemble over a loader; return (probabilities, labels) as arrays."""
        prob_batches, label_batches = [], []
        with torch.no_grad():
            for imgs, labels in loader:
                batch_probs = ensemble_probs(imgs.to(device))
                prob_batches.append(batch_probs.cpu().numpy())
                label_batches.append(labels.numpy())
        return np.concatenate(prob_batches, axis=0), np.concatenate(label_batches, axis=0)

    def apply_thresholds(probs_np, thresholds):
        """Binarize each column with its own threshold (strictly greater than)."""
        preds = np.zeros_like(probs_np, dtype=int)
        for col, threshold in enumerate(thresholds):
            preds[:, col] = (probs_np[:, col] > threshold).astype(int)
        return preds

    # Find optimal thresholds on validation set
    print("\n Finding optimal thresholds on validation set...")
    val_probs_all, val_labels_all = predict_loader(make_loader(val_csv, val_image_dir))

    # Find optimal threshold for each disease
    optimal_thresholds = []
    disease_names = ["DR", "Glaucoma", "AMD"]

    print("\n Optimal thresholds per disease:")
    for i, disease in enumerate(disease_names):
        best_threshold = 0.5
        best_f1 = 0

        # Try different thresholds; strict '>' keeps the lowest threshold on ties.
        for threshold in np.arange(0.25, 0.75, 0.025):
            preds = (val_probs_all[:, i] > threshold).astype(int)
            f1 = f1_score(val_labels_all[:, i], preds, average='binary', zero_division=0)

            if f1 > best_f1:
                best_f1 = f1
                best_threshold = threshold

        optimal_thresholds.append(best_threshold)
        print(f"  {disease:10s}: {best_threshold:.3f} (Val F1={best_f1:.4f})")

    # Test with optimal thresholds
    print(f"\n{'='*70}")
    print(" Evaluating on offsite test set...")
    print(f"{'='*70}\n")

    test_probs, y_true = predict_loader(make_loader(test_csv, test_image_dir))
    y_pred = apply_thresholds(test_probs, optimal_thresholds)

    # Calculate metrics
    f1_scores = []

    print("Offsite Test Results:\n")

    for i, disease in enumerate(disease_names):
        p = precision_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        r = recall_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1 = f1_score(y_true[:, i], y_pred[:, i], average='binary', zero_division=0)
        f1_scores.append(f1)
        print(f"{disease:10s}: Precision={p:.4f}, Recall={r:.4f}, F1={f1:.4f} (threshold={optimal_thresholds[i]:.3f})")

    avg_f1 = np.mean(f1_scores)
    print(f"\n{'='*70}")
    print(f" Average F1-score: {avg_f1:.4f} ({avg_f1*100:.2f}%)")
    print(f"{'='*70}\n")

    # Onsite predictions
    print(" Generating onsite test predictions...")

    # Labels read from the onsite CSV are ignored; only the probabilities are used.
    onsite_probs, _ = predict_loader(make_loader(onsite_csv, onsite_image_dir))
    onsite_preds = apply_thresholds(onsite_probs, optimal_thresholds)

    # Save: overwrite the label columns of the submission template in CSV row order.
    submission_df = pd.read_csv(onsite_csv)
    submission_df['D'] = onsite_preds[:, 0]
    submission_df['G'] = onsite_preds[:, 1]
    submission_df['A'] = onsite_preds[:, 2]
    submission_df.to_csv(output_file, index=False)
    print(f" Saved predictions: {output_file}\n")

    return avg_f1
# ===================================================================
# MAIN EXECUTION - USING ALL TASK 1,2,3 MODELS
# ===================================================================
if __name__ == "__main__":
    # Driver for Task 4: discover every Task 1/2/3 checkpoint, run each ensemble
    # method on the offsite test set, write one onsite submission per method and
    # report which method scored the highest average F1.

    # Base paths
    base_path = "/content/drive/MyDrive/deep learning project/final_project_resources"
    task1_path = "/content/drive/MyDrive/deep learning project/task1_final"
    task2_path = "/content/drive/MyDrive/deep learning project/task2_final"
    task3_path = "/content/drive/MyDrive/deep learning project/task3_final"

    # Data paths
    train_csv = f"{base_path}/train.csv"
    train_image_dir = f"{base_path}/images/train"
    val_csv = f"{base_path}/val.csv"
    val_image_dir = f"{base_path}/images/val"
    test_csv = f"{base_path}/offsite_test.csv"
    test_image_dir = f"{base_path}/images/offsite_test"
    onsite_csv = f"{base_path}/onsite_test_submission.csv"
    onsite_image_dir = f"{base_path}/images/onsite_test"

    # -------------------------------------------------------------------
    # Build model configs automatically from folders
    # -------------------------------------------------------------------
    def build_model_configs(task1_path, task2_path, task3_path):
        """
        Scan the three task folders and return one config dict per checkpoint.

        Only files ending in ".pt" are considered. The architecture is inferred
        from the lower-cased filename: "resnet" -> "resnet18", otherwise
        "efficientnet" -> "efficientnet" (Task 1 and Task 2 only). Files that
        match neither are skipped, and folders that do not exist are ignored.
        Every config gets weight 1.00; files are listed in sorted order per task.
        """
        # (task id, checkpoint folder, whether EfficientNet checkpoints are accepted)
        task_folders = [
            (1, task1_path, True),
            (2, task2_path, True),
            (3, task3_path, False),  # Task 3 models (ResNet18 SE/MHA)
        ]

        model_configs = []
        for task_id, folder, allow_efficientnet in task_folders:
            if not os.path.exists(folder):
                continue

            for f in sorted(os.listdir(folder)):
                if not f.endswith(".pt"):
                    continue

                f_lower = f.lower()
                if "resnet" in f_lower:
                    mtype = "resnet18"
                elif allow_efficientnet and "efficientnet" in f_lower:
                    mtype = "efficientnet"
                else:
                    continue

                model_configs.append({
                    "name": f"T{task_id}_{f}",
                    "path": os.path.join(folder, f),
                    "type": mtype,
                    "task": task_id,
                    "weight": 1.00
                })

        return model_configs

    model_configs = build_model_configs(task1_path, task2_path, task3_path)

    print("="*70)
    print("CHECKING ALL MODEL FILES (TASK 1 + TASK 2 + TASK 3)")
    print("="*70)

    if len(model_configs) == 0:
        print("⚠️ No .pt files found in task1_final/task2_final/task3_final!")
        raise SystemExit

    all_exist = True
    for cfg in model_configs:
        if os.path.exists(cfg["path"]):
            print(f"✓ Found: {cfg['name']}")
        else:
            print(f"✗ NOT FOUND: {cfg['path']}")
            all_exist = False

    if not all_exist:
        print("\n Some model files are missing. Please fix paths and rerun.")
        raise SystemExit

    print("\n" + "="*70)
    print("ALL MODELS FOUND - STARTING ULTIMATE TASK 4 METHODS")
    print("="*70 + "\n")

    # Submission file written by each method. Single source of truth for the
    # calls below, the "generated files" listing and the final recommendation.
    best_file_map = {
        "ultimate_weighted_tta": "task4_ultimate_weighted_tta.csv",
        "ultimate_weighted_no_tta": "task4_ultimate_weighted_no_tta.csv",
        "ultimate_stacking": "task4_ultimate_stacking.csv",
        "ultimate_adaptive_threshold": "task4_ultimate_adaptive.csv",
    }

    # -------------------------------------------------------------------
    # Run all ultimate methods
    # -------------------------------------------------------------------
    # Each call returns the method's average F1 on the offsite test set.
    results = {}

    # Ultimate Method 1: Weighted Ensemble (with TTA)
    results["ultimate_weighted_tta"] = ultimate_weighted_ensemble(
        model_configs,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_file=best_file_map["ultimate_weighted_tta"],
        use_tta=True
    )

    # Ultimate Method 1 (No TTA)
    results["ultimate_weighted_no_tta"] = ultimate_weighted_ensemble(
        model_configs,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_file=best_file_map["ultimate_weighted_no_tta"],
        use_tta=False
    )

    # Ultimate Method 2: Deep Stacking
    results["ultimate_stacking"] = ultimate_stacking_ensemble(
        model_configs,
        train_csv, train_image_dir,
        val_csv, val_image_dir,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_file=best_file_map["ultimate_stacking"]
    )

    # Ultimate Method 3: Adaptive Threshold (Val-optimized thresholds)
    results["ultimate_adaptive_threshold"] = ultimate_adaptive_threshold(
        model_configs,
        val_csv, val_image_dir,
        test_csv, test_image_dir,
        onsite_csv, onsite_image_dir,
        output_file=best_file_map["ultimate_adaptive_threshold"]
    )

    # -------------------------------------------------------------------
    # Final Summary
    # -------------------------------------------------------------------
    print("\n" + "="*70)
    print(" TASK 4 COMPLETE - ULTIMATE FINAL SUMMARY (All Tasks 1+2+3 Models)")
    print("="*70)

    print("\nOffsite Test Results:")
    for method, f1 in results.items():
        print(f"  {method:30s}: F1 = {f1:.4f} ({f1*100:.2f}%)")

    # Highest offsite F1 wins; on ties max() keeps the first method inserted.
    best_method = max(results, key=results.get)
    best_f1 = results[best_method]

    print(f"\n Best Method: {best_method.upper()} with F1 = {best_f1:.4f} ({best_f1*100:.2f}%)")
    print("="*70)

    print("\nGenerated submission files:")
    for submission_file in best_file_map.values():
        print(f"  - {submission_file}")

    # Suggest best file name
    print(f"\n Submit this file to Kaggle for best results:")
    print(f"  - {best_file_map.get(best_method, 'UNKNOWN')}")
    print("="*70)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Device: cuda
CHECKING ALL MODEL FILES (TASK 1 + TASK 2 + TASK 3)
✓ Found: T1_efficientnet_DLsns_task1-1.pt
✓ Found: T1_efficientnet_DLsns_task1-2.pt
✓ Found: T1_efficientnet_DLsns_task1-3.pt
✓ Found: T1_resnet18_DLsns_task1-1.pt
✓ Found: T1_resnet18_DLsns_task1-2.pt
✓ Found: T1_resnet18_DLsns_task1-3.pt
✓ Found: T2_efficientnet_class_balanced.pt
✓ Found: T2_efficientnet_focal_loss.pt
✓ Found: T2_resnet18_class_balanced.pt
✓ Found: T2_resnet18_focal_loss.pt
✓ Found: T3_best_resnet18_task3_mha.pt
✓ Found: T3_best_resnet18_task3_se.pt

ALL MODELS FOUND - STARTING ULTIMATE TASK 4 METHODS


 ULTIMATE METHOD 1: Super Weighted Ensemble
Total models: 12
Test-Time Augmentation: True

Loading T1_efficientnet_DLsns_task1-1.pt...
Loading T1_efficientnet_DLsns_task1-2.pt...
Loading T1_efficientnet_DLsns_task1-3.pt...
Loading T1_resnet18_DLsns_task1-1.pt...
Loading T1_resn