In [2]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-skinny==3.8.1 (from mlflow)
  Downloading mlflow_skinny-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.8.1 (from mlflow)
  Downloading mlflow_tracing-3.8.1-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting huey<3,>=2.5.0 (from mlflow)
  Downloading huey-2.6.0-py3-none-any.whl.metadata (4.3 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.8.1->mlflow)
  Downloading databricks_sdk-0.78.0-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import mlflow
import mlflow.pytorch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader,WeightedRandomSampler
from torchvision import transforms,models
from sklearn.metrics import accuracy_score,confusion_matrix,roc_auc_score,recall_score,precision_score,f1_score

In [4]:
# Colab-only: mount Google Drive at /content/drive; the dataset paths configured
# in the next cell live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Root folder of the APTOS data on the mounted Google Drive.
APTOS_ROOT = "/content/drive/MyDrive/Diabetic_retinopathy/data/APTOS"

# Image folders for the training and validation splits.
TRAIN_IMG_DIR = f"{APTOS_ROOT}/train_images"
VAL_IMG_DIR   = f"{APTOS_ROOT}/val_images"

# Label CSVs; a later cell asserts that both contain `id_code` and `diagnosis` columns.
TRAIN_CSV = f"{APTOS_ROOT}/train_1.csv"
VAL_CSV   = f"{APTOS_ROOT}/valid.csv"

In [6]:
# Global training configuration.
NUM_CLASSES = 5        # width of the classifier head and of per-class metrics
IMG_SIZE = 224         # images are resized to IMG_SIZE x IMG_SIZE by the transforms
BATCH_SIZE = 32        # training DataLoader batch size
VAL_BATCH_SIZE = 64    # validation DataLoader batch size
EPOCHS = 5             # number of train/eval passes in the MLflow run cell
LR = 3e-4              # AdamW learning rate
WEIGHT_DECAY = 1e-4    # AdamW weight decay
SEED = 42              # passed to seed_everything()


In [7]:
# Experiment switches: change these between runs to compare imbalance strategies.
MODEL_NAME = "resnet50"   # options: "resnet50", "resnet18"
LOSS_TYPE = "weighted_ce" # options: "ce", "weighted_ce", "focal"
USE_SAMPLER = False       # True/False; True makes build_train_loader use a WeightedRandomSampler
FOCAL_GAMMA = 2.0         # only used when LOSS_TYPE == "focal"

In [8]:
# Name passed to mlflow.set_experiment() before the training run is started.
EXPERIMENT_NAME = "APTOS_Imbalance_Experiments"

In [9]:
def seed_everything(seed: int = 42, deterministic: bool = False):
    """Seed the Python, NumPy and PyTorch (CPU + all CUDA devices) RNGs.

    Args:
        seed: Value used to seed every generator.
        deterministic: When False (the default, matching the previous
            behaviour) cuDNN autotuning is enabled and deterministic kernels
            are not enforced, so GPU results may still vary between runs even
            with identical seeds. When True, cuDNN is switched to
            deterministic kernels and autotuning is disabled.
    """
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # benchmark mode and determinism are opposing settings, so keep them in sync.
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic

# Seed all RNGs once, before any data loader or model is created.
seed_everything(SEED)

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

Device: cuda


In [10]:
# Load each split's label table, then trim stray whitespace from the header
# names so the column lookups used later match exactly.
train_data = pd.read_csv(TRAIN_CSV)
val_data = pd.read_csv(VAL_CSV)

val_data.columns = val_data.columns.str.strip()
train_data.columns = train_data.columns.str.strip()

In [12]:
# Both label tables must expose the image id and the grade columns.
assert {"id_code", "diagnosis"} <= set(train_data.columns), "CSV must have id_code, diagnosis"
assert {"id_code", "diagnosis"} <= set(val_data.columns), "CSV must have id_code, diagnosis"

# Quick look at split sizes and at the class imbalance in the training split.
print(f"Train rows: {len(train_data)} Val rows: {len(val_data)}")
print(f"Train class counts:\n {train_data['diagnosis'].value_counts().sort_index()}")

Train rows: 2930 Val rows: 366
Train class counts:
 diagnosis
0    1434
1     300
2     808
3     154
4     234
Name: count, dtype: int64


In [13]:
# Channel statistics used to normalise inputs for the pretrained backbones.
IMAGENET_MEAN, IMAGENET_STD = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

# Training pipeline: resize, light augmentation, then tensor conversion + normalisation.
train_tfms = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# Validation pipeline: same preprocessing without any random augmentation.
val_tfms = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)


In [14]:
class APTOSDataset(Dataset):
    """Map-style dataset yielding ``(transformed_image, int_label)`` pairs.

    Each row of ``df`` supplies an ``id_code`` (image identifier, with or
    without a file extension) and a ``diagnosis`` (label). Image files are
    looked up inside ``img_dir``.
    """

    def __init__(self, df, img_dir, transform):
        # A fresh 0..n-1 index keeps positional ``iloc`` access straightforward.
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def _resolve_path(self, img_id: str):
        """Return the first existing file path for ``img_id``, or ``None``.

        The id is tried verbatim when it already ends in a known image
        extension, and then with ``.png``, ``.jpg`` and ``.jpeg`` appended.
        """
        stem = str(img_id)
        known_exts = (".png", ".jpg", ".jpeg")

        file_names = [stem] if stem.lower().endswith(known_exts) else []
        file_names += [stem + ext for ext in known_exts]

        for file_name in file_names:
            candidate = os.path.join(self.img_dir, file_name)
            if os.path.exists(candidate):
                return candidate
        return None

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        img_id, label = record["id_code"], int(record["diagnosis"])

        img_path = self._resolve_path(img_id)
        if img_path is not None:
            image = Image.open(img_path).convert("RGB")
            return self.transform(image), label

        raise FileNotFoundError(
            f"Missing image for id_code='{img_id}' in '{self.img_dir}'. "
            f"Checked png/jpg/jpeg."
        )

In [16]:
# Wrap each split's label table and image folder with its transform pipeline.
train_ds = APTOSDataset(df=train_data, img_dir=TRAIN_IMG_DIR, transform=train_tfms)
val_ds = APTOSDataset(df=val_data, img_dir=VAL_IMG_DIR, transform=val_tfms)

In [17]:
def build_train_loader(use_sampler: bool):
    """Create the training DataLoader over ``train_ds``.

    Args:
        use_sampler: When False, batches are drawn by plain shuffling. When
            True, a ``WeightedRandomSampler`` draws samples with replacement,
            each sample weighted by the inverse frequency of its class.

    Returns:
        A ``DataLoader`` over ``train_ds`` with batch size ``BATCH_SIZE``.
    """
    if not use_sampler:
        return DataLoader(
            train_ds,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=2,
            pin_memory=torch.cuda.is_available(),
        )

    # Read labels from the dataset's own frame so weight i always belongs to
    # sample i (the previous code referenced an undefined name, `train_df`).
    labels = train_ds.df["diagnosis"].to_numpy().astype(np.int64)
    class_count = np.bincount(labels, minlength=NUM_CLASSES)
    # max(..., 1) avoids a division by zero for classes absent from the split.
    inv_class_freq = 1.0 / np.maximum(class_count, 1)
    sample_weights = inv_class_freq[labels]

    sampler = WeightedRandomSampler(
        weights=torch.as_tensor(sample_weights, dtype=torch.double),
        num_samples=len(sample_weights),
        replacement=True,
    )

    return DataLoader(
        train_ds,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=2,
        pin_memory=torch.cuda.is_available(),
    )

# Training batches: shuffled, or class-balanced when USE_SAMPLER is set.
train_loader = build_train_loader(use_sampler=USE_SAMPLER)

# Validation batches: fixed order so metrics are comparable across epochs.
val_loader = DataLoader(
    dataset=val_ds,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=torch.cuda.is_available(),
)

In [18]:
def build_model(name: str):
    """Return a torchvision ResNet with default pretrained weights and a new head.

    Args:
        name: ``"resnet50"`` or ``"resnet18"``.

    Returns:
        The network with its final ``fc`` layer replaced by a fresh linear
        layer producing ``NUM_CLASSES`` outputs.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    if name == "resnet50":
        net = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    elif name == "resnet18":
        net = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    else:
        raise ValueError("Unknown MODEL_NAME")

    # Swap the pretrained classification head for one sized to this task.
    net.fc = nn.Linear(net.fc.in_features, NUM_CLASSES)
    return net

# Instantiate the selected backbone, then move its parameters to the target device.
model = build_model(MODEL_NAME)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 166MB/s]


In [20]:
# Per-class sample counts for classes 0..NUM_CLASSES-1 (0 for any class absent from the split).
counts = (
    train_data["diagnosis"]
    .value_counts()
    .sort_index()
    .reindex(range(NUM_CLASSES), fill_value=0)
)
# Inverse-frequency weights: N / (K * n_c), with n_c clamped to 1 to avoid dividing by zero.
weights = counts.sum() / (np.maximum(counts.values, 1) * NUM_CLASSES)
class_weights = torch.tensor(weights, dtype=torch.float32, device=device)

In [21]:
class FocalLoss(nn.Module):
    """Multi-class focal loss with optional per-class weighting.

    For a sample with target class ``t`` and predicted probability ``p_t``::

        loss = alpha[t] * (1 - p_t) ** gamma * (-log p_t)

    and the returned value is the plain mean over the batch.

    Args:
        alpha: Optional per-class weights (tensor or sequence of length
            ``num_classes``). ``None`` disables class weighting.
        gamma: Focusing parameter; ``0`` reduces the loss to cross-entropy.
    """

    def __init__(self, alpha=None, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        # p_t must come from the *unweighted* cross-entropy. Passing alpha as
        # the CE `weight` would give exp(-w * ce) == p_t ** w, which distorts
        # the (1 - p_t) ** gamma modulating factor.
        ce = F.cross_entropy(logits, targets, reduction="none")
        pt = torch.exp(-ce)
        loss = ((1 - pt) ** self.gamma) * ce

        if self.alpha is not None:
            alpha = torch.as_tensor(self.alpha, dtype=loss.dtype, device=loss.device)
            loss = alpha[targets] * loss

        return loss.mean()

In [22]:
# Validate the configured loss first, then build the matching criterion.
if LOSS_TYPE not in ("ce", "weighted_ce", "focal"):
    raise ValueError("LOSS_TYPE must be one of: ce, weighted_ce, focal")

if LOSS_TYPE == "focal":
    criterion = FocalLoss(alpha=class_weights, gamma=FOCAL_GAMMA)
else:
    # Plain CE uses no class weights; weighted CE uses the inverse-frequency ones.
    criterion = nn.CrossEntropyLoss(
        weight=class_weights if LOSS_TYPE == "weighted_ce" else None
    )

# Every model parameter is optimised (no layers are frozen here).
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=LR,
    weight_decay=WEIGHT_DECAY,
)

In [23]:
def eval_model(model, loader):
    """Run ``model`` over ``loader`` without gradients and compute metrics.

    Relies on the module-level ``criterion`` and ``device``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable of ``(inputs, integer_labels)`` batches.

    Returns:
        dict with keys ``loss`` (sample-weighted mean criterion value),
        ``acc``, ``auc`` (one-vs-rest ROC AUC, or ``None`` when it is
        undefined), ``macro_f1``, ``weighted_f1``, ``recalls`` (one entry per
        class in ``0..NUM_CLASSES-1``), ``cm`` (``NUM_CLASSES x NUM_CLASSES``
        confusion matrix), ``labels`` and ``preds`` (1-D arrays).
    """
    model.eval()
    class_ids = list(range(NUM_CLASSES))
    all_labels, all_probs, all_preds = [], [], []
    total_loss = 0.0

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)

            probs = torch.softmax(logits, dim=1).cpu().numpy()
            preds = np.argmax(probs, axis=1)

            all_probs.append(probs)
            all_preds.append(preds)
            all_labels.append(y.cpu().numpy())
            # criterion returns a batch mean; re-weight by batch size so the
            # final average is per-sample even when the last batch is smaller.
            total_loss += loss.item() * x.size(0)

    all_probs = np.concatenate(all_probs)
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    # Divide by the number of samples actually evaluated.
    avg_loss = total_loss / len(all_labels)
    acc = accuracy_score(all_labels, all_preds)

    try:
        auc = roc_auc_score(all_labels, all_probs, multi_class="ovr")
    except ValueError:
        # AUC is undefined for this split (e.g. a class has no true samples).
        auc = None

    macro_f1 = f1_score(all_labels, all_preds, average="macro")
    weighted_f1 = f1_score(all_labels, all_preds, average="weighted")

    # per-class recall (especially for class 3 & 4)
    recalls = recall_score(all_labels, all_preds, average=None, labels=class_ids)
    # Fix the label set so the matrix is always NUM_CLASSES x NUM_CLASSES and
    # lines up with `recalls` and with the tick marks used when plotting.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    return {
        "loss": avg_loss,
        "acc": acc,
        "auc": auc,
        "macro_f1": macro_f1,
        "weighted_f1": weighted_f1,
        "recalls": recalls,
        "cm": cm,
        "labels": all_labels,
        "preds": all_preds,
    }

In [24]:
def train_one_epoch(model, loader):
    """Train ``model`` for one pass over ``loader``; return the mean loss per sample.

    Relies on the module-level ``criterion``, ``optimizer`` and ``device``.

    Args:
        model: Network to train; it is switched to train mode.
        loader: Iterable of ``(inputs, integer_labels)`` batches.

    Returns:
        Sum of per-batch mean losses weighted by batch size, divided by the
        number of samples actually processed.
    """
    model.train()
    total_loss = 0.0
    n_seen = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        logits = model(x)
        loss = criterion(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = x.size(0)
        total_loss += loss.item() * batch_size
        n_seen += batch_size

    # Average over samples seen rather than len(loader.dataset): the two
    # differ under drop_last or a sampler that draws a different count.
    return total_loss / n_seen

In [26]:
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Draw ``cm`` as a heat-map and return the matplotlib figure.

    Args:
        cm: 2-D array of counts (rows are true classes, columns are predictions).
        title: Text shown above the plot.

    Returns:
        The created figure; the caller is responsible for saving and closing it.
    """
    fig, ax = plt.subplots(figsize=(6, 5))
    heatmap = ax.imshow(cm, interpolation="nearest")
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    fig.colorbar(heatmap, ax=ax)

    # One tick per class on both axes.
    class_ticks = range(NUM_CLASSES)
    ax.set_xticks(class_ticks)
    ax.set_yticks(class_ticks)

    fig.tight_layout()
    return fig

In [28]:
from sklearn.metrics import classification_report

# Train for EPOCHS epochs inside a single MLflow run: log the configuration,
# per-epoch validation metrics, keep the best checkpoint on disk, then
# re-evaluate that checkpoint and log the final metrics and artifacts.
mlflow.set_experiment(EXPERIMENT_NAME)

# Encode the main experiment knobs in the run name so runs are easy to tell apart.
run_name = f"{MODEL_NAME}_{LOSS_TYPE}_sampler{USE_SAMPLER}_img{IMG_SIZE}_lr{LR}"
with mlflow.start_run(run_name=run_name):

    # log params
    mlflow.log_param("model_name", MODEL_NAME)
    mlflow.log_param("img_size", IMG_SIZE)
    mlflow.log_param("batch_size", BATCH_SIZE)
    mlflow.log_param("epochs", EPOCHS)
    mlflow.log_param("lr", LR)
    mlflow.log_param("weight_decay", WEIGHT_DECAY)
    mlflow.log_param("loss_type", LOSS_TYPE)
    mlflow.log_param("use_sampler", USE_SAMPLER)
    # gamma is only meaningful for the focal loss; None is logged otherwise
    mlflow.log_param("focal_gamma", FOCAL_GAMMA if LOSS_TYPE == "focal" else None)
    mlflow.log_param("seed", SEED)
    mlflow.log_param("device", device)

    # log class weights for reproducibility
    # NOTE(review): these are logged even when LOSS_TYPE == "ce", where the criterion ignores them.
    for c in range(NUM_CLASSES):
        mlflow.log_param(f"class_weight_{c}", float(class_weights[c].detach().cpu()))

    # NOTE(review): despite the name, this holds validation accuracy for any
    # epoch whose AUC is None (see current_score below).
    best_val_auc = -1
    # relative path: the checkpoint is written to the current working directory
    best_ckpt_path = "best_model.pt"

    for epoch in range(1, EPOCHS + 1):
        tr_loss = train_one_epoch(model, train_loader)
        val_out = eval_model(model, val_loader)

        print(f"\nEpoch {epoch}/{EPOCHS}")
        print(f"Train loss: {tr_loss:.4f}")
        print(f"Val loss:   {val_out['loss']:.4f}")
        print(f"Val acc:    {val_out['acc']:.4f}")
        print(f"Val auc:    {val_out['auc']}")
        print(f"Val macroF1:{val_out['macro_f1']:.4f}")
        print("Val recalls:", val_out["recalls"])
        print("Val CM:\n", val_out["cm"])

        # log metrics per epoch
        mlflow.log_metric("train_loss", tr_loss, step=epoch)
        mlflow.log_metric("val_loss", val_out["loss"], step=epoch)
        mlflow.log_metric("val_acc", val_out["acc"], step=epoch)
        # eval_model returns auc=None when the score could not be computed
        if val_out["auc"] is not None:
            mlflow.log_metric("val_auc", float(val_out["auc"]), step=epoch)
        mlflow.log_metric("val_macro_f1", val_out["macro_f1"], step=epoch)
        mlflow.log_metric("val_weighted_f1", val_out["weighted_f1"], step=epoch)

        # log per-class recall
        for c in range(NUM_CLASSES):
            mlflow.log_metric(f"val_recall_class{c}", float(val_out["recalls"][c]), step=epoch)

        # save best by AUC (or fallback acc if auc None)
        # NOTE(review): AUC and accuracy are compared on the same scale if the fallback ever triggers.
        current_score = float(val_out["auc"]) if val_out["auc"] is not None else float(val_out["acc"])
        if current_score > best_val_auc:
            best_val_auc = current_score
            torch.save(model.state_dict(), best_ckpt_path)

    # Final evaluation + artifacts from best checkpoint
    # NOTE(review): if no epoch wrote a checkpoint (e.g. EPOCHS == 0), this loads
    # whatever best_model.pt already exists on disk, or fails if none exists.
    model.load_state_dict(torch.load(best_ckpt_path, map_location=device))
    final_out = eval_model(model, val_loader)

    # Log final metrics
    mlflow.log_metric("best_val_score", best_val_auc)
    mlflow.log_metric("final_val_acc", final_out["acc"])
    if final_out["auc"] is not None:
        mlflow.log_metric("final_val_auc", float(final_out["auc"]))
    mlflow.log_metric("final_val_macro_f1", final_out["macro_f1"])
    mlflow.log_metric("final_val_weighted_f1", final_out["weighted_f1"])

    # Save & log confusion matrix image
    cm_fig = plot_confusion_matrix(final_out["cm"], title=f"CM_{run_name}")
    cm_path = "confusion_matrix.png"
    cm_fig.savefig(cm_path, dpi=200, bbox_inches="tight")
    # close the figure once it is saved so it does not stay open in the kernel
    plt.close(cm_fig)
    mlflow.log_artifact(cm_path)

    # Save & log classification report
    report = classification_report(final_out["labels"], final_out["preds"], digits=4)
    report_path = "classification_report.txt"
    with open(report_path, "w") as f:
        f.write(report)
    mlflow.log_artifact(report_path)

    # Log model as MLflow artifact + raw checkpoint
    mlflow.pytorch.log_model(model, artifact_path="model")
    mlflow.log_artifact(best_ckpt_path)

print("Done. Check MLflow runs (mlruns folder or MLflow UI).")


Epoch 1/5
Train loss: 1.0532
Val loss:   0.9536
Val acc:    0.7240
Val auc:    0.907668723295512
Val macroF1:0.5835
Val recalls: [0.98255814 0.65       0.38461538 0.72727273 0.5       ]
Val CM:
 [[169   3   0   0   0]
 [  0  26   7   4   3]
 [  3  14  40  32  15]
 [  0   1   0  16   5]
 [  0   3   0  11  14]]

Epoch 2/5
Train loss: 0.8075
Val loss:   0.9446
Val acc:    0.7842
Val auc:    0.9262523889070806
Val macroF1:0.6306
Val recalls: [0.99418605 0.7        0.63461538 0.40909091 0.46428571]
Val CM:
 [[171   1   0   0   0]
 [  3  28   7   1   1]
 [  3  16  66  15   4]
 [  0   2   6   9   5]
 [  0   3   7   5  13]]

Epoch 3/5
Train loss: 0.6774
Val loss:   1.0241
Val acc:    0.7596
Val auc:    0.9269424680646065
Val macroF1:0.6188
Val recalls: [0.99418605 0.775      0.48076923 0.31818182 0.67857143]
Val CM:
 [[171   1   0   0   0]
 [  3  31   2   1   3]
 [  4  30  50   2  18]
 [  0   1   5   7   9]
 [  0   2   7   0  19]]

Epoch 4/5
Train loss: 0.6003
Val loss:   1.1229
Val acc:    0



Done. Check MLflow runs (mlruns folder or MLflow UI).
