### Imports and Configuration

In [1]:
import os, random, torch, cv2
import numpy as np
import pandas as pd
import torch.nn as nn
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix, \
    roc_auc_score
from sklearn.preprocessing import label_binarize
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet50, densenet121, efficientnet_b7, \
    inception_v3, convnext_tiny
from torchvision.models import ResNet50_Weights, DenseNet121_Weights, \
    EfficientNet_B7_Weights, Inception_V3_Weights, ConvNeXt_Tiny_Weights

# Device config
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
IMG_SIZE = 224  # Adaptable for each model
NUM_CLASSES = 5
BATCH_SIZE = 16



### Dataset + Augmentations

In [2]:
class APTOSDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        img_id = self.df.loc[idx, 'id_code']
        label = int(self.df.loc[idx, 'diagnosis'])
        image_path = os.path.join(self.image_dir, f"{img_id}.png")
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

    def __len__(self):
        return len(self.df)

# Example transforms
train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

### Load Data

In [3]:
train_df = pd.read_csv("Dataset/train_1.csv")
val_df = pd.read_csv("Dataset/valid.csv")

train_dataset = APTOSDataset(train_df, "Dataset/train_images",
                             transform=train_transforms)
val_dataset = APTOSDataset(val_df, "Dataset/val_images",
                           transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=0)

### Focal Loss

In [4]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = nn.functional.cross_entropy(inputs, targets,
                                              reduction='none')
        pt = torch.exp(-ce_loss)
        loss = ((1 - pt) ** self.gamma) * ce_loss
        if self.alpha is not None:
            loss = self.alpha[targets] * loss
        return loss.mean()

### Model Wrapper Function

In [5]:
def get_model(name):
    if name == "resnet50":
        model = resnet50(weights=ResNet50_Weights.DEFAULT)
        model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
    
    elif name == "resnet34":
        model = resnet50(weights=ResNet34_Weights.DEFAULT)
        model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

    elif name == "densenet121":
        model = densenet121(weights=DenseNet121_Weights.DEFAULT)
        model.classifier = nn.Linear(model.classifier.in_features,
                                     NUM_CLASSES)

    elif name == "efficientnet_b7":
        model = efficientnet_b7(weights=EfficientNet_B7_Weights.DEFAULT)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features,
                                        NUM_CLASSES)

    elif name == "inception_v3":
        model = inception_v3(weights=Inception_V3_Weights.DEFAULT,
                             aux_logits=True)
        model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

    elif name == "convnext_tiny":
        model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.DEFAULT)
        model.classifier[2] = nn.Linear(model.classifier[2].in_features,
                                        NUM_CLASSES)

    else:
        raise ValueError("Unknown model name")

    return model.to(DEVICE)

### Training & Validation Functions

In [6]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        correct += (outputs.argmax(1) == labels).sum().item()

    return total_loss / len(loader.dataset), \
           correct / len(loader.dataset)

### Validate Function

In [7]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    all_labels, all_preds, all_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating",
                                   leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            loss = criterion(outputs, labels)

            total_loss += loss.item() * images.size(0)
            preds = torch.argmax(outputs, 1)
            probs = torch.softmax(outputs, dim=1)

            correct += (preds == labels).sum().item()

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    acc = correct / len(loader.dataset)
    auc = roc_auc_score(
        label_binarize(all_labels, classes=np.arange(NUM_CLASSES)),
        all_probs,
        multi_class='ovr'
    )
    report = classification_report(all_labels, all_preds, digits=4)
    cm = confusion_matrix(all_labels, all_preds)

    return total_loss / len(loader.dataset), acc, auc, report, cm

### Imports

In [8]:
import torch
import torch.nn as nn
from tqdm import tqdm
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, \
    roc_auc_score
from sklearn.preprocessing import label_binarize

In [9]:
import torch
import torch.nn as nn
import numpy as np
from torchvision.models import resnet50, ResNet50_Weights, densenet121, \
    DenseNet121_Weights
from torch.optim import AdamW
from sklearn.metrics import (
    cohen_kappa_score, matthews_corrcoef,
    roc_auc_score, precision_recall_curve, auc,
    confusion_matrix, classification_report
)
from sklearn.preprocessing import label_binarize
from tqdm import tqdm


### Compute Metrics

In [10]:
def compute_extra_metrics(all_labels, all_preds, all_probs, num_classes):
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)
    all_probs = np.array(all_probs)

    kappa = cohen_kappa_score(all_labels, all_preds, weights='quadratic')
    mcc = matthews_corrcoef(all_labels, all_preds)

    y_true_bin = label_binarize(all_labels,
                                classes=np.arange(num_classes))

    auc_macro = roc_auc_score(y_true_bin, all_probs,
                              average='macro', multi_class='ovr')
    auc_micro = roc_auc_score(y_true_bin, all_probs,
                              average='micro', multi_class='ovr')
    auc_weighted = roc_auc_score(y_true_bin, all_probs,
                                 average='weighted', multi_class='ovr')

    pr_aucs = []
    for i in range(num_classes):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i],
                                                      all_probs[:, i])
        pr_aucs.append(auc(recall, precision))
    auprc_macro = np.mean(pr_aucs)

    cm = confusion_matrix(all_labels, all_preds)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(num_classes):
        tn = cm.sum() - (cm[i, :].sum() +
                         cm[:, i].sum() -
                         cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": auc_macro,
        "AUC_Micro": auc_micro,
        "AUC_Weighted": auc_weighted,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity
    }

### Class Weights

In [11]:
from sklearn.utils.class_weight import compute_class_weight
import torch

# Assuming your labels are in train_df['diagnosis']
labels = train_df['diagnosis'].values

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(labels),
    y=labels
)

class_weights = torch.tensor(class_weights,
                             dtype=torch.float32).to(DEVICE)


### Imports and Configs

In [12]:
import torch
import torch.nn as nn
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.metrics import classification_report, confusion_matrix, \
    roc_auc_score, matthews_corrcoef, cohen_kappa_score, \
    precision_recall_curve, auc
from sklearn.preprocessing import label_binarize
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Setup
NUM_CLASSES = 5
NUM_EPOCHS=10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet50
model = resnet50(weights=ResNet50_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

# Optimizer and loss
optimizer = torch.optim.AdamW(model.parameters(),
                              lr=1e-4,
                              weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()


### TRAIN LOOP

In [13]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        correct += (outputs.argmax(1) == labels).sum().item()

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    return avg_loss, acc

### VALIDATION LOOP

In [14]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            loss = criterion(outputs, labels)

            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            total_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs),
                              multi_class='ovr')

    report = classification_report(y_true, y_pred, digits=4)
    cm = confusion_matrix(y_true, y_pred)

    return avg_loss, acc, auc_score, report, cm, \
           y_true, y_pred, y_probs

### METRICS

In [15]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs,
                              average='macro', multi_class='ovr')
    micro_auc = roc_auc_score(y_true_bin, y_probs,
                              average='micro', multi_class='ovr')
    weighted_auc = roc_auc_score(y_true_bin, y_probs,
                                 average='weighted', multi_class='ovr')

    pr_aucs = []
    for i in range(NUM_CLASSES):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i],
                                                      y_probs[:, i])
        pr_aucs.append(auc(recall, precision))

    auprc_macro = np.mean(pr_aucs)

    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Cohen_Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": macro_auc,
        "AUC_Micro": micro_auc,
        "AUC_Weighted": weighted_auc,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Confusion_Matrix": cm
    }

In [17]:
best_val_acc = 0.0
best_model_path = "saved_models/best_model_resnet50.pth"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{NUM_EPOCHS}")

    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion
    )

    val_loss, val_acc, val_auc, report, cm, \
    y_true, y_pred, y_probs = validate(
        model, val_loader, criterion
    )

    metrics = compute_metrics(y_true, y_pred, y_probs)

    # Save model if validation accuracy improves
    if val_acc > best_val_acc:
        print(f"Validation accuracy improved "
              f"({best_val_acc:.4f} --> {val_acc:.4f}), saving model")

        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)

print(f"\nBest model saved to: {best_model_path}")


Epoch 1/10


                                                                                

Validation accuracy improved (0.0000 --> 0.7678), saving model

Epoch 2/10


                                                                                

Validation accuracy improved (0.7678 --> 0.8197), saving model

Epoch 3/10


                                                                                


Epoch 4/10


                                                                                


Epoch 5/10


                                                                                

Validation accuracy improved (0.8197 --> 0.8415), saving model

Epoch 6/10


                                                                                


Epoch 7/10


                                                                                

Validation accuracy improved (0.8415 --> 0.8525), saving model

Epoch 8/10


                                                                                


Epoch 9/10


                                                                                


Epoch 10/10


                                                                                


Best model saved to: saved_models/best_model_resnet50.pth




In [16]:
import torch
import torch.nn as nn
import numpy as np
from torchvision.models import densenet121, DenseNet121_Weights
from torch.optim import AdamW
from sklearn.metrics import (
    cohen_kappa_score, matthews_corrcoef, roc_auc_score,
    precision_recall_curve, auc, confusion_matrix,
    classification_report
)
from sklearn.preprocessing import label_binarize
from tqdm import tqdm

# Setup
NUM_CLASSES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load DenseNet121
model = densenet121(weights=DenseNet121_Weights.DEFAULT)
model.classifier = nn.Linear(model.classifier.in_features, NUM_CLASSES)
model = model.to(DEVICE)

# Optimizer and loss
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

### Train One epoch

In [17]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * labels.size(0)
        correct += (outputs.argmax(1) == labels).sum().item()

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)
    return avg_loss, acc


### Validation

In [18]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            loss = criterion(outputs, labels)

            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            total_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs),
                              multi_class='ovr')

    report = classification_report(y_true, y_pred, digits=4)
    cm = confusion_matrix(y_true, y_pred)

    return avg_loss, acc, auc_score, report, cm, y_true, y_pred, y_probs

### Metrics computation

In [19]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs,
                              average='macro', multi_class='ovr')
    micro_auc = roc_auc_score(y_true_bin, y_probs,
                              average='micro', multi_class='ovr')
    weighted_auc = roc_auc_score(y_true_bin, y_probs,
                                 average='weighted', multi_class='ovr')

    pr_aucs = []
    for i in range(NUM_CLASSES):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i],
                                                      y_probs[:, i])
        pr_aucs.append(auc(recall, precision))

    auprc_macro = np.mean(pr_aucs)

    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Cohen_Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": macro_auc,
        "AUC_Micro": micro_auc,
        "AUC_Weighted": weighted_auc,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Confusion_Matrix": cm
    }

In [20]:
best_val_acc = 0.0
best_model_path = "saved_models/best_model_densenet121.pth"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{NUM_EPOCHS}")

    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion
    )

    val_loss, val_acc, val_auc, report, cm, \
    y_true, y_pred, y_probs = validate(
        model, val_loader, criterion
    )

    metrics = compute_metrics(y_true, y_pred, y_probs)

    if val_acc > best_val_acc:
        print(f"Validation accuracy improved "
              f"({best_val_acc:.4f} --> {val_acc:.4f}); saving model.")

        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)

print(f"Best model saved to: {best_model_path}")



Epoch 1/10


                                                                                

Validation accuracy improved (0.0000 --> 0.7923); saving model.

Epoch 2/10


                                                                                

Validation accuracy improved (0.7923 --> 0.8060); saving model.

Epoch 3/10


                                                                                

Validation accuracy improved (0.8060 --> 0.8279); saving model.

Epoch 4/10


                                                                                


Epoch 5/10


                                                                                


Epoch 6/10


                                                                                


Epoch 7/10


                                                                                

Validation accuracy improved (0.8279 --> 0.8443); saving model.

Epoch 8/10


                                                                                

Validation accuracy improved (0.8443 --> 0.8470); saving model.

Epoch 9/10


                                                                                


Epoch 10/10


                                                                                

Best model saved to: best_model_densenet121.pth




### Imports

In [23]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from sklearn.preprocessing import label_binarize
from sklearn.metrics import (
    roc_auc_score, classification_report, confusion_matrix,
    cohen_kappa_score, matthews_corrcoef, precision_recall_curve, auc
)
from tqdm import tqdm


NUM_CLASSES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Mixed precision scaler
scaler = torch.cuda.amp.GradScaler()

  scaler = torch.cuda.amp.GradScaler()


### Efficientnet Model

In [24]:
model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)

model.classifier[1] = nn.Linear(model.classifier[1].in_features, NUM_CLASSES)

model = model.to(DEVICE)

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

### Train One epoch

In [25]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)

        # Mixed precision forward pass
        with torch.amp.autocast("cuda"):
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass with AMP
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item() * labels.size(0)
        correct += (outputs.argmax(1) == labels).sum().item()

        # memory cleanup
        del images, labels, outputs
        torch.cuda.empty_cache()

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)
    return avg_loss, acc

### Validation

In [26]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            with torch.amp.autocast("cuda"):
                outputs = model(images)
                loss = criterion(outputs, labels)

            probs = torch.softmax(outputs, dim=1)
            preds = probs.argmax(dim=1)

            total_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

            del images, labels, outputs, probs, preds
            torch.cuda.empty_cache()

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs), multi_class="ovr")

    report = classification_report(y_true, y_pred, digits=4)
    cm = confusion_matrix(y_true, y_pred)

    return avg_loss, acc, auc_score, report, cm, y_true, y_pred, y_probs

### Metrics Computation

In [27]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights="quadratic")
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs, average="macro", multi_class="ovr")
    micro_auc = roc_auc_score(y_true_bin, y_probs, average="micro", multi_class="ovr")
    weighted_auc = roc_auc_score(y_true_bin, y_probs, average="weighted", multi_class="ovr")

    # PR AUC per class
    pr_aucs = []
    for i in range(NUM_CLASSES):
        p, r, _ = precision_recall_curve(y_true_bin[:, i], y_probs[:, i])
        pr_aucs.append(auc(r, p))

    auprc_macro = np.mean(pr_aucs)
    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Cohen_Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": macro_auc,
        "AUC_Micro": micro_auc,
        "AUC_Weighted": weighted_auc,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Confusion_Matrix": cm,
    }

In [28]:
best_val_acc = 0.0
best_model_path = "saved_models/best_model_efficientnet_b0_lowmem.pth"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{NUM_EPOCHS}")

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)

    val_loss, val_acc, val_auc, report, cm, \
        y_true, y_pred, y_probs = validate(model, val_loader, criterion)

    metrics = compute_metrics(y_true, y_pred, y_probs)

    if val_acc > best_val_acc:
        print(f"Validation accuracy improved ({best_val_acc:.4f} → {val_acc:.4f}). Saving model...")
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)

print(f"Best model saved to: {best_model_path}")


Epoch 1/10


                                                                                

Validation accuracy improved (0.0000 → 0.7568). Saving model...

Epoch 2/10


                                                                                

Validation accuracy improved (0.7568 → 0.8197). Saving model...

Epoch 3/10


                                                                                


Epoch 4/10


                                                                                

Validation accuracy improved (0.8197 → 0.8333). Saving model...

Epoch 5/10


                                                                                


Epoch 6/10


                                                                                


Epoch 7/10


                                                                                


Epoch 8/10


                                                                                

Validation accuracy improved (0.8333 → 0.8443). Saving model...

Epoch 9/10


                                                                                


Epoch 10/10


                                                                                

Best model saved to: best_model_efficientnet_b0_lowmem.pth




### Imports

In [45]:
import os
import torch
import torch.nn as nn
import numpy as np
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score,
    matthews_corrcoef, cohen_kappa_score, precision_recall_curve, auc
)
from sklearn.preprocessing import label_binarize
from tqdm import tqdm
from PIL import Image

NUM_CLASSES = 5
IMG_SIZE = 299                       # IMPORTANT for Inception v3
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 8                       # Safe for 20GB GPUs
NUM_EPOCHS = 10
best_val_acc = 0
best_model_path = "best_inceptionv3.pth"


class APTOSDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        img_id = self.df.loc[idx, "id_code"]
        label = int(self.df.loc[idx, "diagnosis"])

        img_path = os.path.join(self.image_dir, f"{img_id}.png")
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.df)

train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [46]:
import pandas as pd

train_df = pd.read_csv("Dataset/train_1.csv")
val_df   = pd.read_csv("Dataset/valid.csv")

train_dataset = APTOSDataset(train_df, "Dataset/train_images", train_transforms)
val_dataset   = APTOSDataset(val_df,   "Dataset/val_images",   val_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)


### Inception_V3

In [47]:
model = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)

# Enable Aux logits
model.aux_logits = True

# Update classifier
model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, NUM_CLASSES)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

model = model.to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()

  scaler = torch.cuda.amp.GradScaler()


In [55]:
def extract_logits(output):
    """
    Handles Inception behavior:
    - Training mode returns InceptionOutputs (with .logits + .aux_logits)
    - Eval mode returns a plain tensor
    """
    # In training mode: InceptionOutputs object
    if hasattr(output, "logits"):
        return output.logits

    # Eval mode: plain tensor
    return output

### Training

In [56]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            outputs = model(images)

            # Main and auxiliary losses
            loss1 = criterion(outputs.logits, labels)
            loss2 = criterion(outputs.aux_logits, labels)

            loss = loss1 + 0.4 * loss2

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item() * labels.size(0)
        correct += (outputs.logits.argmax(1) == labels).sum().item()

    return total_loss / len(loader.dataset), correct / len(loader.dataset)

### Validation

In [57]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            logits = extract_logits(outputs)

            loss = criterion(logits, labels)

            probs = torch.softmax(logits, dim=1)
            preds = torch.argmax(probs, dim=1)

            total_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs), multi_class="ovr")

    report = classification_report(y_true, y_pred, digits=4)
    cm = confusion_matrix(y_true, y_pred)

    return avg_loss, acc, auc_score, report, cm, y_true, y_pred, y_probs


### Metrics Computation

In [58]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs, average='macro', multi_class='ovr')
    micro_auc = roc_auc_score(y_true_bin, y_probs, average='micro', multi_class='ovr')
    weighted_auc = roc_auc_score(y_true_bin, y_probs, average='weighted', multi_class='ovr')

    pr_aucs = []
    for i in range(NUM_CLASSES):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_probs[:, i])
        pr_aucs.append(auc(recall, precision))

    auprc_macro = np.mean(pr_aucs)

    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Cohen_Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": macro_auc,
        "AUC_Micro": micro_auc,
        "AUC_Weighted": weighted_auc,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Confusion_Matrix": cm
    }

### Saving Best Model

In [59]:
for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n===== EPOCH {epoch}/{NUM_EPOCHS} =====")

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc, val_auc, report, cm, y_true, y_pred, y_probs = validate(model, val_loader, criterion)

    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | AUC: {val_auc:.4f}")

    if val_acc > best_val_acc:
        print(f"Improved {best_val_acc:.4f} → {val_acc:.4f}, saving model.")
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)


print(f"\nBest Inception model saved to: {best_model_path}")


===== EPOCH 1/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.6994 | Train Acc: 0.8164
Val Loss: 0.5679 | Val Acc: 0.8060 | AUC: 0.9217
Improved 0.0000 → 0.8060, saving model.

===== EPOCH 2/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.6437 | Train Acc: 0.8324
Val Loss: 0.6150 | Val Acc: 0.8169 | AUC: 0.9174
Improved 0.8060 → 0.8169, saving model.

===== EPOCH 3/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.6113 | Train Acc: 0.8399
Val Loss: 0.5365 | Val Acc: 0.8142 | AUC: 0.9316

===== EPOCH 4/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.5054 | Train Acc: 0.8686
Val Loss: 0.5124 | Val Acc: 0.8306 | AUC: 0.9426
Improved 0.8169 → 0.8306, saving model.

===== EPOCH 5/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.4757 | Train Acc: 0.8761
Val Loss: 0.4955 | Val Acc: 0.8470 | AUC: 0.9511
Improved 0.8306 → 0.8470, saving model.

===== EPOCH 6/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.4639 | Train Acc: 0.8809
Val Loss: 0.4683 | Val Acc: 0.8607 | AUC: 0.9510
Improved 0.8470 → 0.8607, saving model.

===== EPOCH 7/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.3637 | Train Acc: 0.9075
Val Loss: 0.4383 | Val Acc: 0.8525 | AUC: 0.9558

===== EPOCH 8/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.3404 | Train Acc: 0.9113
Val Loss: 0.5523 | Val Acc: 0.8115 | AUC: 0.9471

===== EPOCH 9/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.3282 | Train Acc: 0.9150
Val Loss: 0.7212 | Val Acc: 0.8197 | AUC: 0.9238

===== EPOCH 10/10 =====


  with torch.cuda.amp.autocast():
                                                                                

Train Loss: 0.3091 | Train Acc: 0.9246
Val Loss: 0.5548 | Val Acc: 0.8470 | AUC: 0.9470

Best Inception model saved to: best_inceptionv3.pth




### Imports

In [60]:
from torchvision import models, transforms
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score,
    matthews_corrcoef, cohen_kappa_score, precision_recall_curve, auc
)
from sklearn.preprocessing import label_binarize
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Setup
NUM_CLASSES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### ConvNeXt-Tiny

In [61]:
model = models.convnext_tiny(pretrained=True)
model.classifier[2] = nn.Linear(model.classifier[2].in_features, NUM_CLASSES)
model = model.to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()




### Training

In [62]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        correct += (outputs.argmax(1) == labels).sum().item()

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)
    return avg_loss, acc


### Validation

In [63]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            loss = criterion(outputs, labels)

            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            total_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs), multi_class='ovr')

    report = classification_report(y_true, y_pred, digits=4)
    cm = confusion_matrix(y_true, y_pred)

    return avg_loss, acc, auc_score, report, cm, y_true, y_pred, y_probs


### Metrics Computation

In [64]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs, average='macro', multi_class='ovr')
    micro_auc = roc_auc_score(y_true_bin, y_probs, average='micro', multi_class='ovr')
    weighted_auc = roc_auc_score(y_true_bin, y_probs, average='weighted', multi_class='ovr')

    pr_aucs = []
    for i in range(NUM_CLASSES):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_probs[:, i])
        pr_aucs.append(auc(recall, precision))

    auprc_macro = np.mean(pr_aucs)

    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Cohen_Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": macro_auc,
        "AUC_Micro": micro_auc,
        "AUC_Weighted": weighted_auc,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Confusion_Matrix": cm
    }

### Saving Best Model

In [65]:
NUM_EPOCHS = 10  # Set as needed
best_val_acc = 0.0  # Track the best validation accuracy
best_model_path = "saved_models/best_model_convnext_tiny.pth"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{NUM_EPOCHS}")

    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion
    )

    val_loss, val_acc, val_auc, report, cm, \
    y_true, y_pred, y_probs = validate(
        model, val_loader, criterion
    )

    metrics = compute_metrics(y_true, y_pred, y_probs)

    print(f"\nTrain Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | AUC: {val_auc:.4f}")
    print(f"Cohen’s Kappa: {metrics['Cohen_Kappa']:.4f} | MCC: {metrics['MCC']:.4f}")
    print(f"Macro AUC: {metrics['AUC_Macro']:.4f} | Micro AUC: {metrics['AUC_Micro']:.4f} | "
          f"Weighted AUC: {metrics['AUC_Weighted']:.4f}")
    print(f"AUPRC (Macro): {metrics['AUPRC_Macro']:.4f}")
    for i in range(NUM_CLASSES):
        print(f"Class {i}: Sensitivity = {metrics['Sensitivity'][i]:.4f}, "
              f"Specificity = {metrics['Specificity'][i]:.4f}")

    # Save model if validation accuracy improves
    if val_acc > best_val_acc:
        print(f"Validation accuracy improved ({best_val_acc:.4f} --> {val_acc:.4f}), saving model")
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)

print(f"\nBest model saved to: {best_model_path}")


Epoch 1/10


                                                                                


Train Loss: 0.6199 | Train Acc: 0.7648
Val Loss: 0.6255 | Val Acc: 0.7568 | AUC: 0.9210
Cohen’s Kappa: 0.8476 | MCC: 0.6452
Macro AUC: 0.9210 | Micro AUC: 0.9499 | Weighted AUC: 0.9477
AUPRC (Macro): 0.5968
Class 0: Sensitivity = 0.9942, Specificity = 0.9742
Class 1: Sensitivity = 0.2500, Specificity = 0.9939
Class 2: Sensitivity = 0.6731, Specificity = 0.8740
Class 3: Sensitivity = 0.6818, Specificity = 0.9186
Class 4: Sensitivity = 0.3929, Specificity = 0.9379
Validation accuracy improved (0.0000 --> 0.7568), saving model

Epoch 2/10


                                                                                


Train Loss: 0.4858 | Train Acc: 0.8096
Val Loss: 0.4958 | Val Acc: 0.8306 | AUC: 0.9395
Cohen’s Kappa: 0.8993 | MCC: 0.7537
Macro AUC: 0.9395 | Micro AUC: 0.9711 | Weighted AUC: 0.9615
AUPRC (Macro): 0.6918
Class 0: Sensitivity = 0.9942, Specificity = 0.9948
Class 1: Sensitivity = 0.4250, Specificity = 0.9877
Class 2: Sensitivity = 0.9615, Specificity = 0.8130
Class 3: Sensitivity = 0.0000, Specificity = 0.9971
Class 4: Sensitivity = 0.5714, Specificity = 0.9793
Validation accuracy improved (0.7568 --> 0.8306), saving model

Epoch 3/10


                                                                                


Train Loss: 0.4150 | Train Acc: 0.8369
Val Loss: 0.5930 | Val Acc: 0.7869 | AUC: 0.9368
Cohen’s Kappa: 0.8814 | MCC: 0.6859
Macro AUC: 0.9368 | Micro AUC: 0.9594 | Weighted AUC: 0.9562
AUPRC (Macro): 0.6836
Class 0: Sensitivity = 0.9942, Specificity = 0.9639
Class 1: Sensitivity = 0.0500, Specificity = 1.0000
Class 2: Sensitivity = 0.8846, Specificity = 0.8244
Class 3: Sensitivity = 0.2727, Specificity = 0.9767
Class 4: Sensitivity = 0.6071, Specificity = 0.9497

Epoch 4/10


                                                                                


Train Loss: 0.3673 | Train Acc: 0.8577
Val Loss: 0.4762 | Val Acc: 0.8251 | AUC: 0.9479
Cohen’s Kappa: 0.9015 | MCC: 0.7426
Macro AUC: 0.9479 | Micro AUC: 0.9738 | Weighted AUC: 0.9643
AUPRC (Macro): 0.7313
Class 0: Sensitivity = 0.9942, Specificity = 0.9845
Class 1: Sensitivity = 0.3250, Specificity = 0.9847
Class 2: Sensitivity = 0.9231, Specificity = 0.8244
Class 3: Sensitivity = 0.2727, Specificity = 0.9855
Class 4: Sensitivity = 0.5714, Specificity = 0.9852

Epoch 5/10


                                                                                


Train Loss: 0.3214 | Train Acc: 0.8857
Val Loss: 0.4758 | Val Acc: 0.8361 | AUC: 0.9467
Cohen’s Kappa: 0.9107 | MCC: 0.7616
Macro AUC: 0.9467 | Micro AUC: 0.9742 | Weighted AUC: 0.9654
AUPRC (Macro): 0.7341
Class 0: Sensitivity = 0.9942, Specificity = 0.9897
Class 1: Sensitivity = 0.4250, Specificity = 0.9908
Class 2: Sensitivity = 0.9615, Specificity = 0.8168
Class 3: Sensitivity = 0.0455, Specificity = 0.9942
Class 4: Sensitivity = 0.6071, Specificity = 0.9852
Validation accuracy improved (0.8306 --> 0.8361), saving model

Epoch 6/10


                                                                                


Train Loss: 0.2924 | Train Acc: 0.8857
Val Loss: 0.4718 | Val Acc: 0.8607 | AUC: 0.9517
Cohen’s Kappa: 0.9054 | MCC: 0.7932
Macro AUC: 0.9517 | Micro AUC: 0.9767 | Weighted AUC: 0.9676
AUPRC (Macro): 0.7661
Class 0: Sensitivity = 0.9942, Specificity = 0.9897
Class 1: Sensitivity = 0.6250, Specificity = 0.9755
Class 2: Sensitivity = 0.9038, Specificity = 0.8779
Class 3: Sensitivity = 0.4091, Specificity = 0.9884
Class 4: Sensitivity = 0.5714, Specificity = 0.9852
Validation accuracy improved (0.8361 --> 0.8607), saving model

Epoch 7/10


                                                                                


Train Loss: 0.2585 | Train Acc: 0.9058
Val Loss: 0.5591 | Val Acc: 0.8361 | AUC: 0.9514
Cohen’s Kappa: 0.8946 | MCC: 0.7626
Macro AUC: 0.9514 | Micro AUC: 0.9754 | Weighted AUC: 0.9692
AUPRC (Macro): 0.7560
Class 0: Sensitivity = 0.9942, Specificity = 1.0000
Class 1: Sensitivity = 0.4750, Specificity = 0.9847
Class 2: Sensitivity = 0.9615, Specificity = 0.8053
Class 3: Sensitivity = 0.0909, Specificity = 0.9971
Class 4: Sensitivity = 0.5000, Specificity = 0.9911

Epoch 8/10


                                                                                


Train Loss: 0.2425 | Train Acc: 0.9075
Val Loss: 0.4987 | Val Acc: 0.8552 | AUC: 0.9497
Cohen’s Kappa: 0.8993 | MCC: 0.7855
Macro AUC: 0.9497 | Micro AUC: 0.9749 | Weighted AUC: 0.9660
AUPRC (Macro): 0.7608
Class 0: Sensitivity = 0.9884, Specificity = 0.9948
Class 1: Sensitivity = 0.7250, Specificity = 0.9724
Class 2: Sensitivity = 0.8750, Specificity = 0.8702
Class 3: Sensitivity = 0.2727, Specificity = 0.9913
Class 4: Sensitivity = 0.6071, Specificity = 0.9822

Epoch 9/10


                                                                                


Train Loss: 0.2123 | Train Acc: 0.9195
Val Loss: 0.5129 | Val Acc: 0.8497 | AUC: 0.9498
Cohen’s Kappa: 0.9138 | MCC: 0.7781
Macro AUC: 0.9498 | Micro AUC: 0.9765 | Weighted AUC: 0.9678
AUPRC (Macro): 0.7606
Class 0: Sensitivity = 0.9942, Specificity = 1.0000
Class 1: Sensitivity = 0.5750, Specificity = 0.9816
Class 2: Sensitivity = 0.9038, Specificity = 0.8550
Class 3: Sensitivity = 0.2727, Specificity = 0.9826
Class 4: Sensitivity = 0.6071, Specificity = 0.9852

Epoch 10/10


                                                                                


Train Loss: 0.1848 | Train Acc: 0.9355
Val Loss: 0.5266 | Val Acc: 0.8497 | AUC: 0.9488
Cohen’s Kappa: 0.9137 | MCC: 0.7782
Macro AUC: 0.9488 | Micro AUC: 0.9766 | Weighted AUC: 0.9661
AUPRC (Macro): 0.7425
Class 0: Sensitivity = 0.9942, Specificity = 1.0000
Class 1: Sensitivity = 0.6000, Specificity = 0.9755
Class 2: Sensitivity = 0.9231, Specificity = 0.8588
Class 3: Sensitivity = 0.1818, Specificity = 0.9884
Class 4: Sensitivity = 0.5714, Specificity = 0.9822

Best model saved to: best_model_convnext_tiny.pth




### Vision Transformer (ViT-B/16)

In [21]:
import os
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights

from PIL import Image
from tqdm import tqdm

from sklearn.preprocessing import label_binarize
from sklearn.metrics import (
    roc_auc_score, confusion_matrix, classification_report,
    precision_recall_curve, auc, cohen_kappa_score, matthews_corrcoef
)

### CONFIG

In [22]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

NUM_CLASSES = 5
IMG_SIZE = 224
BATCH_SIZE = 8
NUM_EPOCHS = 10
LR = 1e-4
WD = 1e-4

# Mixed precision scaler (updated API)
scaler = torch.amp.GradScaler(device="cuda")

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

### DATASET

In [23]:
class APTOSDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        img_id = self.df.loc[idx, "id_code"]
        label = int(self.df.loc[idx, "diagnosis"])

        path = os.path.join(self.image_dir, f"{img_id}.png")
        image = Image.open(path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.df)

### TRANSFORMS (224×224 for ViT)

In [24]:
train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

### LOAD CSVs & SET IMAGE PATHS

In [25]:
train_df = pd.read_csv("Dataset/train_1.csv")
val_df   = pd.read_csv("Dataset/valid.csv")

train_dir = "Dataset/train_images"
val_dir   = "Dataset/val_images"

train_dataset = APTOSDataset(train_df, train_dir, transform=train_transforms)
val_dataset   = APTOSDataset(val_df, val_dir, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=0, pin_memory=True)

### MODEL: ViT-B/16

In [26]:
print("Loading ViT-B/16...")

model = vit_b_16(weights=ViT_B_16_Weights.DEFAULT)
model.heads.head = nn.Linear(model.heads.head.in_features, NUM_CLASSES)
model = model.to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
criterion = nn.CrossEntropyLoss()

Loading ViT-B/16...


### TRAINING LOOP

In [27]:
def train_one_epoch(model, loader, optimizer, criterion, epoch):
    model.train()
    running_loss = 0
    correct = 0

    for images, labels in tqdm(loader, desc=f"Training {epoch}", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()

        with torch.amp.autocast("cuda"):
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * labels.size(0)
        correct += (outputs.argmax(dim=1) == labels).sum().item()

        del images, labels, outputs
        torch.cuda.empty_cache()

    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / len(loader.dataset)
    return epoch_loss, epoch_acc

### VALIDATION LOOP

In [28]:
def validate(model, loader, criterion):
    model.eval()
    running_loss = 0
    correct = 0

    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            with torch.amp.autocast("cuda"):
                outputs = model(images)
                loss = criterion(outputs, labels)

            probs = torch.softmax(outputs, dim=1)
            preds = probs.argmax(1)

            running_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

            del images, labels, outputs, probs
            torch.cuda.empty_cache()

    avg_loss = running_loss / len(loader.dataset)
    accuracy = correct / len(loader.dataset)

    # AUC
    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs), multi_class="ovr")

    # Confusion matrix + report
    cm = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred, digits=4)

    return avg_loss, accuracy, auc_score, report, cm, y_true, y_pred, y_probs

### EXTRA METRICS

In [29]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights="quadratic")
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs, average="macro", multi_class="ovr")

    # AUPRC per class
    pr_aucs = []
    for i in range(NUM_CLASSES):
        p, r, _ = precision_recall_curve(y_true_bin[:, i], y_probs[:, i])
        pr_aucs.append(auc(r, p))

    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Kappa": kappa,
        "MCC": mcc,
        "Macro_AUC": macro_auc,
        "AUPRC_Macro": np.mean(pr_aucs),
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "ConfusionMatrix": cm
    }

### TRAINING EXECUTION

In [30]:
best_acc = 0
save_path = "saved_models/best_vit_b16.pth"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n===== Epoch {epoch}/{NUM_EPOCHS} =====")

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, epoch)

    val_loss, val_acc, val_auc, report, cm, y_true, y_pred, y_probs = validate(
        model, val_loader, criterion
    )

    metrics = compute_metrics(y_true, y_pred, y_probs)

    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss:   {val_loss:.4f} | Val Acc: {val_acc:.4f} | AUC: {val_auc:.4f}")
    print("Kappa:", metrics["Kappa"], "| MCC:", metrics["MCC"])

    if val_acc > best_acc:
        print(f"Improved {best_acc:.4f} → {val_acc:.4f}. Saving model...")
        best_acc = val_acc
        torch.save(model.state_dict(), save_path)

print(f"\nTraining complete. Best model saved to: {save_path}")


===== Epoch 1/10 =====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Train Loss: 0.9379 | Train Acc: 0.6703
Val Loss:   0.8412 | Val Acc: 0.6995 | AUC: 0.8739
Kappa: 0.7378430509965821 | MCC: 0.5750379138567142
Improved 0.0000 → 0.6995. Saving model...

===== Epoch 2/10 =====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Train Loss: 0.7930 | Train Acc: 0.7106
Val Loss:   0.8493 | Val Acc: 0.6940 | AUC: 0.8702
Kappa: 0.6719908364885934 | MCC: 0.5270252012236379

===== Epoch 3/10 =====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Train Loss: 0.7445 | Train Acc: 0.7246
Val Loss:   0.7642 | Val Acc: 0.7131 | AUC: 0.8891
Kappa: 0.7130012557555463 | MCC: 0.560254877967469
Improved 0.6995 → 0.7131. Saving model...

===== Epoch 4/10 =====


                                                                                  

Train Loss: 0.7187 | Train Acc: 0.7290
Val Loss:   0.6680 | Val Acc: 0.7404 | AUC: 0.8957
Kappa: 0.7889996883764413 | MCC: 0.617808816658476
Improved 0.7131 → 0.7404. Saving model...

===== Epoch 5/10 =====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Train Loss: 0.7127 | Train Acc: 0.7266
Val Loss:   0.6385 | Val Acc: 0.7568 | AUC: 0.9119
Kappa: 0.7995927999629818 | MCC: 0.6459722824501959
Improved 0.7404 → 0.7568. Saving model...

===== Epoch 6/10 =====


                                                                                  

Train Loss: 0.6649 | Train Acc: 0.7556
Val Loss:   0.6193 | Val Acc: 0.7678 | AUC: 0.9142
Kappa: 0.829498618239242 | MCC: 0.6610633711322322
Improved 0.7568 → 0.7678. Saving model...

===== Epoch 7/10 =====


                                                                                  

Train Loss: 0.6352 | Train Acc: 0.7584
Val Loss:   0.7133 | Val Acc: 0.7268 | AUC: 0.8895
Kappa: 0.7157375745526839 | MCC: 0.5912068379449216

===== Epoch 8/10 =====


                                                                                  

Train Loss: 0.6261 | Train Acc: 0.7679
Val Loss:   0.6550 | Val Acc: 0.7678 | AUC: 0.9059
Kappa: 0.8531860327480766 | MCC: 0.6559150446594172

===== Epoch 9/10 =====


                                                                                  

Train Loss: 0.6071 | Train Acc: 0.7737
Val Loss:   0.7338 | Val Acc: 0.7322 | AUC: 0.9054
Kappa: 0.823073890515882 | MCC: 0.6014713469114709

===== Epoch 10/10 =====


                                                                                  

Train Loss: 0.5904 | Train Acc: 0.7792
Val Loss:   0.5792 | Val Acc: 0.7951 | AUC: 0.9238
Kappa: 0.8508360319873108 | MCC: 0.6997697175150761
Improved 0.7678 → 0.7951. Saving model...

Training complete. Best model saved to: best_vit_b16.pth


### Imports

In [77]:
import torch
import torch.nn as nn
from torchvision.models import resnet34, ResNet34_Weights
from sklearn.metrics import classification_report, confusion_matrix, \
    roc_auc_score, matthews_corrcoef, cohen_kappa_score, \
    precision_recall_curve, auc
from sklearn.preprocessing import label_binarize
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

### Setup

In [79]:
NUM_CLASSES = 5
NUM_EPOCHS=10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet34
model = resnet34(weights=ResNet34_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

# Optimizer and loss
optimizer = torch.optim.AdamW(model.parameters(),
                              lr=1e-4,
                              weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /home/ubuntu/.cache/torch/hub/checkpoints/resnet34-b627a593.pth


100%|█████████████████████████████████████| 83.3M/83.3M [00:00<00:00, 143MB/s]


### Training

In [80]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct = 0.0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        correct += (outputs.argmax(1) == labels).sum().item()

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    return avg_loss, acc

### Validation

In [81]:
def validate(model, loader, criterion):
    model.eval()
    total_loss, correct = 0.0, 0
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            loss = criterion(outputs, labels)

            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            total_loss += loss.item() * labels.size(0)
            correct += (preds == labels).sum().item()

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    acc = correct / len(loader.dataset)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))
    auc_score = roc_auc_score(y_true_bin, np.array(y_probs),
                              multi_class='ovr')

    report = classification_report(y_true, y_pred, digits=4)
    cm = confusion_matrix(y_true, y_pred)

    return avg_loss, acc, auc_score, report, cm, \
           y_true, y_pred, y_probs

### Metrics Computation

In [82]:
def compute_metrics(y_true, y_pred, y_probs):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_probs = np.array(y_probs)

    y_true_bin = label_binarize(y_true, classes=np.arange(NUM_CLASSES))

    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    mcc = matthews_corrcoef(y_true, y_pred)

    macro_auc = roc_auc_score(y_true_bin, y_probs,
                              average='macro', multi_class='ovr')
    micro_auc = roc_auc_score(y_true_bin, y_probs,
                              average='micro', multi_class='ovr')
    weighted_auc = roc_auc_score(y_true_bin, y_probs,
                                 average='weighted', multi_class='ovr')

    pr_aucs = []
    for i in range(NUM_CLASSES):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i],
                                                      y_probs[:, i])
        pr_aucs.append(auc(recall, precision))

    auprc_macro = np.mean(pr_aucs)

    cm = confusion_matrix(y_true, y_pred)
    sensitivity = cm.diagonal() / cm.sum(axis=1)

    specificity = []
    for i in range(NUM_CLASSES):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        specificity.append(tn / (tn + fp))

    return {
        "Cohen_Kappa": kappa,
        "MCC": mcc,
        "AUC_Macro": macro_auc,
        "AUC_Micro": micro_auc,
        "AUC_Weighted": weighted_auc,
        "AUPRC_Macro": auprc_macro,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Confusion_Matrix": cm
    }

### Saving Best Model

In [83]:
best_val_acc = 0.0
best_model_path = "saved_models/best_model_resnet34.pth"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{NUM_EPOCHS}")

    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion
    )

    val_loss, val_acc, val_auc, report, cm, \
    y_true, y_pred, y_probs = validate(
        model, val_loader, criterion
    )

    metrics = compute_metrics(y_true, y_pred, y_probs)

    # Save model if validation accuracy improves
    if val_acc > best_val_acc:
        print(f"Validation accuracy improved "
              f"({best_val_acc:.4f} --> {val_acc:.4f}), saving model")

        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)

print(f"\nBest model saved to: {best_model_path}")


Epoch 1/10


                                                                              

Validation accuracy improved (0.0000 --> 0.7869), saving model

Epoch 2/10


                                                                              

Validation accuracy improved (0.7869 --> 0.8279), saving model

Epoch 3/10


                                                                              

Validation accuracy improved (0.8279 --> 0.8361), saving model

Epoch 4/10


                                                                              


Epoch 5/10


                                                                              


Epoch 6/10


                                                                              


Epoch 7/10


                                                                              


Epoch 8/10


                                                                              


Epoch 9/10


                                                                              


Epoch 10/10


                                                                              


Best model saved to: best_model_resnet34.pth




### Model Comparison

In [85]:
import torch
import torch.nn as nn
from torchvision import models
from torchvision.models import (
    convnext_tiny, ConvNeXt_Tiny_Weights,
    efficientnet_b0, EfficientNet_B0_Weights,
    densenet121, DenseNet121_Weights,
    vit_b_16, ViT_B_16_Weights
)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_CLASSES = 5

In [86]:
def build_inceptionv3():
    from torchvision import models

    # Must enable aux_logits=True to create AuxLogits branch
    model = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT,
                                 aux_logits=True)

    # Replace both classifier heads to match the trained model
    model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, NUM_CLASSES)
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

    return model

In [87]:
def build_convnext_tiny():
    model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.DEFAULT)
    model.classifier[2] = nn.Linear(model.classifier[2].in_features, NUM_CLASSES)
    return model

In [88]:
def build_densenet121():
    model = densenet121(weights=DenseNet121_Weights.DEFAULT)
    model.classifier = nn.Linear(model.classifier.in_features, NUM_CLASSES)
    return model

In [89]:
def build_efficientnet_b0():
    model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, NUM_CLASSES)
    return model

In [90]:
def build_vit_b16():
    model = vit_b_16(weights=ViT_B_16_Weights.DEFAULT)
    model.heads.head = nn.Linear(model.heads.head.in_features, NUM_CLASSES)
    return model

In [91]:
def build_resnet50():
    from torchvision.models import resnet50, ResNet50_Weights
    model = resnet50(weights=ResNet50_Weights.DEFAULT)
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
    return model

In [92]:
def build_resnet34():
    from torchvision.models import resnet34, ResNet34_Weights
    model = resnet34(weights=ResNet34_Weights.DEFAULT)
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
    return model

In [97]:
saved_models = {
    "InceptionV3": {
        "fn": build_inceptionv3,
        "path": "saved_models/best_inceptionv3.pth"
    },
    "ConvNeXt-Tiny": {
        "fn": build_convnext_tiny,
        "path": "saved_models/best_model_convnext_tiny.pth"
    },
    "DenseNet121": {
        "fn": build_densenet121,
        "path": "saved_models/best_model_densenet121.pth"
    },
    "EfficientNet-B0": {
        "fn": build_efficientnet_b0,
        "path": "saved_models/best_model_efficientnet_b0_lowmem.pth"
    },
    "ViT-B16": {
        "fn": build_vit_b16,
        "path": "saved_models/best_vit_b16.pth"
    },
    "ResNet50": {
        "fn": build_resnet50,
        "path": "saved_models/best_model_resnet50.pth"
    },
    "ResNet34": {
        "fn": build_resnet34,
        "path": "saved_models/best_model_resnet34.pth"
    }
}

In [98]:
def evaluate_saved_model(model_name, model_fn, weights_path, val_loader, criterion):
    print(f"\nEvaluating {model_name} ...")

    model = model_fn()
    model.load_state_dict(torch.load(weights_path, map_location=DEVICE))
    model.to(DEVICE)
    model.eval()

    val_loss, val_acc, val_auc, report, cm, y_true, y_pred, y_probs = validate(
        model, val_loader, criterion
    )

    metrics = compute_metrics(y_true, y_pred, y_probs)

    return {
    "Accuracy": val_acc,
    "AUC": metrics.get("AUC_Macro", metrics.get("AUC", 0.0)),
    "Kappa": metrics.get("Cohen_Kappa", metrics.get("Kappa", 0.0)),
    "MCC": metrics.get("MCC", 0.0)
    }


In [99]:
results = {}

for name, info in saved_models.items():
    metrics = evaluate_saved_model(
        model_name=name,
        model_fn=info["fn"],
        weights_path=info["path"],
        val_loader=val_loader,
        criterion=criterion
    )
    results[name] = metrics


Evaluating InceptionV3 ...


                                                                              


Evaluating ConvNeXt-Tiny ...


                                                                              


Evaluating DenseNet121 ...


                                                                              


Evaluating EfficientNet-B0 ...


                                                                              


Evaluating ViT-B16 ...


                                                                              


Evaluating ResNet50 ...


                                                                              


Evaluating ResNet34 ...


                                                                              

### Saving Model Comparison Results

In [100]:
import pandas as pd
summary_df = pd.DataFrame([
{
"Model": name,
"Accuracy": results[name]["Accuracy"],
"AUC": results[name]["AUC"],
"Kappa": results[name]["Kappa"],
"MCC": results[name]["MCC"]
} for name in results
])
summary_df.to_csv("Results/model_comparison_report.csv", index=False)
print("\nSaved: model_comparison_report.csv")


Saved: model_comparison_report.csv
