## **Installing LibAUC**


In [1]:
! pip install medmnist
! pip install libauc==1.2.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting medmnist
  Downloading medmnist-2.2.1-py3-none-any.whl (21 kB)
Collecting fire
  Downloading fire-0.5.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.3/88.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.5.0-py2.py3-none-any.whl size=116952 sha256=5c88e9905753853e2ca9a796bbfa15bc7492c3f9735f5a2dd3d76b7c266f9ec7
  Stored in directory: /root/.cache/pip/wheels/90/d4/f7/9404e5db0116bd4d43e5666eaa3e70ab53723e1e3ea40c9a95
Successfully built fire
Installing collected packages: fire, medmnist
Successfully installed fire-0.5.0 medmnist-2.2.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting li

# **Importing Libraries**


In [2]:
import os
import time
from PIL import Image
import numpy as np
import random

from libauc.losses import AUCMLoss, CrossEntropyLoss
from libauc.optimizers import PESG
from libauc.models import resnet20 as ResNet20
from libauc.models import resnet18 as ResNet18
from libauc.utils import ImbalancedDataGenerator
from libauc.sampler import DualSampler
from libauc.metrics import auc_roc_score

import medmnist
from medmnist import PneumoniaMNIST
from medmnist import INFO, Evaluator

import torch 
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings('ignore')

## **Reproducibility**



In [3]:
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## **Image Dataset**





In [16]:
class ImageDataset(Dataset):
    def __init__(self, images, targets, image_size=32, crop_size=28, mode='train'):
       self.images = images.astype(np.uint8)
       self.targets = targets
       self.mode = mode
       self.transform_train = transforms.Compose([                                                
                              transforms.ToTensor(),
                              # transforms.RandomCrop((crop_size, crop_size), padding=None),
                              transforms.RandomHorizontalFlip(),
                              transforms.Resize((image_size, image_size)),
                              transforms.Normalize(mean=[.5], std=[.5])
                              ])
       self.transform_test = transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Resize((image_size, image_size)),
                             transforms.Normalize(mean=[.5], std=[.5])
                              ])
    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        target = self.targets[idx]
        image = Image.fromarray(image.astype('uint8'))
        if self.mode == 'train':
            image = self.transform_train(image)
        else:
            image = self.transform_test(image)
        return image, target

# **Paramaters**

In [17]:
# HyperParameters
imratio = 0.1 # for demo 
total_epochs = 120
decay_epochs = [50, 75]
sampling_rate = 0.5
lr = 0.1
margin = 1.0
epoch_decay = 0.003
weight_decay = 0.0001
BATCH_SIZE=128
momentum=0.9

# **Loading datasets**

In [18]:
dataflag = 'pneumoniamnist'
info = INFO[dataflag]
task = info['task']
n_classes = len(info['label'])
info

{'python_class': 'PneumoniaMNIST',
 'description': 'The PneumoniaMNIST is based on a prior dataset of 5,856 pediatric chest X-Ray images. The task is binary-class classification of pneumonia against normal. We split the source training set with a ratio of 9:1 into training and validation set and use its source validation set as the test set. The source images are gray-scale, and their sizes are (384−2,916)×(127−2,713). We center-crop the images and resize them into 1×28×28.',
 'url': 'https://zenodo.org/record/6496656/files/pneumoniamnist.npz?download=1',
 'MD5': '28209eda62fecd6e6a2d98b1501bb15f',
 'task': 'binary-class',
 'label': {'0': 'normal', '1': 'pneumonia'},
 'n_channels': 1,
 'n_samples': {'train': 4708, 'val': 524, 'test': 624},
 'license': 'CC BY 4.0'}

In [19]:
# Load the train dataset
train_dataset = PneumoniaMNIST(root='./', split='train', download=True)
train_images = train_dataset.imgs 
train_labels = train_dataset.labels[:, 0]

# Load the val dataset
val_dataset = PneumoniaMNIST(root='./', split='val', download=True)
val_images = val_dataset.imgs 
val_labels = val_dataset.labels[:, 0]

# Load the test dataset
test_dataset = PneumoniaMNIST(root='./', split='test', download=True)
test_images = test_dataset.imgs 
test_labels = test_dataset.labels[:, 0]

Using downloaded and verified file: ./pneumoniamnist.npz
Using downloaded and verified file: ./pneumoniamnist.npz
Using downloaded and verified file: ./pneumoniamnist.npz


In [20]:
# data augmentations 
trainSet = ImageDataset(train_images, train_labels)
trainSet_eval = ImageDataset(val_images, val_labels, mode='test')
testSet = ImageDataset(test_images, test_labels, mode='test')

# dataloaders
sampler = DualSampler(trainSet, BATCH_SIZE, sampling_rate=sampling_rate)
train_loader = torch.utils.data.DataLoader(trainSet, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2)
val_loader = torch.utils.data.DataLoader(trainSet_eval, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader = torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# **Creating models & AUC Optimizer**

In [21]:
# Creating the model
model = ResNet18(pretrained=False, last_activation=None) 
model = model.cuda()

# Defining Loss and Optimizers
loss_fn = AUCMLoss()
optimizer = PESG(model, 
                 loss_fn=loss_fn,
                 lr=lr, 
                 momentum=momentum,
                 margin=margin, 
                 epoch_decay=epoch_decay, 
                 weight_decay=weight_decay)

# **Training**

In [22]:
print ('Start Training')
print ('-'*30)

best_val_auc = 0
best_model = model

train_log = []

for epoch in range(total_epochs):
     if epoch in decay_epochs:
         optimizer.update_regularizer(decay_factor=10) # decrease learning rate by 10x & update regularizer
   
     # TRAINING   
     train_loss = []
     model.train()    
     for train_data, train_targets in train_loader:
         train_data, train_targets  = train_data.cuda(), train_targets.cuda()
         train_data = train_data.expand(-1, 3, -1, -1)
         y_pred = model(train_data)
         y_pred = torch.sigmoid(y_pred)
         loss = loss_fn(y_pred, train_targets)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
         train_loss.append(loss.item())
    
     epoch_loss = np.mean(train_loss)

     # VALIDATION
     with torch.no_grad():
        model.eval()
        val_pred_list = []
        val_true_list = []
        for val_data, val_targets in val_loader:
            val_data  = val_data.cuda()
            val_data = val_data.expand(-1, 3, -1, -1)
            val_pred = model(val_data)
            val_pred_list.append(val_pred.cpu().detach().numpy())
            val_true_list.append(val_targets.numpy())
        val_true = np.concatenate(val_true_list)
        val_pred = np.concatenate(val_pred_list)
        val_auc = auc_roc_score(val_true, val_pred)

        val_pred_binary = (val_pred > 0.5).astype(int)
        val_accuracy = accuracy_score(val_true, val_pred_binary)
          
        if best_val_auc < val_auc:
          best_val_auc = val_auc
          best_model = model
          
        train_log.append(val_auc)    

     print("epoch: %s, epoch_loss: %.4f, val_auc: %.4f, lr: %.4f, best_val_auc: %.4f"%(epoch, epoch_loss, val_auc, optimizer.lr, best_val_auc ))    

Start Training
------------------------------
epoch: 0, epoch_loss: 0.0568, val_auc: 0.9741, lr: 0.1000, best_val_auc: 0.9741
epoch: 1, epoch_loss: 0.0179, val_auc: 0.9865, lr: 0.1000, best_val_auc: 0.9865
epoch: 2, epoch_loss: 0.0126, val_auc: 0.9931, lr: 0.1000, best_val_auc: 0.9931
epoch: 3, epoch_loss: 0.0104, val_auc: 0.9919, lr: 0.1000, best_val_auc: 0.9931
epoch: 4, epoch_loss: 0.0066, val_auc: 0.9938, lr: 0.1000, best_val_auc: 0.9938
epoch: 5, epoch_loss: 0.0062, val_auc: 0.9918, lr: 0.1000, best_val_auc: 0.9938
epoch: 6, epoch_loss: 0.0040, val_auc: 0.9943, lr: 0.1000, best_val_auc: 0.9943
epoch: 7, epoch_loss: 0.0038, val_auc: 0.9927, lr: 0.1000, best_val_auc: 0.9943
epoch: 8, epoch_loss: 0.0031, val_auc: 0.9904, lr: 0.1000, best_val_auc: 0.9943
epoch: 9, epoch_loss: 0.0025, val_auc: 0.9937, lr: 0.1000, best_val_auc: 0.9943
epoch: 10, epoch_loss: 0.0019, val_auc: 0.9938, lr: 0.1000, best_val_auc: 0.9943
epoch: 11, epoch_loss: 0.0020, val_auc: 0.9884, lr: 0.1000, best_val_auc:

# **Testing**

In [23]:
# Evaluation on Test data

test_pred_list = []
test_true_list = [] 
for test_data, test_targets in test_loader:
    test_data  = test_data.cuda()
    test_data = test_data.expand(-1, 3, -1, -1)
    test_pred = best_model(test_data)
    test_pred_list.append(test_pred.cpu().detach().numpy())
    test_true_list.append(test_targets.numpy())
test_true = np.concatenate(test_true_list)
test_pred = np.concatenate(test_pred_list)
test_auc =  auc_roc_score(test_true, test_pred) 

test_pred_binary = (test_pred > 0.5).astype(int)
test_accuracy = accuracy_score(test_true, test_pred_binary)
print("best_test_auc: %.4f, test_accuracy: %.4f"%(test_auc, test_accuracy))      

best_test_auc: 0.9533, test_accuracy: 0.8830


In [24]:
# Saving the best model
state = {
    'net': best_model.state_dict(),
}

output_root = os.path.join('./output', dataflag)
if not os.path.exists(output_root):
    os.makedirs(output_root)

filename = dataflag + '_auc_' + str(round(test_auc,4)) + '_model.pth'
path = os.path.join(output_root, filename)
torch.save(state, path)

In [25]:
# Eval function similar to demo file

def evaluate(net, test_loader):
    # Testing AUC
    score_list = list()
    label_list = list()
    for tmp_data, tmp_label in test_loader:
        tmp_data, tmp_label = tmp_data.cuda(), tmp_label.cuda()
        tmp_data = tmp_data.expand(-1, 3, -1, -1)        
        tmp_score = net(tmp_data).detach().clone().cpu()
        score_list.append(tmp_score)
        label_list.append(tmp_label.cpu())
    test_label = torch.cat(label_list)
    test_score = torch.cat(score_list)
                   
    test_auc = metrics.roc_auc_score(test_label, test_score)                   
    print("Test: %.4f"%test_auc, flush=True)

checkpoint = torch.load(path)
final_model = ResNet18() 
final_model = final_model.cuda()
final_model.load_state_dict(checkpoint['net'])
evaluate(final_model, test_loader)

Test: 0.9521
