# **Optimizing Multi-label AUROC loss on Chest X-Ray Dataset (CheXpert)**

* Author: Zhuoning Yuan

**Useful Resources**:
* Website: https://libauc.org
* Github: https://github.com/Optimization-AI/LibAUC

**Reference**:  

If you find this tutorial helpful in your work, please acknowledge our library and cite the following papers:
```
@inproceedings{yuan2021large,
  title={Large-scale robust deep auc maximization: A new surrogate loss and empirical studies on medical image classification},
  author={Yuan, Zhuoning and Yan, Yan and Sonka, Milan and Yang, Tianbao},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={3040--3049},
  year={2021}
}

@misc{libauc2022,
      title={LibAUC: A Deep Learning Library for X-Risk Optimization.},
      author={Zhuoning Yuan and Zi-Hao Qiu and Gang Li and Dixian Zhu and Zhishuai Guo and Quanqi Hu and Bokun Wang and Qi Qi and Yongjian Zhong and Tianbao Yang},
      year={2022}
}
```


# **Installing LibAUC**

In [None]:
!pip install libauc==1.1.9rc3

# **Downloading CheXpert**
 
*   To request dataset access, you need to apply from CheXpert website: https://stanfordmlgroup.github.io/competitions/chexpert/
*   In this tutorial, we use the smaller version of the dataset with lower image resolution, i.e., *CheXpert-v1.0-small.zip*



In [None]:
!cp /content/drive/MyDrive/chexpert-dataset/CheXpert-v1.0-small.zip /content/
!mkdir CheXpert
!unzip CheXpert-v1.0-small.zip -d /content/CheXpert/


# **Importing LibAUC**

In [7]:
from libauc.losses import AUCM_MultiLabel, CrossEntropyLoss
from libauc.optimizers import PESG, Adam
from libauc.models import densenet121 as DenseNet121
from libauc.datasets import CheXpert
from libauc.metrics import auc_roc_score # for multi-task

import torch 
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score
import torch.nn.functional as F   

# **Reproducibility**

In [8]:
def set_all_seeds(SEED):
    """Seed the random number generators used in this notebook and make cuDNN deterministic.

    Seeds Python's built-in ``random`` module, NumPy's legacy global generator and
    PyTorch, then disables cuDNN auto-tuning so that the same convolution
    algorithms are picked on every run.

    Args:
        SEED: integer seed applied to every generator.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not bring in `random`.
    import random

    # REPRODUCIBILITY
    random.seed(SEED)        # Python-level randomness was previously left unseeded
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# **Multi-label Training**
Optimizing Multi-label AUROC loss (e.g., 5 tasks)   




In [9]:
# parameters
# Defined before anything that consumes them, so editing a value here takes
# effect everywhere below (the batch size used to be hard-coded in the loaders).
SEED = 123
BATCH_SIZE = 32
NUM_CLASSES = 5     # number of disease labels predicted jointly
NUM_EPOCHS = 2
EVAL_EVERY = 400    # run validation every EVAL_EVERY training batches
lr = 0.1
gamma = 500
weight_decay = 1e-5
margin = 1.0

# dataloader
root = './CheXpert/CheXpert-v1.0-small/'
# Index: -1 denotes multi-label mode including 5 diseases
traindSet = CheXpert(csv_path=root+'train.csv', image_root_path=root, use_upsampling=False, use_frontal=True, image_size=224, mode='train', class_index=-1, verbose=False)
testSet = CheXpert(csv_path=root+'valid.csv', image_root_path=root, use_upsampling=False, use_frontal=True, image_size=224, mode='valid', class_index=-1, verbose=False)
trainloader = torch.utils.data.DataLoader(traindSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=True)
testloader = torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=False)

# check imbalance ratio for each task
print(traindSet.imratio_list)

# model
set_all_seeds(SEED)
model = DenseNet121(pretrained=True, last_activation=None, activations='relu', num_classes=NUM_CLASSES)
model = model.cuda()

# define loss & optimizer
# PESG is handed the loss's own a / b / alpha attributes alongside the model.
Loss = AUCM_MultiLabel(num_classes=NUM_CLASSES)
optimizer = PESG(model,
                 a=Loss.a,
                 b=Loss.b,
                 alpha=Loss.alpha,
                 lr=lr,
                 gamma=gamma,
                 margin=margin,
                 weight_decay=weight_decay,
                 device='cuda')

# training
best_val_auc = 0
for epoch in range(NUM_EPOCHS):

    # From the second epoch on, update the optimizer's regularizer; in the
    # recorded run this is where the learning rate drops from 0.1 to 0.01.
    if epoch > 0:
        optimizer.update_regularizer(decay_factor=10)

    for idx, (train_data, train_labels) in enumerate(trainloader):
        train_data, train_labels = train_data.cuda(), train_labels.cuda()

        # The model is built with last_activation=None, so the sigmoid is
        # applied explicitly before the loss.
        y_pred = torch.sigmoid(model(train_data))
        loss = Loss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % EVAL_EVERY != 0:
            continue

        model.eval()
        batch_preds = []
        batch_labels = []
        with torch.no_grad():
            for val_data, val_labels in testloader:
                val_scores = torch.sigmoid(model(val_data.cuda()))
                batch_preds.append(val_scores.cpu().detach().numpy())
                batch_labels.append(val_labels.numpy())
        model.train()

        # `test_true` / `test_pred` stay module-level names on purpose: the
        # Evaluation cell further down reads them after training finishes.
        test_true = np.concatenate(batch_labels)
        test_pred = np.concatenate(batch_preds)
        # With multi-label targets sklearn's default is the macro average,
        # i.e. the unweighted mean of the per-label AUROCs.
        val_auc_mean = roc_auc_score(test_true, test_pred)

        # Keep only the checkpoint with the best mean validation AUROC so far.
        if best_val_auc < val_auc_mean:
            best_val_auc = val_auc_mean
            torch.save(model.state_dict(), 'aucm_multi_label_pretrained_model.pth')

        print('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc))

  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0-small/', '')
  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0/', '')
Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth


  0%|          | 0.00/30.8M [00:00<?, ?B/s]

Epoch=0, BatchID=0, Val_AUC=0.5547, Best_Val_AUC=0.5547
Epoch=0, BatchID=400, Val_AUC=0.8445, Best_Val_AUC=0.8445
Epoch=0, BatchID=800, Val_AUC=0.8199, Best_Val_AUC=0.8445
Epoch=0, BatchID=1200, Val_AUC=0.8612, Best_Val_AUC=0.8612
Epoch=0, BatchID=1600, Val_AUC=0.8357, Best_Val_AUC=0.8612
Epoch=0, BatchID=2000, Val_AUC=0.8280, Best_Val_AUC=0.8612
Epoch=0, BatchID=2400, Val_AUC=0.8490, Best_Val_AUC=0.8612
Epoch=0, BatchID=2800, Val_AUC=0.8549, Best_Val_AUC=0.8612
Epoch=0, BatchID=3200, Val_AUC=0.8778, Best_Val_AUC=0.8778
Epoch=0, BatchID=3600, Val_AUC=0.8550, Best_Val_AUC=0.8778
Epoch=0, BatchID=4000, Val_AUC=0.8480, Best_Val_AUC=0.8778
Epoch=0, BatchID=4400, Val_AUC=0.8283, Best_Val_AUC=0.8778
Epoch=0, BatchID=4800, Val_AUC=0.8812, Best_Val_AUC=0.8812
Epoch=0, BatchID=5200, Val_AUC=0.8681, Best_Val_AUC=0.8812
Epoch=0, BatchID=5600, Val_AUC=0.8789, Best_Val_AUC=0.8812
Reducing learning rate to 0.01000 @ T=5970!
Updating regularizer @ T=5970!
Epoch=1, BatchID=0, Val_AUC=0.8600, Best_Val_

# **Evaluation**

In [11]:
# Per-task AUROC (one score per label) computed on the labels / predictions left
# behind by the most recent validation pass of the training cell. These reflect the
# model state at that pass, not the saved best checkpoint.
per_task_auc = auc_roc_score(test_true, test_pred)
per_task_auc

[0.8607397504456329,
 0.905952380952381,
 0.9110294117647059,
 0.8146981627296588,
 0.9171195652173914]