
* Author: Zhuoning Yuan
* Project: https://github.com/Optimization-AI/LibAUC


# **Citation**


If you use this work, please acknowledge our library and cite the following papers:
```
@inproceedings{
    yuan2022compositional,
    title={Compositional Training for End-to-End Deep {AUC} Maximization},
    author={Zhuoning Yuan and Zhishuai Guo and Nitesh Chawla and Tianbao Yang},
    booktitle={International Conference on Learning Representations},
    year={2022},
    url={https://openreview.net/forum?id=gPvB4pdu_Z}
}
```
```
@inproceedings{yuan2021robust,
	title={Large-scale Robust Deep AUC Maximization: A New Surrogate Loss and Empirical Studies on Medical Image Classification},
	author={Yuan, Zhuoning and Yan, Yan and Sonka, Milan and Yang, Tianbao},
	booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
	year={2021}
	}
```

# **Installing LibAUC**

In [None]:
!pip install libauc==1.1.9rc1

# **Importing LibAUC**

In [3]:
from libauc.losses import CompositionalLoss
from libauc.optimizers import PDSCA
from libauc.models import ResNet20
from libauc.datasets import CIFAR10, CIFAR100, CAT_VS_DOG, STL10 
from libauc.datasets import ImbalanceGenerator

import torch 
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score

# **Reproducibility**

In [4]:
def set_all_seeds(SEED):
    """Seed every random number generator used in this notebook.

    Seeds Python's built-in ``random`` module, NumPy's legacy global
    generator and PyTorch (CPU and, when available, all CUDA devices), and
    switches cuDNN to deterministic mode with autotuning disabled.

    Args:
        SEED: integer seed applied to all generators.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not import `random`.
    import random

    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # Explicitly seed every CUDA device; this call is a silent no-op when
    # CUDA is not available.
    torch.cuda.manual_seed_all(SEED)
    # Trade some speed for run-to-run reproducibility of cuDNN kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# **Image Dataset**

In [5]:
class ImageDataset(Dataset):
    """In-memory image dataset with separate train / test transform pipelines.

    Args:
        images: array of images; it is cast to ``uint8`` on construction.
        targets: per-image labels, indexable in the same order as ``images``.
        image_size: side length every sample is resized to.
        crop_size: side length of the random crop applied in train mode.
        mode: ``'train'`` selects the augmenting pipeline; any other value
            selects the test pipeline.
    """

    def __init__(self, images, targets, image_size=32, crop_size=30, mode='train'):
        self.images = images.astype(np.uint8)
        self.targets = targets
        self.mode = mode

        output_hw = (image_size, image_size)
        # Train: random crop + horizontal flip, then resize back to output_hw.
        train_steps = [
            transforms.ToTensor(),
            transforms.RandomCrop((crop_size, crop_size), padding=None),
            transforms.RandomHorizontalFlip(),
            transforms.Resize(output_hw),
        ]
        # Test: no augmentation, only the resize.
        test_steps = [
            transforms.ToTensor(),
            transforms.Resize(output_hw),
        ]
        self.transform_train = transforms.Compose(train_steps)
        self.transform_test = transforms.Compose(test_steps)

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(transformed_image, target)`` for position ``idx``."""
        sample, label = self.images[idx], self.targets[idx]
        pil_image = Image.fromarray(sample.astype('uint8'))
        pipeline = self.transform_train if self.mode == 'train' else self.transform_test
        return pipeline(pil_image), label

# **Parameters**

In [7]:
# all parameters
# number of epochs run by the training loop
total_epochs = 200 
# random seed for the experiment
SEED = 123
# which dataset the loading cell picks: 'C10', 'C100', 'STL10' or 'C2'
dataset = 'C2' # choose dataset to use
# imbalance ratio handed to ImbalanceGenerator (training split) and CompositionalLoss
imratio = 0.1
# mini-batch size for both data loaders (the loading cell overrides it to 32 for 'STL10')
BATCH_SIZE = 128

# tunable parameters (all forwarded to the PDSCA optimizer)
margin = 1.0
lr = 0.1  
#lr0 = 0.1 # refers to line 5 in algorithm 1. By default, lr0=lr unless you specify the value and pass it to optimizer
gamma = 500 
weight_decay = 1e-4
beta1 = 0.9   # try different values: e.g., [0.999, 0.99, 0.9]
beta2 = 0.999 # try different values: e.g., [0.999, 0.99, 0.9] 

# **Loading datasets**

In [8]:
# Select the raw train/test arrays and the working image resolution for the
# dataset chosen in the parameter cell.
if dataset == 'C10':
    IMG_SIZE = 32
    (train_data, train_label), (test_data, test_label) = CIFAR10()
elif dataset == 'C100':
    IMG_SIZE = 32
    (train_data, train_label), (test_data, test_label) = CIFAR100()
elif dataset == 'STL10':
    BATCH_SIZE = 32  # STL10 uses larger (96x96) images, so a smaller batch is used
    IMG_SIZE = 96
    (train_data, train_label), (test_data, test_label) = STL10()
elif dataset == 'C2':
    IMG_SIZE = 50
    (train_data, train_label), (test_data, test_label) = CAT_VS_DOG()
else:
    # Fail fast with a clear message instead of a NameError further down
    # when `dataset` is misspelled or unsupported.
    raise ValueError("Unknown dataset '{}': expected one of 'C10', 'C100', 'STL10', 'C2'".format(dataset))

# Build the imbalanced training split (target ratio `imratio`) and a balanced
# test split; the generator seed is fixed so the splits are repeatable.
(train_images, train_labels) = ImbalanceGenerator(train_data, train_label, imratio=imratio, shuffle=True, random_seed=0) # fixed seed
(test_images, test_labels) = ImbalanceGenerator(test_data, test_label, is_balanced=True,  random_seed=0)

# Training samples are randomly cropped to IMG_SIZE-2 and resized back to
# IMG_SIZE by ImageDataset; test samples are only resized.
train_set = ImageDataset(train_images, train_labels, image_size=IMG_SIZE, crop_size=IMG_SIZE-2)
test_set = ImageDataset(test_images, test_labels, image_size=IMG_SIZE, crop_size=IMG_SIZE-2, mode='test')

# drop_last=True keeps every training batch at exactly BATCH_SIZE samples.
trainloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=False, drop_last=True)
testloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8,  pin_memory=False)

Downloading data from  https://homepage.divms.uiowa.edu/~zhuoning/datasets/cat_vs_dog.tar.gz
NUM_SAMPLES: [11128], POS:NEG: [1112 : 10016], POS_RATIO: 0.0999
NUM_SAMPLES: [5000], POS:NEG: [2516 : 2484], POS_RATIO: 0.5032


# **Training**

In [9]:
# Seed from the SEED constant of the parameter cell rather than a repeated literal.
set_all_seeds(SEED)
model = ResNet20(pretrained=False, last_activation=None, activations='relu', num_classes=1)
model = model.cuda()

# Compositional Training
Loss = CompositionalLoss(imratio=imratio)
optimizer = PDSCA(model,
                  a=Loss.a,
                  b=Loss.b,
                  alpha=Loss.alpha,
                  lr=lr,
                  beta1=beta1,
                  beta2=beta2,
                  gamma=gamma,
                  margin=margin,
                  weight_decay=weight_decay)

test_auc_max = 0
print ('-'*30)
model.train()
for epoch in range(total_epochs):
    # Stage-wise schedule: adjust the optimizer at 50% and 75% of training.
    # NOTE(review): update_regularizer(decay_factor=10) presumably decays the
    # learning rate by 10x -- confirm against the LibAUC documentation.
    if epoch == int(0.5*total_epochs) or epoch == int(0.75*total_epochs):
        optimizer.update_regularizer(decay_factor=10)

    # ---- training pass (model is in train mode here) ----
    train_pred = []
    train_true = []
    for data, targets in trainloader:
        data, targets = data.cuda(), targets.cuda()
        y_pred = model(data)
        loss = Loss(y_pred, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Collect raw model outputs; AUC only depends on their ranking.
        train_pred.append(y_pred.cpu().detach().numpy())
        train_true.append(targets.cpu().detach().numpy())

    train_true = np.concatenate(train_true)
    train_pred = np.concatenate(train_pred)
    train_auc = roc_auc_score(train_true, train_pred)

    # ---- evaluation pass ----
    model.eval()
    test_pred = []
    test_true = []
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every test batch.
    with torch.no_grad():
        # Distinct loop names so the notebook-level `test_data` array from the
        # loading cell is not overwritten.
        for batch_images, batch_targets in testloader:
            outputs = model(batch_images.cuda())
            y_pred = torch.sigmoid(outputs)
            test_pred.append(y_pred.cpu().numpy())
            test_true.append(batch_targets.numpy())
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)
    val_auc = roc_auc_score(test_true, test_pred)
    # Back to train mode for the next epoch (and after the final one).
    model.train()

    # Track the best test AUC seen so far.
    if test_auc_max < val_auc:
        test_auc_max = val_auc

    # print results (the format string has four fields, so four values are passed)
    print("epoch: {}, train_auc:{:4f}, test_auc:{:4f}, test_auc_max:{:4f}".format(epoch, train_auc, val_auc, test_auc_max))

------------------------------
epoch: 0, train_auc:0.616761, test_auc:0.623856, test_auc_max:0.623856
epoch: 1, train_auc:0.683287, test_auc:0.668705, test_auc_max:0.668705
epoch: 2, train_auc:0.706854, test_auc:0.695609, test_auc_max:0.695609
epoch: 3, train_auc:0.718007, test_auc:0.721402, test_auc_max:0.721402
epoch: 4, train_auc:0.728789, test_auc:0.731166, test_auc_max:0.731166
epoch: 5, train_auc:0.744389, test_auc:0.729358, test_auc_max:0.731166
epoch: 6, train_auc:0.759461, test_auc:0.739063, test_auc_max:0.739063
epoch: 7, train_auc:0.763460, test_auc:0.740313, test_auc_max:0.740313
epoch: 8, train_auc:0.767668, test_auc:0.760257, test_auc_max:0.760257
epoch: 9, train_auc:0.782970, test_auc:0.781897, test_auc_max:0.781897
epoch: 10, train_auc:0.791637, test_auc:0.770308, test_auc_max:0.781897
epoch: 11, train_auc:0.808350, test_auc:0.769889, test_auc_max:0.781897
epoch: 12, train_auc:0.816112, test_auc:0.791682, test_auc_max:0.791682
epoch: 13, train_auc:0.819606, test_auc:0.7