*   Author: Zhuoning Yuan, Qi Qi
*   Project: https://github.com/yzhuoning/LibAUC



# **Installing LibAUC**

In [None]:
!pip install --upgrade libauc

Collecting libauc
[?25l  Downloading https://files.pythonhosted.org/packages/0f/2e/c3e6cae387f51ff6a7288dc4ea4282b7210139c22ce8388ea03f73f8d979/libauc-1.1.1-py3-none-any.whl (40kB)
[K     |████████▏                       | 10kB 16.7MB/s eta 0:00:01[K     |████████████████▎               | 20kB 12.1MB/s eta 0:00:01[K     |████████████████████████▌       | 30kB 7.6MB/s eta 0:00:01[K     |████████████████████████████████| 40kB 3.6MB/s 
[?25hInstalling collected packages: libauc
Successfully installed libauc-1.1.1



# **Importing LibAUC**

In [None]:
from libauc.losses import APLoss_SH
from libauc.optimizers import SOAP_SGD
from libauc.models import ResNet18
from libauc.datasets import CIFAR10
from libauc.datasets import ImbalanceGenerator, ImbalanceSampler 

import torchvision.transforms as transforms
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
import numpy as np
import torch
from PIL import Image


# **Reproducibility**

In [None]:
def set_all_seeds(SEED):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch, and switches cuDNN to deterministic, non-benchmarking mode.

    Args:
        SEED: integer seed applied to every generator.
    """
    # Local import keeps this cell self-contained.
    import random

    # Python's built-in generator was previously left unseeded, so anything
    # drawing from `random` could differ from run to run.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# **Image Dataset**

In [None]:
class ImageDataset(Dataset):
    """Dataset over an in-memory image array with separate train/test transforms.

    Every item is an ``(idx, image, target)`` triple, where ``idx`` is the
    position that was requested and ``image`` is the transformed sample.
    """

    def __init__(self, images, targets, image_size=32, crop_size=30, mode='train'):
        """Store the data and build both transform pipelines.

        Args:
            images: array of images; stored after conversion to ``np.uint8``.
            targets: per-image targets, indexed the same way as ``images``.
            image_size: output size handed to ``RandomCrop`` for training.
            crop_size: accepted for compatibility; not read anywhere in this class.
            mode: ``'train'`` selects the training transform; any other value
                selects the test transform.
        """
        channel_mean = (0.4914, 0.4822, 0.4465)
        channel_std = (0.2023, 0.1994, 0.2010)

        self.images = images.astype(np.uint8)
        self.targets = targets
        self.mode = mode

        # Training pipeline: padded random crop and random horizontal flip,
        # followed by tensor conversion and normalization.
        self.transform_train = transforms.Compose([
            transforms.RandomCrop(image_size, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ])
        # Test pipeline: tensor conversion and normalization only.
        self.transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ])

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(idx, transformed image, target)`` for position ``idx``."""
        raw, target = self.images[idx], self.targets[idx]
        pil_image = Image.fromarray(raw.astype('uint8'))
        pipeline = self.transform_train if self.mode == 'train' else self.transform_test
        return idx, pipeline(pil_image), target


# **Parameters**

In [None]:
# parameters

# data
SEED = 123        # random_seed handed to both ImbalanceGenerator calls
imratio = 0.02    # imratio handed to the training-set ImbalanceGenerator
BATCH_SIZE = 64
posNum = 1        # pos_num handed to ImbalanceSampler

# optimizer
lr = 0.6
weight_decay = 2e-4

# loss
margin = 0.5
beta = 0.99  # this refers to gamma in the paper

# **Loading datasets**

In [None]:
# datasets and the evaluation loader
(train_data, train_label), (test_data, test_label) = CIFAR10()

# Training split: generated with the configured imratio and shuffled.
train_images, train_labels = ImbalanceGenerator(
    train_data,
    train_label,
    imratio=imratio,
    shuffle=True,
    random_seed=SEED,
)
# Test split: generated with is_balanced=True.
test_images, test_labels = ImbalanceGenerator(
    test_data,
    test_label,
    is_balanced=True,
    random_seed=SEED,
)

train_dataset = ImageDataset(train_images, train_labels)
test_dataset = ImageDataset(test_images, test_labels, mode='test')

# Only the test loader is built here; it is iterated in a fixed order.
testloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
NUM_SAMPLES: [25510], POS:NEG: [510 : 25000], POS_RATIO: 0.0200
NUM_SAMPLES: [10000], POS:NEG: [5000 : 5000], POS_RATIO: 0.5000


# **Creating models & AUC Optimizer**

In [None]:
# Re-seed right before the network is constructed.
set_all_seeds(456)
model = ResNet18(pretrained=False, last_activation=None).cuda()

# APLoss_SH requires ImbalanceSampler() with pos_num>=1!
Loss = APLoss_SH(
    margin=margin,
    beta=beta,
    data_len=train_labels.shape[0],  # number of training samples
)
optimizer = SOAP_SGD(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

# **Training**

In [None]:
# training
model.train()
losses = []
print('-' * 30)
total_iters = 0
for epoch in range(64):
    # Step decay: divide the learning rate of the first param group by 10 at epoch 32.
    if epoch == 32:
        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / 10

    train_pred = []
    train_true = []
    model.train()

    # The loader (and with it a new ImbalanceSampler) is rebuilt every epoch.
    trainloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=ImbalanceSampler(train_labels.flatten().astype(int), BATCH_SIZE, pos_num=posNum),
        num_workers=2,
        pin_memory=True,
        drop_last=True,
    )

    for index, data, targets in trainloader:
        data, targets = data.cuda(), targets.cuda()
        y_pred = model(data)
        y_prob = torch.sigmoid(y_pred)
        # `index` carries the dataset positions of the batch samples.
        loss = Loss(y_prob, targets, index_s=index)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Detach before moving to the CPU so the copy is not tracked by autograd.
        train_pred.append(y_prob.detach().cpu().numpy())
        train_true.append(targets.detach().cpu().numpy())

    train_true = np.concatenate(train_true)
    train_pred = np.concatenate(train_pred)
    train_auc = roc_auc_score(train_true, train_pred)
    train_prc = average_precision_score(train_true, train_pred)

    # Evaluation: no gradients are needed, so skip building the autograd graph.
    # The batch variable is named `eval_images` so that the raw `test_data`
    # array produced by the dataset-loading cell is not overwritten.
    model.eval()
    test_pred = []
    test_true = []
    with torch.no_grad():
        for _, eval_images, test_targets in testloader:
            eval_images = eval_images.cuda()
            y_pred = model(eval_images)
            y_prob = torch.sigmoid(y_pred)
            test_pred.append(y_prob.cpu().numpy())
            test_true.append(test_targets.numpy())
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)

    val_auc = roc_auc_score(test_true, test_pred)
    val_prc = average_precision_score(test_true, test_pred)

    model.train()
    # `loss` is the loss of the last training mini-batch, not an epoch average.
    print("epoch: {}, train_loss: {:4f}, train_ap:{:4f}, test_ap:{:4f},  lr:{:4f}".format(epoch, loss.item(), train_prc, val_prc,  optimizer.param_groups[0]['lr'] ))
    

------------------------------
epoch: 0, train_loss: 0.004271, train_ap:0.016977, test_ap:0.574469,  lr:0.600000
epoch: 1, train_loss: 0.003858, train_ap:0.024827, test_ap:0.611937,  lr:0.600000
epoch: 2, train_loss: 0.003060, train_ap:0.025190, test_ap:0.613633,  lr:0.600000
epoch: 3, train_loss: 0.001054, train_ap:0.025885, test_ap:0.589008,  lr:0.600000
epoch: 4, train_loss: 0.003762, train_ap:0.025945, test_ap:0.610585,  lr:0.600000
epoch: 5, train_loss: 0.004220, train_ap:0.025707, test_ap:0.621060,  lr:0.600000
epoch: 6, train_loss: 0.004753, train_ap:0.029162, test_ap:0.650009,  lr:0.600000
epoch: 7, train_loss: 0.002049, train_ap:0.028029, test_ap:0.651799,  lr:0.600000
epoch: 8, train_loss: 0.005533, train_ap:0.031953, test_ap:0.634691,  lr:0.600000
epoch: 9, train_loss: 0.006606, train_ap:0.031283, test_ap:0.664364,  lr:0.600000
epoch: 10, train_loss: 0.003242, train_ap:0.035829, test_ap:0.663829,  lr:0.600000
epoch: 11, train_loss: 0.003200, train_ap:0.034632, test_ap:0.6679