# Demo: collecting logits and computing per-sample entropy

In [1]:
import argparse
import os
import shutil
import time
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision
import copy
import random
import tensorflow_probability as tfp
import numpy as np
from PIL import Image
from timm import create_model
import torch.nn.functional as F
from sklearn import preprocessing
from sklearn.preprocessing import minmax_scale

# Restrict the process to physical GPU 1. The mask is set BEFORE the first CUDA
# query so that it is already in place when torch probes for devices.
# NOTE(review): the run log captured further down lists both GPUs, which suggests
# the previous order (query first, mask second) left the mask ineffective - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Fall back to the CPU when no (visible) CUDA device is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def seed_everything(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy, and torch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED``, and switches cuDNN to deterministic mode with
    auto-tuning (benchmark) disabled.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # All four seeders take the same single integer argument.
    for apply_seed in (random.seed, torch.manual_seed,
                       torch.cuda.manual_seed_all, np.random.seed):
        apply_seed(seed)

    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False
# Hyper-parameters for the experiment. Each row is (flag, default, type, help);
# rows are registered in this order, which is also the order shown by --help.
parser = argparse.ArgumentParser(description='ImageNet Training')
_ARGUMENT_TABLE = (
    ('--lr', 0.1, float, 'learning rate'),
    ('--lr_schedule', 0, int, 'lr scheduler'),
    # ('--train_batch', 512, int, 'batch size'),  # disabled
    ('--valid_batch', 512, int, 'batch size'),
    ('--num_epoch', 200, int, 'epoch number'),
    ('--num_classes', 1000, int, 'number classes'),
    ('--lr_densenet', 0.1, float, 'learning rate'),
    ('--lr_vgg16', 0.001, float, 'learning rate'),
)
for _flag, _default, _caster, _help in _ARGUMENT_TABLE:
    parser.add_argument(_flag, default=_default, type=_caster, help=_help)

# An explicit empty argument list is parsed (sys.argv is not consulted), so
# `args` always carries the defaults declared above.
args = parser.parse_args(args=[])

class ImageNet_valid_test(torchvision.datasets.ImageFolder):
    """ImageFolder restricted to a fixed per-class slice (validation or test split).

    For every class index below ``num_classes`` the samples of that class are
    taken in the order ``ImageFolder`` discovered them. The validation split
    keeps positions ``[0, valid_per_class)`` and the test split keeps positions
    ``[valid_per_class, samples_per_class)``. With the defaults (25 / 50) and a
    folder holding exactly 50 images per class this selects the same samples
    as slicing the flat sample list at ``50*i`` offsets, but it no longer mixes
    classes when a class holds a different number of images.

    Args:
        path: Root directory in ``ImageFolder`` layout.
        transform: Optional transform applied to each loaded image.
        is_valid: ``True`` selects the validation slice, ``False`` the test slice.
        num_classes: Number of leading classes to keep. ``None`` keeps every
            class found under ``path``.
        valid_per_class: Per-class position where validation ends and test begins.
        samples_per_class: Per-class position where the test slice ends.
    """

    def __init__(self, path, transform=None, is_valid=False, num_classes = None,
                 valid_per_class=25, samples_per_class=50):
        super(ImageNet_valid_test, self).__init__(root=path, transform=transform)
        self.transform = transform
        # Previously `range(None)` raised a TypeError; fall back to all classes.
        if num_classes is None:
            num_classes = len(self.classes)
        self.num_classes = num_classes
        self.is_valid = is_valid

        if self.is_valid:
            start, stop = 0, valid_per_class
        else:
            start, stop = valid_per_class, samples_per_class

        # Bucket samples by label, preserving their discovery order per class.
        per_class = [[] for _ in range(num_classes)]
        for sample in self.samples:
            label = sample[1]
            if label < num_classes:
                per_class[label].append(sample)

        selected = []
        for class_samples in per_class:
            selected.extend(class_samples[start:stop])

        self.samples = selected
        # Keep the derived views in step with `samples`; ImageFolder exposes
        # `imgs` and `targets` alongside `samples` (see torchvision docs).
        self.imgs = selected
        self.targets = [label for _, label in selected]

    def __getitem__(self, index: int):
        """Load the image at ``index`` and return ``(sample, target)``.

        ``transform`` and ``target_transform`` are applied when they are set.
        """
        path, target = self.samples[index]
        sample = self.loader(path)

        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target
    
def valid(model, testloader, T=1, device=None):
    """Evaluate ``model`` on ``testloader`` and collect temperature-scaled logits.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        testloader: Iterable yielding ``(inputs, targets)`` batches.
        T: Temperature the raw outputs are divided by before being stored.
            Accuracy is computed from the unscaled outputs. A tensor is
            accepted as long as it broadcasts against the outputs and lives on
            the same device as them.
        device: Device the batches are moved to. ``None`` uses the device of
            the model's first parameter (CPU when the model has no parameters).
            Previously this argument was ignored and ``.cuda()`` was always
            called, which failed on CPU-only hosts.

    Returns:
        Tuple ``(accuracy, logits, labels, ece)``: top-1 accuracy as a float,
        the scaled logits and the labels as NumPy arrays, and the value
        returned by ``tfp.stats.expected_calibration_error`` with 10 bins.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct = 0
    total = 0
    logits_list = []
    labels_list = []
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            # Move each batch to host memory right away so the accumulated
            # logits do not occupy accelerator memory for the whole pass.
            logits_list.append((outputs / T).cpu())
            labels_list.append(targets.cpu())

        logits = torch.cat(logits_list).numpy()
        labels = torch.cat(labels_list).numpy()
        ece = tfp.stats.expected_calibration_error(10, logits=logits, labels_true=labels, labels_predicted=np.argmax(logits,1))
    return correct / total, logits, labels, ece

def ece_eval(preds, targets, n_bins=15, bg_cls = -1):
    """Compute the binned expected calibration error of ``preds``.

    The confidence of a sample is the maximum of its row in ``preds`` and the
    predicted class is the arg-max of that row. Samples whose target is not
    greater than ``bg_cls`` are dropped. Confidences are grouped into
    ``n_bins`` equal-width bins over (0, 1], each bin being ``(lower, upper]``.

    Args:
        preds: 2-D array, one row of scores per sample (binned on [0, 1], so
            presumably probabilities).
        targets: 1-D array of integer labels.
        n_bins: Number of confidence bins.
        bg_cls: Labels less than or equal to this value are ignored.

    Returns:
        Tuple ``(ece, acc, conf, Bm, ece_level)``: the size-weighted mean of
        ``|acc - conf|``, the per-bin accuracy, the per-bin mean confidence,
        the per-bin sample count, and the size-weighted mean of the signed gap
        ``conf - acc``.
    """
    edges = np.linspace(0, 1, n_bins + 1)

    # Filter out background / ignored samples once and reuse the mask.
    keep = targets > bg_cls
    confidences = np.max(preds, 1)[keep]
    predictions = np.argmax(preds, 1)[keep]
    accuracies = predictions == targets[keep]

    Bm = np.zeros(n_bins)
    acc = np.zeros(n_bins)
    conf = np.zeros(n_bins)

    for slot, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
        members = np.logical_and(confidences > lower, confidences <= upper)
        count = np.sum(members)
        Bm[slot] = count
        if count > 0:
            # Empty bins keep accuracy and confidence at zero.
            acc[slot] = np.sum(accuracies[members]) / Bm[slot]
            conf[slot] = np.sum(confidences[members]) / Bm[slot]

    total = Bm.sum()
    ece = (Bm * np.abs((acc - conf))).sum() / total
    ece_level = (Bm * (conf - acc)).sum() / total
    return ece, acc, conf, Bm, ece_level

def optimal_T(logits, labels, upper=None, lower=None):
    """Grid-search the temperature that minimises ECE on the given logits.

    Temperatures ``np.arange(lower, upper, 0.01)`` are tried in turn. For each
    one the logits are divided by it, soft-maxed over dim 1 and scored with
    ``ece_eval``.

    Args:
        logits: 2-D array or tensor of raw logits, one row per sample. The
            function previously read the global ``logits_bs`` instead of this
            argument.
        labels: Labels in the form ``ece_eval`` accepts for its ``targets``.
        upper: Exclusive upper bound of the temperature grid (required).
        lower: Inclusive lower bound of the temperature grid (required).

    Returns:
        Tuple ``(best_ece, best_t)``: the lowest ECE rounded to three decimals
        and the temperature that produced it (``0`` if no candidate scored
        below infinity).

    Raises:
        ValueError: If ``lower`` or ``upper`` is ``None``.
    """
    if lower is None or upper is None:
        raise ValueError("optimal_T requires both `lower` and `upper` temperature bounds")

    # Convert once; the loop must not overwrite the caller's logits.
    base_logits = torch.as_tensor(logits)

    best_ece = np.inf
    best_t = 0
    for T in np.arange(lower, upper, 0.01):
        if T <= 0:
            # Dividing by a non-positive temperature yields no usable probabilities.
            continue
        probs = F.softmax(base_logits / float(T), dim=1).detach().cpu().numpy()
        ece, _, _, _, _ = ece_eval(probs, labels)
        if ece < best_ece:
            best_ece = ece
            best_t = T
    return np.round(best_ece, 3), best_t

    
def classwise_ece(logits, labels, num_cls):
    """Evaluate ``ece_eval`` separately on the samples of each class.

    Args:
        logits: 2-D array handed to ``ece_eval`` as its ``preds`` argument
            (NOTE(review): ``ece_eval`` bins the row maximum on [0, 1], so this
            presumably expects probabilities rather than raw logits - confirm).
        labels: 1-D array of integer labels used to select each class's rows.
        num_cls: Number of classes; classes ``0 .. num_cls - 1`` are evaluated.

    Returns:
        Tuple ``(mean_ece, mean_level, ece_level_per_class)``: the mean of the
        per-class ECE values, the mean of the per-class signed gaps, and the
        list of per-class signed gaps. Per-class values are rounded to three
        decimals before averaging.
    """
    ece_per_class, ece_level_per_class = [], []

    for cls in range(num_cls):
        rows = labels == cls
        cls_ece, _, _, _, cls_level = ece_eval(logits[rows], labels[rows])
        ece_per_class.append(np.round(cls_ece, 3))
        ece_level_per_class.append(np.round(cls_level, 3))

    mean_ece = np.array(ece_per_class).mean()
    mean_level = np.array(ece_level_per_class).mean()
    return mean_ece, mean_level, ece_level_per_class

def tuning_ece_level_factor(opt_t, norm_ece_level,model, cifar10_test_loader,device,
                            factor_range=(-2.0, 2.0), factor_step=0.1):
    """Search the factor that best blends ``norm_ece_level`` into the temperature.

    For every factor ``f`` in ``np.arange(*factor_range, factor_step)`` the
    candidate temperature ``opt_t + norm_ece_level * f`` is applied to the
    model's logits and scored with ``ece_eval``. The candidate with the lowest
    ECE wins.

    The original loop iterated over ``np.arange(-2, -2, 0.1)``, which is empty,
    so no candidate was ever evaluated. The default range is now ``[-2, 2)``.
    NOTE(review): the upper bound 2 is an assumption - confirm the intended range.

    Args:
        opt_t: Base temperature.
        norm_ece_level: Offset direction scaled by each candidate factor
            (presumably per-class values - verify against the caller).
        model: Network passed to ``valid``.
        cifar10_test_loader: Data loader passed to ``valid``.
        device: Device used for evaluation and for the returned tensor.
        factor_range: ``(start, stop)`` of the factor grid, stop exclusive.
        factor_step: Spacing of the factor grid.

    Returns:
        The best candidate temperature as a float tensor on ``device``, or
        ``0`` when no candidate produced an ECE below infinity.
    """
    best_ece, best_ece_level_factor,best_ece_level_per_class = np.inf, 0, 0

    # The forward pass does not depend on the candidate: collect the raw
    # logits once (T=1) and rescale them per candidate below.
    _, raw_logits, labels, _ = valid(model, cifar10_test_loader, T = 1, device = device)
    raw_logits = torch.as_tensor(raw_logits)

    factor_start, factor_stop = factor_range
    for ece_level_factor in np.arange(factor_start, factor_stop, factor_step):
        ece_level_t = opt_t + norm_ece_level * ece_level_factor
        ece_level_t = torch.tensor(ece_level_t).float().to(device)

        scaled = raw_logits / ece_level_t.cpu()
        logits_all = F.softmax(scaled, dim=1).detach().cpu().numpy()

        ece_c, _, _, _, _ = ece_eval(logits_all, labels)

        if ece_c<best_ece:
            best_ece = ece_c
            best_ece_level_factor = ece_level_factor
            best_ece_level_per_class=ece_level_t
    print("best ece and the ece level factor:",best_ece, best_ece_level_factor)
    return best_ece_level_per_class



In [2]:
# Fix all RNG seeds before any data or model object is created.
seed_everything()

# Root folder of the images that both splits are drawn from.
valdir = os.path.join('imagenet', 'val')

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# transform_val = transforms.Compose([transforms.Resize(256),transforms.CenterCrop(224),
#         transforms.ToTensor(),normalize])
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# Both splits read the same folder with the same transform; only `is_valid`
# differs and decides which per-class slice is kept.
_split_options = dict(transform=transform_test, num_classes=args.num_classes)
valid_dataset = ImageNet_valid_test(valdir, is_valid=True, **_split_options)
test_dataset = ImageNet_valid_test(valdir, is_valid=False, **_split_options)

# Identical loader settings for both splits; no shuffling, so sample order is stable.
_loader_options = dict(batch_size=args.valid_batch, shuffle=False,
                       num_workers=2, pin_memory=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, **_loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **_loader_options)

# Pretrained ResNet-34 placed on the selected device, plus an SGD optimizer over its parameters.
model = models.resnet34(pretrained=True)
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                      nesterov=False, weight_decay=5e-4)

# model = models.densenet121(pretrained=True).to(device)
# model = model.to(device)
# optimizer = optim.SGD(model.parameters(), lr=args.lr_densenet, momentum=0.9, nesterov=False, weight_decay=5e-4)

# model = models.vgg16(pretrained=True).to(device)
# model = model.to(device)
# optimizer = optim.SGD(model.parameters(), lr=args.lr_vgg16, momentum=0.9, nesterov=False, weight_decay=5e-4)

# efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)
# model = efficientnet.eval().to(device)
# optimizer = optim.Adam(model.parameters())


# vit = create_model("vit_large_patch16_224", pretrained=True).to(device)#vit_base_patch16_224




Logit Collection

In [4]:
# Run the model over the test split and keep the (unscaled, T=1) logits and labels.
accuracy, logits, labels, _ = valid(model, test_loader)
logits = torch.tensor(logits)

# Passing the tuple straight to np.save makes NumPy build a ragged object array
# implicitly, which is deprecated (and an error in newer NumPy releases).
# Build the two-slot object array explicitly instead; the file still unpacks
# as `logits, labels = np.load(..., allow_pickle=True)`.
_logit_record = np.empty(2, dtype=object)
_logit_record[0] = logits
_logit_record[1] = labels
np.save('logits_sample.npy', _logit_record, allow_pickle=True, fix_imports=True)


2023-06-09 12:25:29.551843: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-09 12:25:34.099715: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16432 MB memory:  -> device: 0, name: Tesla P40, pci bus id: 0000:3b:00.0, compute capability: 6.1
2023-06-09 12:25:34.100428: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 21794 MB memory:  -> device: 1, name: Tesla P40, pci bus id: 0000:af:00.0, compute capability: 6.1
  arr = np.asanyarray(arr)
  arr = np.asanyarray(arr)


Compute the entropy of each sample from its logits, then rank the samples by entropy.

In [6]:
import numpy as np
import torch
from torch.distributions import Categorical
import torch.nn.functional as F
import sys

def entropy_calculation(logits):
    """Return soft-max probabilities and per-sample entropy for a batch of logits.

    The whole batch is processed in one vectorised call instead of one Python
    iteration per sample, and ``torch.as_tensor`` is used so that passing an
    existing tensor no longer triggers torch's copy-construct warning.

    Args:
        logits: 2-D array or tensor of shape (num_samples, num_classes).

    Returns:
        Tuple ``(sm_logits_all, etp_all)``:
        ``sm_logits_all`` is a flat list of 0-d tensors holding every soft-max
        probability in row-major order (num_samples * num_classes entries);
        ``etp_all`` is a list with one 0-d entropy tensor per sample.
    """
    logits = torch.as_tensor(logits)
    if len(logits) == 0:
        return [], []

    # Soft-max over the class axis, then the entropy of each row's distribution.
    probs = F.softmax(logits, dim=1)
    entropies = Categorical(probs=probs).entropy()

    # Keep the historical return structure: probabilities flattened to scalars.
    sm_logits_all = list(probs.reshape(-1))
    etp_all = list(entropies)

    return sm_logits_all, etp_all

# SECURITY NOTE: allow_pickle=True unpickles arbitrary objects; only load files
# that were produced by this notebook.
logits_bs, labels_bs = np.load('logits_sample.npy',allow_pickle=True)


# as_tensor reuses an already-loaded tensor instead of copy-constructing it,
# which avoids torch's copy-construct UserWarning.
logits_bs_ = torch.as_tensor(logits_bs)
# labels_bs_ = torch.tensor(labels_bs).long()

conf, entropy = entropy_calculation(logits_bs)
entropy_ = torch.tensor(entropy)
# torch.sort returns (sorted values, original indices), ascending by default.
entropy_sort = torch.sort(entropy_)
print("minmax entropy:",entropy_sort[0][0], entropy_sort[1][0], entropy_sort[0][-1], entropy_sort[1][-1])

# Store the five results in an explicit 1-D object array: handing np.save a
# tuple of differently-shaped items relies on implicit ragged-array creation,
# which is deprecated (and an error in newer NumPy releases).
_entropy_items = (logits_bs, labels_bs, entropy,
                  np.array(entropy_sort[0]), np.array(entropy_sort[1]))
_entropy_record = np.empty(len(_entropy_items), dtype=object)
for _slot, _item in enumerate(_entropy_items):
    _entropy_record[_slot] = _item
np.save('entropy_sample.npy', _entropy_record, allow_pickle=True, fix_imports=True)

  logits_bs_ = torch.tensor(logits_bs)
  logits = torch.tensor(logits)


minmax entropy: tensor(1.2066e-07) tensor(22968) tensor(5.3163) tensor(20630)
