In [25]:
! wget -nv -O imagenet_val.tar.gz -L https://berkeley.box.com/shared/static/pouthcomrvxw9hj64oxhacjvqdw3ihlp.gz
! mkdir imagenet_val
! tar -xf imagenet_val.tar.gz -C ./imagenet_val/
! mv imagenet_val/scratch/group/ilsvrc/val/* imagenet_val/
! rm -r imagenet_val/scratch

2025-04-13 11:17:51 URL:https://public.boxcloud.com/d/1/b1!gd-i9MlBheWoW19HTEgDHSLs4dETHkw8dlIR3Ygu_zFXwH1X3X6kDu_NPANnAmiurHrtuQgtqXkzjBjjKn18y-YHajXoSz_JBoR2OwPWu5dPh1DOi_lmlNwuMiyakVhv-1SdmrLUtAj1pr4UTelnSixwKHRzpJrmaAOg5ysRrlYB8zRq6Bk-Fv8OfAXV7EPobhxThoSFdaVQdHQqKBsTRfk-gu0s0ttDs2UhizuciEZcA52_Jz7K-bGIbBaHz6koY_lS03yS-wPEZcSraOXVsU_U6PcyIa14rslPTUpxx9Xfq3yVk9lA_qSpxEtWC01i70Dv3E14RhFFRpk7SVWlJsVGXOOf_Qxlu6-bcRBZkHkYkCYLKBQpyFySTfRCAetiO0l1yWCZDR2W9GITfo4mH4axWvqWS_nLLhqognnkjg95o0YM9NnSO0VPF3xP61Jc-90WEkjKZiOUx5CcxS0F5bdFUzeiUDNOS1v-ShQd0NgbKXI6cRRaIfbu0ljZCPUwc2dz80MZwXFs9spJGJpPBLzB5ZhQCT8Fr8SC_M_81tBk43C_rTDkiGto7v4L0CFZ2-siqHfsjXHplzDckEhlPoEr4kUJ_brUeQ-056Gl96fThwywCzHDjvncWvm2JpIUpbKK4aKaSQ8OBoC67F65ePVJYalTcqAL5ZjymWtQtf2rAkTx9QxUrVC7HtcNPcnpgkGr8brHs_HmM41SiShKpzRZJjI6CjNDB-aq8FeM8iL9gzLZGRLw9AU7QTCYzWOgm9Hg1NWBZbr0g8UY09mqYm8F29G6s53JrdBuU1nYiIk5lgMs_W_DHoGpcn3yZlN-oJA5avbaoGfXWDkTkHv0J0tTitTEiCNfq2vWjq2N6ZS6WnNYThIYbQkMg4eiVRESpBQH8i7g8nxkX8viJhRwCkLZTq6UHloDmfTIxjMGV9khw

In [26]:
import torch
from torchvision import models, transforms
from tqdm import tqdm
import numpy as np
import torchvision
import matplotlib.pyplot as plt
from torch import nn, optim
from scipy.special import softmax
from tqdm import tqdm


In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


# Load the models

In [28]:
model_constructors = {
    "ResNeXt101":models.resnext101_64x4d,
    "ResNet152": models.resnet152,
    "ResNet101": models.resnet101,
    "ResNet50": models.resnet50,
    "ResNet18": models.resnet18,
    "VGG16_BN": models.vgg16_bn
}

# Initialize and move models to device
model_dict = {}
for name, constructor in model_constructors.items():
    model = constructor(pretrained=True, progress=True)
    model.eval()
    model = model.to(device)
    model_dict[name] = model

# Transformation pipeline
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
])

 # Load the data set

In [29]:
num_calib = 5000

# Get the conformal calibration dataset
imagenet_calib_data, imagenet_val_data , imagenet_param_data= torch.utils.data.random_split(torchvision.datasets.ImageFolder('./imagenet_val/', transform), [num_calib, 50000-num_calib-1000, 1000])
batch_size = 128

# Reduce the size of the validation set to 10000
imagenet_val_data = torch.utils.data.Subset(imagenet_val_data, range(10000))

# Initialize loaders
calib_loader = torch.utils.data.DataLoader(imagenet_calib_data, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = torch.utils.data.DataLoader(imagenet_val_data, batch_size=batch_size, shuffle=True, pin_memory=True)
param_loader = torch.utils.data.DataLoader(imagenet_param_data, batch_size=batch_size, shuffle=True, pin_memory=True)

 # Function for Conformal Prediction Procedures

In [30]:

def get_logits(model, dataloader, device):
    model.eval()
    logits_list, labels_list = [], []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            logits_list.append(outputs.cpu())
            labels_list.append(labels)
    return torch.cat(logits_list), torch.cat(labels_list)

def temperature_scaling(logits, labels, max_iters=50, lr=0.01, epsilon=1e-4):
    T = nn.Parameter(torch.tensor([1.3], requires_grad=True))
    optimizer = optim.SGD([T], lr=lr)
    criterion = nn.CrossEntropyLoss()

    for i in range(max_iters):
        optimizer.zero_grad()
        scaled_logits = logits / T
        loss = criterion(scaled_logits, labels)
        loss.backward()
        optimizer.step()
        if abs(loss.item()) < epsilon:
            break
    return T.detach()

def compute_scores(sorted_probs_list, sorted_indices_list, labels,
                   lambda_reg=0.1, k_reg=5, randomized=True, allow_zero_sets=True):
    scores = []
    for i in range(len(labels)):
        sorted_probs = sorted_probs_list[i]
        sorted_indices = sorted_indices_list[i]
        cumsum = np.cumsum(sorted_probs)

        penalties = np.zeros_like(sorted_probs)
        penalties[k_reg:] += lambda_reg
        penalties_cumsum = np.cumsum(penalties)

        target = labels[i].item()
        rank = np.where(sorted_indices == target)[0][0]

        if not randomized:
            tau = cumsum[rank] + penalties_cumsum[rank]
        else:
            U = np.random.rand()
            if rank == 0:
                tau = U * cumsum[0] + penalties_cumsum[0] if allow_zero_sets else cumsum[0] + penalties_cumsum[0]
            else:
                tau = U * sorted_probs[rank] + cumsum[rank - 1] + penalties_cumsum[rank]

        scores.append(tau)
    return np.array(scores)


def compute_threshold(scores, alpha):
    return np.quantile(scores, 1 - alpha, method="higher")

def predict_set(sorted_probs, sorted_indices, tau, lambda_reg=0.1, k_reg=5, randomized=True, allow_zero_sets=True):

    cumsum = np.cumsum(sorted_probs)

    penalties = np.zeros_like(sorted_probs)
    penalties[k_reg:] += lambda_reg
    penalties_cumsum = np.cumsum(penalties)

    sizes_base = (cumsum + penalties_cumsum <= tau).sum() + 1
    sizes_base = min(sizes_base, len(sorted_probs))

    if randomized and sizes_base < len(sorted_probs):
        V = 1 / sorted_probs[sizes_base - 1] * (
            tau - (cumsum[sizes_base - 1] - sorted_probs[sizes_base - 1]) - penalties_cumsum[sizes_base - 1]
        )
        sizes = sizes_base - int(np.random.rand() >= V)
    else:
        sizes = sizes_base

    if tau == 1.0:
        sizes = len(sorted_probs)

    if not allow_zero_sets and sizes == 0:
        sizes = 1

    sizes = max(sizes, 1)
    return sorted_indices[:sizes].tolist()


def platt_scaling(logits_data, T):
    logits_data = logits_data.cpu().numpy()
    sorted_probs = []
    sorted_indices =[]
    for i in range(len(logits_data)):
        scaled_logits = logits_data[i] / T.item()
        probs = softmax(scaled_logits)
        sorted_indices.append(np.argsort(probs)[::-1])
        sorted_probs.append(np.sort(probs)[::-1])

    return sorted_probs, sorted_indices

def optimal_k_reg(sorted_indices_list, labels, alpha):

    ranks = []
    for i in range(len(labels)):
        true_label = labels[i].item()
        rank = np.where(sorted_indices_list[i] == true_label)[0][0]
        ranks.append(rank)

    k_reg = compute_threshold(np.array(ranks), alpha=alpha)
    return k_reg



## Initializing hypermateers and Models

In [31]:
# Hyperparameters
lambda_reg = 0.1
k_reg = 5
randomized = True
allow_zero_sets = True
allow_optimal_k = True
alpha = 0.1
methods = ['RAPS', 'APS']

models_val = {
    'ResNet18': model_dict['ResNet18'],
    'ResNet50': model_dict['ResNet50'],
    'ResNet101': model_dict['ResNet101'],
    'ResNet152': model_dict['ResNet152'],
    'ResNeXt101': model_dict['ResNeXt101'],
    'VGG16_BN': model_dict['VGG16_BN']
}

model_results = {}

## Checking for each model

In [32]:
for z in range(5):
    print("Iteration: ", z)

    # Get the conformal calibration dataset
    imagenet_calib_data, imagenet_val_data , imagenet_param_data= torch.utils.data.random_split(torchvision.datasets.ImageFolder('./imagenet_val/', transform), [num_calib, 50000-num_calib-1000, 1000])
    batch_size = 128

    # Reduce the size of the validation set to 10000
    imagenet_val_data = torch.utils.data.Subset(imagenet_val_data, range(10000))

    # Initialize loaders
    calib_loader = torch.utils.data.DataLoader(imagenet_calib_data, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(imagenet_val_data, batch_size=batch_size, shuffle=True, pin_memory=True)
    param_loader = torch.utils.data.DataLoader(imagenet_param_data, batch_size=batch_size, shuffle=True, pin_memory=True)

    for model_name, model in models_val.items():
        print(f"\nRunning model: {model_name}")



        # Get logits
        logits_calib, labels_calib = get_logits(model, calib_loader, device)
        logits_test, labels_test = get_logits(model, test_loader, device)
        param_logits, param_labels = get_logits(model, param_loader, device)

        # Scaling
        T = temperature_scaling(logits_calib, labels_calib)
        sorted_probs_calib, sorted_indices_calib = platt_scaling(logits_calib, T)
        sorted_probs_test, sorted_indices_test = platt_scaling(logits_test, T)
        sorted_probs_param, sorted_indices_param = platt_scaling(param_logits, T)

        results = {}

        # RAPS
        if allow_optimal_k:
            k_reg = optimal_k_reg(sorted_indices_param, param_labels, alpha=alpha)
            print(f"Optimal k_reg for alpha {alpha}: {k_reg}")

        scores = compute_scores(sorted_probs_calib, sorted_indices_calib, labels_calib,
                                lambda_reg=lambda_reg, k_reg=k_reg,
                                randomized=randomized, allow_zero_sets=allow_zero_sets)
        tau = compute_threshold(scores, alpha)

        prediction_sets = [
            predict_set(sorted_probs_test[i], sorted_indices_test[i], tau,
                        lambda_reg=lambda_reg, k_reg=k_reg,
                        randomized=randomized, allow_zero_sets=allow_zero_sets)
            for i in range(len(sorted_probs_test))
        ]
        sizes = [len(pset) for pset in prediction_sets]
        coverage = np.mean([
            labels_test[i].item() in prediction_sets[i] for i in range(len(labels_test))
        ])

        results['RAPS'] = {
            'size': np.mean(sizes),
            'coverage': coverage
        }

        # APS (lambda=0, k_reg=0)
        scores_aps = compute_scores(sorted_probs_calib, sorted_indices_calib, labels_calib,
                                    lambda_reg=0, k_reg=0,
                                    randomized=randomized, allow_zero_sets=allow_zero_sets)
        tau_aps = compute_threshold(scores_aps, alpha)

        prediction_sets_aps = [
            predict_set(sorted_probs_test[i], sorted_indices_test[i], tau_aps,
                        lambda_reg=0, k_reg=0,
                        randomized=randomized, allow_zero_sets=allow_zero_sets)
            for i in range(len(sorted_probs_test))
        ]
        sizes_aps = [len(pset) for pset in prediction_sets_aps]
        coverage_aps = np.mean([
            labels_test[i].item() in prediction_sets_aps[i] for i in range(len(labels_test))
        ])

        results['APS'] = {
            'size': np.mean(sizes_aps),
            'coverage': coverage_aps
        }

        # Save results
        model_results[model_name] = results


    print("\n=== RAPS vs APS Results ===")
    header = f"{'Metric':<12} | {'Method':<6} | " + " | ".join([f"{model:<12}" for model in model_results.keys()])
    print("-" * len(header))
    print(header)
    print("-" * len(header))

    # Print Prediction Set Size
    print(f"{'Set Size':<12} | {'APS':<6} | " + " | ".join([f"{model_results[model]['APS']['size']:<12.3f}" for model in model_results]))
    print(f"{'':<12} | {'RAPS':<6} | " + " | ".join([f"{model_results[model]['RAPS']['size']:<12.3f}" for model in model_results]))
    print("-" * len(header))

    # Print Coverage
    print(f"{'Coverage':<12} | {'APS':<6} | " + " | ".join([f"{model_results[model]['APS']['coverage']:<12.3f}" for model in model_results]))
    print(f"{'':<12} | {'RAPS':<6} | " + " | ".join([f"{model_results[model]['RAPS']['coverage']:<12.3f}" for model in model_results]))
    print("-" * len(header))



Iteration:  0

Running model: ResNet18
Optimal k_reg for alpha 0.1: 5

Running model: ResNet50
Optimal k_reg for alpha 0.1: 3

Running model: ResNet101
Optimal k_reg for alpha 0.1: 2

Running model: ResNet152
Optimal k_reg for alpha 0.1: 2

Running model: ResNeXt101
Optimal k_reg for alpha 0.1: 1

Running model: VGG16_BN
Optimal k_reg for alpha 0.1: 4

=== RAPS vs APS Results ===
---------------------------------------------------------------------------------------------------------------
Metric       | Method | ResNet18     | ResNet50     | ResNet101    | ResNet152    | ResNeXt101   | VGG16_BN    
---------------------------------------------------------------------------------------------------------------
Set Size     | APS    | 18.067       | 13.737       | 12.406       | 11.678       | 17.391       | 13.688      
             | RAPS   | 4.226        | 2.641        | 2.202        | 2.060        | 1.415        | 3.209       
---------------------------------------------------------