# 1. Load Model

In [4]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.models import ResNet50_Weights

# check GPU status
print("Is CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

# load pre-trained model InceptionV3 and set mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet50(weights=ResNet50_Weights.DEFAULT).to(device)
model.eval()

Is CUDA available: True
Device count: 1
Device name: NVIDIA GeForce RTX 3060 Ti


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\jiayang/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 107MB/s] 


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# 2. Split dataset


In [5]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader,  random_split

def split_data_set(dataset, random_seed):
    if random_seed is not None:
        torch.manual_seed(random_seed)  # set input as random seed

    # split image set ---> half for calibration data set, half for test data set
    dataset_length = len(dataset)
    print(f"Samples amount: {dataset_length}")
    calib_length = dataset_length // 2               
    test_length = dataset_length - calib_length      

    calib_dataset, test_dataset = random_split(dataset, [calib_length, test_length])
    return calib_dataset, test_dataset

# 3. Non-conformity score function 

**Vectorization operation**:
- softmaxs = torch.tensor([<br>
    [0.1, 0.4, 0.2, 0.3, 0.0], <br>
    [0.3, 0.2, 0.4, 0.0, 0.1], <br>
    [0.5, 0.1, 0.0, 0.3, 0.1], <br>
])  # Shape: (3, 5)

- true_labels = torch.tensor([1, 2, 0]) # Shape:(3,)

- sorted_softmax, sorted_indices = torch.sort(softmaxs, descending=True, dim=1)<br>

  sorted_softmax： <br>
  tensor([<br>
    [0.4, 0.3, 0.2, 0.1, 0.0],  <br>
    [0.4, 0.3, 0.2, 0.1, 0.0],  <br>
    [0.5, 0.3, 0.1, 0.1, 0.0],  <br>
    ])  # Shape: (3, 5)

  sorted_indices：<br>
  tensor([ <br>
    [1, 3, 2, 0, 4],  <br>
    [2, 0, 1, 4, 3],  <br>
    [0, 3, 1, 4, 2],  <br>
  ])  # Shape: (3, 5)

- true_label_positions = (sorted_indices == true_labels.unsqueeze(1)).nonzero(as_tuple=True)[1]

  true_labels.unsqueeze(1) -->  extend dimension to align with sorted_indices: <br>
  tensor([ <br>
    [1],   <br>
    [2],   <br>
    [0],   <br>
  ])  # Shape: (3, 1)

  (sorted_indices == true_labels.unsqueeze(1)): <br>
  tensor([<br>
    [True, False, False, False, False],  <br>
    [True, False, False, False, False],  <br>
    [True, False, False, False, False],  <br>
  ])

  .nonzero(as_tuple=True)[1] --> extract the index of True (true labels' psoitions): <br>
  true_label_positions = tensor([0, 0, 0]) # Shape:(3,)

- u = torch.rand(true_labels.size(0), device=device)：

  u = tensor([0.6, 0.8, 0.4])  # Shape: (3,), for example

- max_softmax = sorted_softmax[:, 0] --> extract the maximal probabilities

  max_softmax = tensor([0.4, 0.4, 0.5])  # Shape: (3,)


In [6]:
import numpy as np

# conformal function s(x,y)
def conformal_scores(model, dataloader, alpha=0.1, lambda_=0.1):
    scores = []  # conformal scores of image sets
    labels = []  # true label sets 
    with torch.no_grad():
        for images, true_labels in dataloader:
            images, true_labels = images.to(device), true_labels.to(device)
            outputs = model(images)
            softmaxs = torch.softmax(outputs, dim=1)

            # extract true lables' ranking/positions
            sorted_softmax, sorted_indices = torch.sort(softmaxs, descending=True, dim=1)
            true_label_positions = (sorted_indices == true_labels.unsqueeze(1)).nonzero(as_tuple=True)[1]

            # extract maximal probabilities
            max_softmax = sorted_softmax[:, 0]

            # random variable u(s)
            u = torch.rand(true_labels.size(0), device=device)
            # scores of samples whose correct label is top-ranking --> u * max_softmax
            is_top = (true_label_positions == 0)
            scores_top_rank = u * max_softmax

            # scores of samples whose correct label is  not top-ranking
            # s = max_softmax + (o-2+u) * lambda = max_softmax + (true_lable_position+1-2+u) * lambda
            scores_other_rank = max_softmax + ((true_label_positions - 1).float() + u) * lambda_
            
            conformal_scores = torch.where(is_top, scores_top_rank, scores_other_rank)
            scores.extend(conformal_scores.cpu().tolist())
            labels.extend(true_labels.cpu().tolist())
    return scores, labels

In [14]:
def saps_classification(model, dataloader, t_cal, lambda_=0.1):
    saps = []         # probability set
    saps_labels = []  # label set indicated to the probability set
    labels = []       # true label
    with torch.no_grad():
        for images, true_labels in dataloader:
            images, true_labels = images.to(device), true_labels.to(device)
            outputs = model(images)
            softmax = torch.softmax(outputs, dim=1)

            # sort probabilities
            sorted_softmax, sorted_indices = torch.sort(softmax, descending=True, dim=1)

            # random variable u(s) 
            u = torch.rand(sorted_softmax.shape, device=device) # Shape: (batch_size, 1000)
            # random variable for maximal probabilities
            u_f_max = torch.rand(sorted_softmax.shape[0], device=device).unsqueeze(1)       # Shape: (batch_size, 1)

            # rank of current sorted probability: [1,2,3,...,1000]
            rank = torch.arange(1, sorted_softmax.size(1) + 1, device=device).unsqueeze(0)  # shape: [1, 1000]

            # s = f_max + (o-2+u) * lambda
            # scores --> all the label has been calculate as non-top-ranked label now
            f_max = sorted_softmax[:, 0].unsqueeze(1)               # Shape: (batch_size, 1)
            scores = f_max + ((rank - 2).float() +  u) * lambda_    # Shape: (batch_size, 1000)

            # replace the firt column with u * f_max
            scores[:, 0] = (u_f_max * f_max).squeeze(1) # Shape: (batch_size,)

            # construct prediction sets
            for i in range(len(images)):
                # select indices whose scores <= t_cal
                selected_indices = (scores[i] <= t_cal).nonzero(as_tuple=True)[0]
                
                # add selected label to prediction set
                saps.append(sorted_softmax[i][selected_indices].tolist())
                saps_labels.append(sorted_indices[i][selected_indices].tolist())
                labels.append(true_labels[i].item())  
    return saps, saps_labels, labels

In [15]:
def eval_aps(aps_labels, true_labels):
    total_set_size = 0
    coveraged = 0
    for aps_label, true_label in zip(aps_labels, true_labels):
        # cumulate total set size
        total_set_size += len(aps_label)
        # cumulate the predictions sets if it contains true label
        if true_label in aps_label:
            coveraged += 1

    # calculate average values
    samples_amount = len(true_labels)
    average_set_size = total_set_size / samples_amount
    average_coverage = coveraged / samples_amount
    print(f"Total set size: {total_set_size}")
    print(f"Total coverage sets: {coveraged}")
    print(f"Total samples amount: {samples_amount}")
    return average_set_size, average_coverage

In [16]:
# The number of times the experiment is going to be repeated
num_runs = 10

# error rate
alpha = 0.1
lambda_ = 0.1

#  Reprocess: Center Crop and then resize to 299*299
data_transform = transforms.Compose([
    transforms.CenterCrop(256),
    transforms.Resize(256), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])


import matplotlib.pyplot as plt
from PIL import Image

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)


# contruct and evaluate repeatedly
all_avg_set_sizes = []
all_avg_coverages = []
print("SAPS Classification, Start!\n")


for i in range(num_runs):
    print(f"Running experiment {i+1}/{num_runs}...")

    # splite dataset
    calib_dataset, test_dataset = split_data_set(dataset, random_seed=i)

    # load data set respectively
    calib_loader = DataLoader(calib_dataset, batch_size=32, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

    # calculate q_hat
    calib_scores, _ = conformal_scores(model, calib_loader, alpha, lambda_)
    t_cal = np.quantile(calib_scores, 1 - alpha)  # calculate 1-alpha quantile
    print(f"t_cal = {t_cal}")

    # construct APS
    aps, aps_labels, true_labels = saps_classification(model, test_loader, t_cal, lambda_)

    # evaluate APS
    avg_set_size, avg_coverage = eval_aps(aps_labels, true_labels)
    print(f"Average Prediction Set Size After APS in runs {i+1}: {avg_set_size}")
    print(f"Average Coverage Rate in runs {i+1}: {100 * avg_coverage:.2f}%\n")

    # record current result
    all_avg_set_sizes.append(avg_set_size)
    all_avg_coverages.append(avg_coverage)

# calculate the final average result
final_avg_set_size = np.mean(all_avg_set_sizes)
final_avg_coverage = np.mean(all_avg_coverages)

print(f"Final Average Prediction Set Size: {final_avg_set_size}")
print(f"Final Average Coverage: {final_avg_coverage}")

SAPS Classification, Start!

Running experiment 1/10...
Samples amount: 50000
t_cal = 0.45717104077339177
Total set size: 84967
Total coverage sets: 22476
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 3.39868
Average Coverage Rate in runs 1: 89.90%

Running experiment 2/10...
Samples amount: 50000
t_cal = 0.4579506456851962
Total set size: 84750
Total coverage sets: 22490
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 3.39
Average Coverage Rate in runs 2: 89.96%

Running experiment 3/10...
Samples amount: 50000
t_cal = 0.47175684869289397
Total set size: 88305
Total coverage sets: 22641
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 3.5322
Average Coverage Rate in runs 3: 90.56%

Running experiment 4/10...
Samples amount: 50000
t_cal = 0.4502555310726166
Total set size: 83029
Total coverage sets: 22376
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 3.32116
Average Co

## Result
From the above test, following results can be collected :
- Final Average Prediction Set Size: 3.40 / 1000
- Final Average Coverage: 89.97% ($\alpha$=0.1)