# $\alpha$=0.1

In [20]:
import numpy as np
import torch
import torchvision.transforms as transforms              
from torchvision.datasets import CIFAR100      
from torch.utils.data import DataLoader
import timm
import detectors
from src.saps import saps_test
from src.temperature_scaling import ModelWithTemperature

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("vgg16_bn_cifar100", pretrained=True)
model = model.to(device)

# reprocess the images from CIFAR100
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))  # normalize
])
# load images from CIFAR100
dataset = CIFAR100(root="../../data", train=False, download=True, transform=data_transform)

temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=4.85).to(device)
model.set_temperature(temp_scal_loader)

model.eval()

saps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.03)

Files already downloaded and verified
Before temperature - NLL: 1.481, ECE: 0.159
Optimal temperature: 4.690
After temperature - NLL: 2.898, ECE: 0.617
SAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.1954556122422221
Average Prediction Set Size After APS in runs 1: 4.455
Average Coverage Rate in runs 1: 0.9018

Running experiment 2/10...
t_cal = 0.20119385421276215
Average Prediction Set Size After APS in runs 2: 4.6432
Average Coverage Rate in runs 2: 0.905

Running experiment 3/10...
t_cal = 0.2052663028240208
Average Prediction Set Size After APS in runs 3: 4.791
Average Coverage Rate in runs 3: 0.9074

Running experiment 4/10...
t_cal = 0.19490052759647403
Average Prediction Set Size After APS in runs 4: 4.4166
Average Coverage Rate in runs 4: 0.9006

Running experiment 5/10...
t_cal = 0.1950000733137131
Average Prediction Set Size After APS in runs 5: 4.4344
Average Coverage Rate in runs 5: 0.901

Running experiment 6/10...
t_cal = 0.1877748563885689
Average Pred

# $\alpha$=0.2

In [12]:
saps_test(model, dataset, device, num_runs=10, alpha=0.2, lambda_=0.07)

SAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.13978684544563297
Average Prediction Set Size After APS in runs 1: 1.6802
Average Coverage Rate in runs 1: 0.8024

Running experiment 2/10...
t_cal = 0.13945552110672002
Average Prediction Set Size After APS in runs 2: 1.688
Average Coverage Rate in runs 2: 0.806

Running experiment 3/10...
t_cal = 0.1446319341659546
Average Prediction Set Size After APS in runs 3: 1.775
Average Coverage Rate in runs 3: 0.8128

Running experiment 4/10...
t_cal = 0.13676990866661073
Average Prediction Set Size After APS in runs 4: 1.6586
Average Coverage Rate in runs 4: 0.7928

Running experiment 5/10...
t_cal = 0.14065182507038115
Average Prediction Set Size After APS in runs 5: 1.724
Average Coverage Rate in runs 5: 0.8036

Running experiment 6/10...
t_cal = 0.13737393617630014
Average Prediction Set Size After APS in runs 6: 1.6694
Average Coverage Rate in runs 6: 0.7948

Running experiment 7/10...
t_cal = 0.13746176660060888
Average P

# $\alpha$=0.05

In [16]:
saps_test(model, dataset, device, num_runs=10, alpha=0.05, lambda_=0.01)

SAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.19382024630904213
Average Prediction Set Size After APS in runs 1: 11.1816
Average Coverage Rate in runs 1: 0.9456

Running experiment 2/10...
t_cal = 0.2016237348318102
Average Prediction Set Size After APS in runs 2: 11.963
Average Coverage Rate in runs 2: 0.9518

Running experiment 3/10...
t_cal = 0.20679849013686186
Average Prediction Set Size After APS in runs 3: 12.5062
Average Coverage Rate in runs 3: 0.9528

Running experiment 4/10...
t_cal = 0.19961876124143602
Average Prediction Set Size After APS in runs 4: 11.7172
Average Coverage Rate in runs 4: 0.9494

Running experiment 5/10...
t_cal = 0.19886958003044145
Average Prediction Set Size After APS in runs 5: 11.6896
Average Coverage Rate in runs 5: 0.949

Running experiment 6/10...
t_cal = 0.18686020597815525
Average Prediction Set Size After APS in runs 6: 10.5318
Average Coverage Rate in runs 6: 0.9418

Running experiment 7/10...
t_cal = 0.20650639981031435
A

# Result
  
$\alpha$=0.1
- Final Average **Prediction Set Size： 4.40**
- Final Average **Coverage: 89.96%**  

$\alpha$=0.2
- Final Average **Prediction Set Size： 1.69**
- Final Average **Coverage: 79.94%**  

$\alpha$=0.05
- Final Average **Prediction Set Size： 11.48**
- Final Average **Coverage: 94.78%**