# $\alpha$=0.1

In [4]:
import numpy as np
import torch
import torchvision.transforms as transforms               # include image preprocess tools
from torchvision.datasets import CIFAR10        # for loading images from Pytorch CIFAR
from torch.utils.data import DataLoader
import detectors
import timm
from src.saps import saps_test
from src.temperature_scaling import ModelWithTemperature

# Experiment setup for SAPS conformal prediction on CIFAR-10 with a
# pretrained VGG16-BN: load the model and data, calibrate a softmax
# temperature, then run the SAPS evaluation at alpha = 0.1.

# Seed the RNGs so the shuffled calibration loader (and, presumably, the
# random calibration/test splits drawn inside saps_test) are repeatable
# under Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("vgg16_bn_cifar10", pretrained=True)
model = model.to(device)
# Put the backbone in inference mode BEFORE temperature scaling. A freshly
# constructed nn.Module is in training mode, so without this the calibration
# forward passes would run with Dropout active and would update BatchNorm
# running statistics (no_grad does not prevent that).
# NOTE(review): harmless if ModelWithTemperature.set_temperature already
# switches to eval mode internally -- confirm against src/temperature_scaling.
model.eval()

# preprocess the images from CIFAR
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # normalize
])
# load images from CIFAR10 (test split only: train=False)
dataset = CIFAR10(root="../../data", train=False, download=True, transform=data_transform)

# temperature scaling
# NOTE(review): the temperature is fitted on the same test split that is
# passed to saps_test below; consider a held-out calibration subset.
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=5.0).to(device)
model.set_temperature(temp_scal_loader)

# Make sure the wrapped model is in inference mode for the evaluation too.
model.eval()

saps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=5.0)

Files already downloaded and verified
Before temperature - NLL: 0.408, ECE: 0.059
Optimal temperature: 4.903
After temperature - NLL: 0.916, ECE: 0.472
SAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.4873966455459596
Average Prediction Set Size After APS in runs 1: 0.9836
Average Coverage Rate in runs 1: 0.9014

Running experiment 2/10...
t_cal = 0.4915856093168259
Average Prediction Set Size After APS in runs 2: 0.9894
Average Coverage Rate in runs 2: 0.9038

Running experiment 3/10...
t_cal = 0.49294272065162664
Average Prediction Set Size After APS in runs 3: 0.9866
Average Coverage Rate in runs 3: 0.9008

Running experiment 4/10...
t_cal = 0.491474571824074
Average Prediction Set Size After APS in runs 4: 0.9846
Average Coverage Rate in runs 4: 0.902

Running experiment 5/10...
t_cal = 0.48612654805183414
Average Prediction Set Size After APS in runs 5: 0.9834
Average Coverage Rate in runs 5: 0.898

Running experiment 6/10...
t_cal = 0.49834159612655665
Average Pr

# $\alpha$=0.05

In [14]:
# Re-run the SAPS evaluation with the already-calibrated model at a stricter
# miscoverage level (alpha = 0.05, i.e. a 95% coverage target).
saps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
    lambda_=0.25,
)

SAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.5247154742479325
Average Prediction Set Size After APS in runs 1: 1.2798
Average Coverage Rate in runs 1: 0.952

Running experiment 2/10...
t_cal = 0.5247511148452759
Average Prediction Set Size After APS in runs 2: 1.2786
Average Coverage Rate in runs 2: 0.9508

Running experiment 3/10...
t_cal = 0.5262852221727374
Average Prediction Set Size After APS in runs 3: 1.2814
Average Coverage Rate in runs 3: 0.9494

Running experiment 4/10...
t_cal = 0.5280066579580307
Average Prediction Set Size After APS in runs 4: 1.297
Average Coverage Rate in runs 4: 0.9486

Running experiment 5/10...
t_cal = 0.5276476025581361
Average Prediction Set Size After APS in runs 5: 1.2896
Average Coverage Rate in runs 5: 0.9516

Running experiment 6/10...
t_cal = 0.5341581851243973
Average Prediction Set Size After APS in runs 6: 1.3034
Average Coverage Rate in runs 6: 0.9556

Running experiment 7/10...
t_cal = 0.5282776832580567
Average Predi

# Result
$\alpha$=0.1
- Final Average **Prediction Set Size: 0.99**
- Final Average **Coverage: 90.28%**  

$\alpha$=0.05
- Final Average **Prediction Set Size: 1.29**
- Final Average **Coverage: 95.20%**