# $\alpha$=0.1

In [6]:
import numpy as np
import torch
import torchvision.transforms as transforms               # include image preprocess tools
from torchvision.datasets import CIFAR10        # for loading images from Pytorch CIFAR
from torch.utils.data import DataLoader
import detectors
import timm
from src.saps import saps_test
from src.temperature_scaling import ModelWithTemperature

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("resnet18_cifar10", pretrained=True)
model = model.to(device)

# reprocess the images from CIFAR
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # normalize
])
# load images from CIFAR10
dataset = CIFAR10(root="../../data", train=False, download=True, transform=data_transform)

# temperature scaling
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=5.0).to(device)
model.set_temperature(temp_scal_loader)

model.eval()

saps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=2.2)

Files already downloaded and verified
Before temperature - NLL: 0.302, ECE: 0.042
Optimal temperature: 4.904
After temperature - NLL: 0.866, ECE: 0.462
SAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.5324455618858338
Average Prediction Set Size After APS in runs 1: 0.9932
Average Coverage Rate in runs 1: 0.899

Running experiment 2/10...
t_cal = 0.5322842359542846
Average Prediction Set Size After APS in runs 2: 0.9928
Average Coverage Rate in runs 2: 0.8954

Running experiment 3/10...
t_cal = 0.5302342057228089
Average Prediction Set Size After APS in runs 3: 0.9894
Average Coverage Rate in runs 3: 0.8948

Running experiment 4/10...
t_cal = 0.5235182702541352
Average Prediction Set Size After APS in runs 4: 0.9844
Average Coverage Rate in runs 4: 0.8854

Running experiment 5/10...
t_cal = 0.5330525934696199
Average Prediction Set Size After APS in runs 5: 0.9956
Average Coverage Rate in runs 5: 0.903

Running experiment 6/10...
t_cal = 0.5406745076179506
Average Pred

# $\alpha$=0.05

In [4]:
saps_test(model, dataset, device, num_runs=10, alpha=0.05, lambda_=2.2)

SAPS Classification, Start!

Running experiment 1/10...
t_cal = 1.0313972055912022
Average Prediction Set Size After APS in runs 1: 1.277
Average Coverage Rate in runs 1: 0.9496

Running experiment 2/10...
t_cal = 1.039244639873505
Average Prediction Set Size After APS in runs 2: 1.2748
Average Coverage Rate in runs 2: 0.95

Running experiment 3/10...
t_cal = 1.013372361660004
Average Prediction Set Size After APS in runs 3: 1.2666
Average Coverage Rate in runs 3: 0.948

Running experiment 4/10...
t_cal = 0.7495596408843995
Average Prediction Set Size After APS in runs 4: 1.1332
Average Coverage Rate in runs 4: 0.9374

Running experiment 5/10...
t_cal = 1.1102630317211162
Average Prediction Set Size After APS in runs 5: 1.3208
Average Coverage Rate in runs 5: 0.9516

Running experiment 6/10...
t_cal = 1.0079467833042155
Average Prediction Set Size After APS in runs 6: 1.2564
Average Coverage Rate in runs 6: 0.95

Running experiment 7/10...
t_cal = 1.1005182206630715
Average Prediction 

# Result
$\alpha$=0.1
- Final Average **Prediction Set Size： 0.99**
- Final Average **Coverage: 89.88%**  

$\alpha$=0.05
- Final Average **Prediction Set Size： 1.27**
- Final Average **Coverage: 94.88%**