# $\alpha$=0.1

In [5]:
import torch
import numpy as np
import torchvision.transforms as transforms               # include image preprocess tools
from torchvision.datasets import CIFAR100        # for loading images from Pytorch CIFAR
from torch.utils.data import DataLoader
import timm
import detectors
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("vgg16_bn_cifar100", pretrained=True)
model.to(device)

# reprocess the images from CIFAR
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))  # normalize
])
# load images from CIFAR100
dataset = CIFAR100(root="../../data", train=False, download=True, transform=data_transform)

# Temperature Scaling
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=5.0).to(device)
model.set_temperature(temp_scal_loader)

model.eval()

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Files already downloaded and verified
Before temperature - NLL: 1.484, ECE: 0.157
Optimal temperature: 4.849
After temperature - NLL: 2.953, ECE: 0.618
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.16135387271642687
Total set size: 24384
Total coverage sets: 4488
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 4.8768
Average Coverage Ratein runs 1: 0.8976

Running experiment 2/10...
q_hat = 0.16425506025552777
Total set size: 25147
Total coverage sets: 4527
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 5.0294
Average Coverage Ratein runs 2: 0.9054

Running experiment 3/10...
q_hat = 0.1668781206011773
Total set size: 26083
Total coverage sets: 4549
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 5.2166
Average Coverage Ratein runs 3: 0.9098

Running experiment 4/10...
q_hat = 0.16193535625934607
Total set size: 24255
Total coverage sets: 4498
Total samples amount: 5000
Average Predi

# $\alpha$=0.2

In [6]:
aps_test(model, dataset, device, num_runs=10, alpha=0.2)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.10967705845832826
Total set size: 11147
Total coverage sets: 3989
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 2.2294
Average Coverage Rate in runs 1: 0.7978

Running experiment 2/10...
q_hat = 0.1103537514805794
Total set size: 11280
Total coverage sets: 4020
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 2.256
Average Coverage Rate in runs 2: 0.804

Running experiment 3/10...
q_hat = 0.11190841495990758
Total set size: 11600
Total coverage sets: 4042
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 2.32
Average Coverage Rate in runs 3: 0.8084

Running experiment 4/10...
q_hat = 0.10872741788625719
Total set size: 10764
Total coverage sets: 3929
Total samples amount: 5000
Average Prediction Set Size After APS in runs 4: 2.1528
Average Coverage Rate in runs 4: 0.7858

Running experiment 5/10...
q_hat = 0.11175488978624344
Total set size

# $\alpha$=0.05

In [7]:
aps_test(model, dataset, device, num_runs=10, alpha=0.05)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.282224164903164
Total set size: 68470
Total coverage sets: 4725
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 13.694
Average Coverage Rate in runs 1: 0.945

Running experiment 2/10...
q_hat = 0.3019949525594712
Total set size: 76430
Total coverage sets: 4764
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 15.286
Average Coverage Rate in runs 2: 0.9528

Running experiment 3/10...
q_hat = 0.2978443175554277
Total set size: 74809
Total coverage sets: 4762
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 14.9618
Average Coverage Rate in runs 3: 0.9524

Running experiment 4/10...
q_hat = 0.29061544686555874
Total set size: 71504
Total coverage sets: 4749
Total samples amount: 5000
Average Prediction Set Size After APS in runs 4: 14.3008
Average Coverage Rate in runs 4: 0.9498

Running experiment 5/10...
q_hat = 0.2854943990707399
Total set siz

# Result
  
$\alpha$=0.1
- Final Average **Prediction Set Size： 4.86**
- Final Average **Coverage: 89.80%**  

$\alpha$=0.2
- Final Average **Prediction Set Size： 2.24**
- Final Average **Coverage: 79.65%**  

$\alpha$=0.05
- Final Average **Prediction Set Size： 14.45**
- Final Average **Coverage: 94.91%**