# $\alpha$=0.1

In [2]:
import torch
import numpy as np
import torchvision.transforms as transforms               
from torchvision.datasets import CIFAR10       
from torch.utils.data import DataLoader
import detectors
import timm
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("resnet34_cifar10", pretrained=True)
model.to(device)

# reprocess the images from CIFAR10
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # normalize
])
# load images from CIFAR10
dataset = CIFAR10(root="../../data", train=False, download=True, transform=data_transform)

# Temperature Scaling
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=5.0).to(device)
model.set_temperature(temp_scal_loader)

model.eval()

# The number of times the experiment is going to be repeated
num_runs = 10

# error rate
alpha = 0.1

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Files already downloaded and verified
Before temperature - NLL: 0.333, ECE: 0.045
Optimal temperature: 4.904
After temperature - NLL: 0.880, ECE: 0.466
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.47902144193649293
Total set size: 6183
Total coverage sets: 4478
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.2366
Average Coverage Rate in runs 1: 0.8956

Running experiment 2/10...
q_hat = 0.4775484055280686
Total set size: 6222
Total coverage sets: 4498
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.2444
Average Coverage Rate in runs 2: 0.8996

Running experiment 3/10...
q_hat = 0.4752652019262314
Total set size: 6112
Total coverage sets: 4446
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.2224
Average Coverage Rate in runs 3: 0.8892

Running experiment 4/10...
q_hat = 0.47388120591640487
Total set size: 6086
Total coverage sets: 4456
Total samples amount: 5000
Average Predict

# $\alpha$=0.05

In [3]:
aps_test(model, dataset, device, num_runs=10, alpha=0.05)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.5276373624801636
Total set size: 7757
Total coverage sets: 4734
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.5514
Average Coverage Rate in runs 1: 0.9468

Running experiment 2/10...
q_hat = 0.5281344681978226
Total set size: 7823
Total coverage sets: 4757
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.5646
Average Coverage Rate in runs 2: 0.9514

Running experiment 3/10...
q_hat = 0.5246659845113755
Total set size: 7649
Total coverage sets: 4739
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.5298
Average Coverage Rate in runs 3: 0.9478

Running experiment 4/10...
q_hat = 0.5255576133728027
Total set size: 7630
Total coverage sets: 4715
Total samples amount: 5000
Average Prediction Set Size After APS in runs 4: 1.526
Average Coverage Rate in runs 4: 0.943

Running experiment 5/10...
q_hat = 0.5298999458551408
Total set size: 7797

# Result
$\alpha$=0.1
- Final Average **Prediction Set Size： 1.23**
- Final Average **Coverage: 89.55%**  

$\alpha$=0.05
- Final Average **Prediction Set Size： 1.54**
- Final Average **Coverage: 94.70%**