# $\alpha$=0.1

In [1]:
import torch
import numpy as np
import torchvision.transforms as transforms               # include image preprocess tools
from torchvision.datasets import CIFAR10        # for loading images from Pytorch CIFAR
from torch.utils.data import DataLoader
import detectors
import timm
from src.raps import raps_test
from src.temperature_scaling import ModelWithTemperature

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("resnet18_cifar10", pretrained=True)
model = model.to(device)

# reprocess the images from CIFAR
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # normalize
])
# load images from CIFAR10
dataset = CIFAR10(root="../../data", train=False, download=True, transform=data_transform)

temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=5.0).to(device)
model.set_temperature(temp_scal_loader)

model.eval()

raps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.1, k_reg=1)

  from .autonotebook import tqdm as notebook_tqdm


Files already downloaded and verified
Before temperature - NLL: 0.307, ECE: 0.042
Optimal temperature: 4.904
After temperature - NLL: 0.866, ECE: 0.464
RAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.49672956466674834
Total set size: 5271
Total coverage sets: 4474
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.0542
Average Coverage Rate in runs 1: 0.8948

Running experiment 2/10...
t_cal = 0.4972993075847627
Total set size: 5273
Total coverage sets: 4483
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.0546
Average Coverage Rate in runs 2: 0.8966

Running experiment 3/10...
t_cal = 0.4978980988264084
Total set size: 5276
Total coverage sets: 4480
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.0552
Average Coverage Rate in runs 3: 0.896

Running experiment 4/10...
t_cal = 0.49334316849708565
Total set size: 5222
Total coverage sets: 4416
Total samples amount: 5000
Average Predict

# $\alpha$=0.05

In [2]:
raps_test(model, dataset, device, num_runs=10, alpha=0.05, lambda_=0.1, k_reg=2)

RAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.538928246498108
Total set size: 7305
Total coverage sets: 4728
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.461
Average Coverage Rate in runs 1: 0.9456

Running experiment 2/10...
t_cal = 0.5392107963562012
Total set size: 7384
Total coverage sets: 4734
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.4768
Average Coverage Rate in runs 2: 0.9468

Running experiment 3/10...
t_cal = 0.533281284570694
Total set size: 7175
Total coverage sets: 4723
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.435
Average Coverage Rate in runs 3: 0.9446

Running experiment 4/10...
t_cal = 0.5288998395204544
Total set size: 7029
Total coverage sets: 4686
Total samples amount: 5000
Average Prediction Set Size After APS in runs 4: 1.4058
Average Coverage Rate in runs 4: 0.9372

Running experiment 5/10...
t_cal = 0.5395091772079469
Total set size: 7324


# Result
$\alpha$=0.1
- Final Average **Prediction Set Size： 1.06**
- Final Average **Coverage: 89.57%**  

$\alpha$=0.05
- Final Average **Prediction Set Size： 1.46**
- Final Average **Coverage: 94.57%**