# $\alpha$=0.1

In [2]:
import torch
import numpy as np
import torchvision.transforms as transforms               # include image preprocess tools
from torchvision.datasets import CIFAR100        # for loading images from Pytorch CIFAR
from torch.utils.data import DataLoader
import timm
import detectors
from src.raps import raps_test
from src.temperature_scaling import ModelWithTemperature

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load fine-tuned model
model = timm.create_model("resnet50_cifar100", pretrained=True)
model = model.to(device)

# reprocess the images from CIFAR
data_transform = transforms.Compose([
    transforms.ToTensor(),          # transfer to tensor
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))  # normalize
])
# load images from CIFAR10
dataset = CIFAR100(root="../../data", train=False, download=True, transform=data_transform)

# temperature scaling
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=4.85).to(device)
model.set_temperature(temp_scal_loader)

model.eval()

raps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.1, k_reg=1)

Files already downloaded and verified
Before temperature - NLL: 1.067, ECE: 0.086
Optimal temperature: 4.691
After temperature - NLL: 2.948, ECE: 0.673
RAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.40386754870414737
Total set size: 15337
Total coverage sets: 4538
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 3.0674
Average Coverage Ratein runs 1: 0.9076

Running experiment 2/10...
t_cal = 0.3998346805572511
Total set size: 15133
Total coverage sets: 4530
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 3.0266
Average Coverage Ratein runs 2: 0.906

Running experiment 3/10...
t_cal = 0.3886567294597627
Total set size: 14715
Total coverage sets: 4487
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 2.943
Average Coverage Ratein runs 3: 0.8974

Running experiment 4/10...
t_cal = 0.39517943859100346
Total set size: 14986
Total coverage sets: 4511
Total samples amount: 5000
Average Predict

# $\alpha$=0.2

In [2]:
raps_test(model, dataset, device, num_runs=10, alpha=0.2, lambda_=0.1, k_reg=3)

RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.1222862303256989
Total set size: 9248
Total coverage sets: 4029
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.8496
Average Coverage Rate in runs 1: 0.8058

Running experiment 2/10...
q_hat = 0.11924795657396318
Total set size: 8973
Total coverage sets: 3974
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.7946
Average Coverage Rate in runs 2: 0.7948

Running experiment 3/10...
q_hat = 0.12183211147785189
Total set size: 9210
Total coverage sets: 4011
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.842
Average Coverage Rate in runs 3: 0.8022

Running experiment 4/10...
q_hat = 0.12056894749403001
Total set size: 9093
Total coverage sets: 3993
Total samples amount: 5000
Average Prediction Set Size After APS in runs 4: 1.8186
Average Coverage Rate in runs 4: 0.7986

Running experiment 5/10...
q_hat = 0.12301954478025438
Total set size

# $\alpha$ =0.05

In [13]:
raps_test(model, dataset, device, num_runs=10, alpha=0.05, lambda_=0.02, k_reg=1)

RAPS Classification, Start!

Running experiment 1/10...
t_cal = 0.3306817322969437
Total set size: 34203
Total coverage sets: 4774
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 6.8406
Average Coverage Ratein runs 1: 0.9548

Running experiment 2/10...
t_cal = 0.30727898776531226
Total set size: 30589
Total coverage sets: 4747
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 6.1178
Average Coverage Ratein runs 2: 0.9494

Running experiment 3/10...
t_cal = 0.2989882439374925
Total set size: 29433
Total coverage sets: 4730
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 5.8866
Average Coverage Ratein runs 3: 0.946

Running experiment 4/10...
t_cal = 0.32574432939291015
Total set size: 33420
Total coverage sets: 4766
Total samples amount: 5000
Average Prediction Set Size After APS in runs 4: 6.684
Average Coverage Ratein runs 4: 0.9532

Running experiment 5/10...
t_cal = 0.322485250234604
Total set size: 32

# Result  

$\alpha$=0.1  
- Final Average **Prediction Set Size： 2.99**
- Final Average **Coverage: 90.13%**  

$\alpha$=0.2  
- Final Average **Prediction Set Size： 1.84**
- Final Average **Coverage: 79.93%**  

$\alpha$=0.05  
- Final Average **Prediction Set Size： 6.21**
- Final Average **Coverage: 94.85%**