# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet18_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.raps import raps_test

# Reproducibility: seed the global RNGs so the calibration subset drawn below
# (np.random.choice) is the same after every Restart & Run All.
SEED = 42
np.random.seed(SEED)
# presumably also fixes any torch-based random splitting inside raps_test — TODO confirm
torch.manual_seed(SEED)

# Preprocessing: resize, centre-crop to 224x224, convert to a tensor and
# normalise each channel with the mean/std constants below.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# NOTE(review): machine-specific absolute path; adjust it to the local copy of
# the class-sorted ImageNet validation folder before running.
sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Load the pre-trained ResNet-18 and move it to the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1).to(device)
# Switch to inference mode BEFORE calibrating. A freshly built nn.Module is in
# training mode, where BatchNorm normalises with batch statistics and updates
# its running estimates on every forward pass. Calibrating in that mode would
# fit the temperature on different logits than the ones produced at test time.
# set_temperature is project code not shown here, so do not rely on it to
# switch modes itself.
base_model.eval()

# Temperature scaling: fit the temperature on a random 10% subset of the dataset.
# NOTE(review): the subset is drawn from the same `dataset` that is later
# passed to raps_test, so calibration and evaluation images overlap — confirm
# this is intended.
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(base_model, temperature=1.0).to(device)
model.set_temperature(train_loader)

# Make sure the wrapped model is in inference mode for the RAPS runs as well.
model.eval()

# RAPS evaluation with alpha=0.1 over 10 runs.
raps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.1, k_reg=6)

Before temperature - NLL: 1.346, ECE: 0.029
Optimal temperature: 0.993
After temperature - NLL: 1.346, ECE: 0.029


RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.9872312784194947
Total set size: 113756
Total coverage sets: 22563
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 4.55024
Average Coverage Rate in runs 1: 90.25%

Running experiment 2/10...
q_hat = 0.9862579643726351
Total set size: 112588
Total coverage sets: 22539
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 4.50352
Average Coverage Rate in runs 2: 90.16%

Running experiment 3/10...
q_hat = 0.985847705602646
Total set size: 112328
Total coverage sets: 22517
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 4.49312
Average Coverage Rate in runs 3: 90.07%

Running experiment 4/10...
q_hat = 0.9834862530231476
Total set size: 110667
Total coverage sets: 22420
Total samples amount: 25000
Average Prediction Set Size After 

# $\alpha$=0.2

In [2]:
# Repeat the RAPS evaluation with alpha=0.2, reusing the `model`, `dataset`
# and `device` objects created in the first cell.
raps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
    lambda_=0.05,
    k_reg=6,
)



RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.8508627057075502
Total set size: 76746
Total coverage sets: 20090
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 3.06984
Average Coverage Rate in runs 1: 80.36%

Running experiment 2/10...
q_hat = 0.8509519696235657
Total set size: 76464
Total coverage sets: 20003
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 3.05856
Average Coverage Rate in runs 2: 80.01%

Running experiment 3/10...
q_hat = 0.8515253901481629
Total set size: 76621
Total coverage sets: 20083
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 3.06484
Average Coverage Rate in runs 3: 80.33%

Running experiment 4/10...
q_hat = 0.8448399305343629
Total set size: 75777
Total coverage sets: 19897
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 3.03108
Average Coverage Rate in runs 4: 79.59%

Running experiment 5/10...
q_hat = 0.8487290382385254

# $\alpha$=0.05

In [3]:
# Repeat the RAPS evaluation with alpha=0.05, reusing the `model`, `dataset`
# and `device` objects created in the first cell.
raps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
    lambda_=0.01,
    k_reg=7,
)



RAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.0070212960243223
Total set size: 283259
Total coverage sets: 23764
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 11.33036
Average Coverage Rate in runs 1: 95.06%

Running experiment 2/10...
q_hat = 1.0081717908382415
Total set size: 284869
Total coverage sets: 23774
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 11.39476
Average Coverage Rate in runs 2: 95.10%

Running experiment 3/10...
q_hat = 1.006326949596405
Total set size: 282167
Total coverage sets: 23755
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 11.28668
Average Coverage Rate in runs 3: 95.02%

Running experiment 4/10...
q_hat = 1.0009825468063354
Total set size: 275966
Total coverage sets: 23690
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 11.03864
Average Coverage Rate in runs 4: 94.76%

Running experiment 5/10...
q_hat = 1.006866359

## Result  
$\alpha$=0.1  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **4.47**
- Final Average Coverage: **89.93%**  

$\alpha$=0.2  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **3.06**
- Final Average Coverage: **80.00%**  

$\alpha$=0.05  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **11.18**
- Final Average Coverage: **94.96%**