# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet34_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.raps import raps_test

# Reproducibility: the calibration subset below is drawn with np.random.choice,
# so without a fixed seed every kernel restart calibrates on different images.
# Seeding the global numpy/torch generators also makes raps_test repeatable
# if it relies on them for its random splits -- TODO confirm in src/raps.py.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Preprocessing: resize, center-crop to 224x224, convert to tensor, then
# normalise each channel with the mean/std constants below.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# NOTE(review): absolute, machine-specific path -- consider moving it to a
# configurable DATA_DIR so the notebook runs on other machines.
sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Load the pre-trained ResNet-34 on GPU when available, otherwise on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1).to(device)
# torch modules are created in training mode. Switch to eval *before*
# calibration so BatchNorm uses its stored running statistics (and does not
# update them) while the calibration logits are computed.
model.eval()

# Temperature scaling: fit the temperature on a random 10% subset of the data.
# NOTE(review): the subset is sampled from the same `dataset` that is passed
# to raps_test below, so calibration images may overlap with the evaluation
# split -- verify against raps_test's own splitting.
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(model, temperature=1.0).to(device)
model.set_temperature(train_loader)

# Keep the wrapped model in eval mode for the conformal experiments as well.
model.eval()

# RAPS evaluation at alpha=0.1, repeated over 10 runs.
raps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.07, k_reg=9)

Before temperature - NLL: 1.137, ECE: 0.024
Optimal temperature: 1.008
After temperature - NLL: 1.137, ECE: 0.020


RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.9552643954753877
Total set size: 101576
Total coverage sets: 22496
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 4.06304
Average Coverage Rate in runs 1: 89.98%

Running experiment 2/10...
q_hat = 0.9543303847312927
Total set size: 100286
Total coverage sets: 22479
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 4.01144
Average Coverage Rate in runs 2: 89.92%

Running experiment 3/10...
q_hat = 0.9534759104251862
Total set size: 100410
Total coverage sets: 22475
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 4.0164
Average Coverage Rate in runs 3: 89.90%

Running experiment 4/10...
q_hat = 0.9510663866996766
Total set size: 99784
Total coverage sets: 22399
Total samples amount: 25000
Average Prediction Set Size After A

# $\alpha$=0.2

In [2]:
# RAPS evaluation at alpha=0.2 over 10 runs, reusing the `model`, `dataset`
# and `device` objects created in the first cell.
raps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
    lambda_=0.01,
    k_reg=9,
)



RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.8329256892204284
Total set size: 80841
Total coverage sets: 19973
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 3.23364
Average Coverage Rate in runs 1: 79.89%

Running experiment 2/10...
q_hat = 0.8337922453880311
Total set size: 79747
Total coverage sets: 19942
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 3.18988
Average Coverage Rate in runs 2: 79.77%

Running experiment 3/10...
q_hat = 0.8363909006118775
Total set size: 81909
Total coverage sets: 20119
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 3.27636
Average Coverage Rate in runs 3: 80.48%

Running experiment 4/10...
q_hat = 0.8332459926605225
Total set size: 80480
Total coverage sets: 19931
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 3.2192
Average Coverage Rate in runs 4: 79.72%

Running experiment 5/10...
q_hat = 0.8331016540527344


# $\alpha$=0.05

In [2]:
# RAPS evaluation at alpha=0.05 over 10 runs, reusing the `model`, `dataset`
# and `device` objects created in the first cell.
raps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
    lambda_=0.2,
    k_reg=9,
)



RAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.411945629119864
Total set size: 277638
Total coverage sets: 23728
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 11.10552
Average Coverage Rate in runs 1: 94.91%

Running experiment 2/10...
q_hat = 1.4942803502082802
Total set size: 279384
Total coverage sets: 23762
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 11.17536
Average Coverage Rate in runs 2: 95.05%

Running experiment 3/10...
q_hat = 1.477931916713714
Total set size: 279008
Total coverage sets: 23752
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 11.16032
Average Coverage Rate in runs 3: 95.01%

Running experiment 4/10...
q_hat = 1.3522397696971893
Total set size: 257268
Total coverage sets: 23659
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 10.29072
Average Coverage Rate in runs 4: 94.64%

Running experiment 5/10...
q_hat = 1.5038854658

## Result  
$\alpha$=0.1  
From the above test, the following results can be collected:
- Final Average Prediction Set Size: **4.01**
- Final Average Coverage: **89.85%**  

$\alpha$=0.2  
From the above test, the following results can be collected:
- Final Average Prediction Set Size: **3.23**
- Final Average Coverage: **79.96%**  

$\alpha$=0.05  
From the above test, the following results can be collected:
- Final Average Prediction Set Size: **10.04**
- Final Average Coverage: **94.95%**