# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet18_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.saps import saps_test

#  Preprocess
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# load pre-trained model 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1).to(device)

# Temperature Scaling
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(model, temperature = 1.0).to(device)
model.set_temperature(train_loader)

model.eval()

saps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.1)

Before temperature - NLL: 1.320, ECE: 0.017
Optimal temperature: 0.987
After temperature - NLL: 1.319, ECE: 0.015


SAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.0401371121406555
Average Prediction Set Size After APS in runs 1: 4.2952
Average Coverage Rate in runs 1: 90.16%

Running experiment 2/10...
q_hat = 1.044836914539338
Average Prediction Set Size After APS in runs 2: 4.32908
Average Coverage Rate in runs 2: 90.19%

Running experiment 3/10...
q_hat = 1.0436384081840524
Average Prediction Set Size After APS in runs 3: 4.32548
Average Coverage Rate in runs 3: 90.18%

Running experiment 4/10...
q_hat = 1.0275766849517824
Average Prediction Set Size After APS in runs 4: 4.18568
Average Coverage Rate in runs 4: 89.66%

Running experiment 5/10...
q_hat = 1.0397905230522162
Average Prediction Set Size After APS in runs 5: 4.29404
Average Coverage Rate in runs 5: 90.05%

Running experiment 6/10...
q_hat = 1.0365165472030644
Average Prediction Set Size After APS in ru

# $\alpha$=0.2

In [2]:
saps_test(model, dataset, device, num_runs=10, alpha=0.2, lambda_=0.1)# error rate



SAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.836382532119751
Average Prediction Set Size After APS in runs 1: 2.78824
Average Coverage Rate in runs 1: 80.35%

Running experiment 2/10...
q_hat = 0.8324943780899049
Average Prediction Set Size After APS in runs 2: 2.757
Average Coverage Rate in runs 2: 79.70%

Running experiment 3/10...
q_hat = 0.8415474653244018
Average Prediction Set Size After APS in runs 3: 2.81168
Average Coverage Rate in runs 3: 80.64%

Running experiment 4/10...
q_hat = 0.8295932531356812
Average Prediction Set Size After APS in runs 4: 2.75596
Average Coverage Rate in runs 4: 79.32%

Running experiment 5/10...
q_hat = 0.8372910618782043
Average Prediction Set Size After APS in runs 5: 2.79728
Average Coverage Rate in runs 5: 80.09%

Running experiment 6/10...
q_hat = 0.8358636021614075
Average Prediction Set Size After APS in runs 6: 2.794
Average Coverage Rate in runs 6: 80.14%

Running experiment 7/10...
q_hat = 0.8298634171485901
Average 

# $\alpha$=0.05

In [2]:
saps_test(model, dataset, device, num_runs=10, alpha=0.05, lambda_=0.03)



SAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.0160166144371032
Average Prediction Set Size After APS in runs 1: 11.06288
Average Coverage Rate in runs 1: 94.97%

Running experiment 2/10...
q_hat = 1.0225111067295072
Average Prediction Set Size After APS in runs 2: 11.25356
Average Coverage Rate in runs 2: 95.12%

Running experiment 3/10...
q_hat = 1.01646888256073
Average Prediction Set Size After APS in runs 3: 11.04428
Average Coverage Rate in runs 3: 94.96%

Running experiment 4/10...
q_hat = 1.0147178709506988
Average Prediction Set Size After APS in runs 4: 11.06004
Average Coverage Rate in runs 4: 94.89%

Running experiment 5/10...
q_hat = 1.01485076546669
Average Prediction Set Size After APS in runs 5: 11.04108
Average Coverage Rate in runs 5: 94.90%

Running experiment 6/10...
q_hat = 1.0138855457305904
Average Prediction Set Size After APS in runs 6: 11.055
Average Coverage Rate in runs 6: 94.86%

Running experiment 7/10...
q_hat = 1.01494100689888
Avera

## Result  
$\alpha$=0.1  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **4.26**
- Final Average Coverage: **89.94%**  

$\alpha$=0.2  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **2.78**
- Final Average Coverage: **79.99%**  

$\alpha$=0.05  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **11.09**
- Final Average Coverage: **94.93%**