# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet34_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.saps import saps_test

# Reproducibility: the calibration subset below is drawn with np.random, so
# seed the global RNGs up front to make Restart Kernel -> Run All repeatable.
# NOTE(review): saps_test is defined elsewhere; presumably its per-run splits
# also draw from the numpy/torch global RNGs -- confirm.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Preprocess: resize, center-crop to 224x224, convert to tensor and normalize
# with the per-channel mean/std given below.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# NOTE(review): machine-specific absolute path; edit it to point at the local
# copy of the class-sorted ImageNet validation folder before running.
sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Load the pre-trained model on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1).to(device)

# A freshly constructed nn.Module is in training mode. Switch to inference
# mode *before* calibration so that BatchNorm layers use (and do not update)
# their pretrained running statistics while calibration logits are computed.
# NOTE(review): ModelWithTemperature.set_temperature is defined elsewhere and
# may already do this; calling eval() here is harmless either way.
model.eval()

# Temperature scaling: fit the temperature on a random 10% subset.
# NOTE(review): this subset is drawn from the same `dataset` that is passed
# to saps_test below, so calibration and evaluation data overlap -- confirm
# this is acceptable for the experiment.
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(model, temperature = 1.0).to(device)
model.set_temperature(train_loader)

# Put the temperature-scaled wrapper itself in inference mode for evaluation.
model.eval()

saps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.15)

Before temperature - NLL: 1.170, ECE: 0.020
Optimal temperature: 1.004
After temperature - NLL: 1.170, ECE: 0.019


SAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.0641706109046951
Average Prediction Set Size After APS in runs 1: 3.03784
Average Coverage Rate in runs 1: 89.68%

Running experiment 2/10...
q_hat = 1.0802031040191655
Average Prediction Set Size After APS in runs 2: 3.1246
Average Coverage Rate in runs 2: 90.16%

Running experiment 3/10...
q_hat = 1.0660554051399238
Average Prediction Set Size After APS in runs 3: 3.04676
Average Coverage Rate in runs 3: 89.79%

Running experiment 4/10...
q_hat = 1.0600157737731937
Average Prediction Set Size After APS in runs 4: 3.01324
Average Coverage Rate in runs 4: 89.60%

Running experiment 5/10...
q_hat = 1.0755097627639776
Average Prediction Set Size After APS in runs 5: 3.09636
Average Coverage Rate in runs 5: 90.04%

Running experiment 6/10...
q_hat = 1.0812483906745913
Average Prediction Set Size After APS in r

# $\alpha$=0.2

In [2]:
# Repeat the SAPS experiment with alpha = 0.2 and lambda_ = 0.1, reusing the
# temperature-scaled model and dataset built in the first cell.
saps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
    lambda_=0.1,
)



SAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.8282156229019165
Average Prediction Set Size After APS in runs 1: 2.36376
Average Coverage Rate in runs 1: 79.91%

Running experiment 2/10...
q_hat = 0.8266348838806153
Average Prediction Set Size After APS in runs 2: 2.33148
Average Coverage Rate in runs 2: 79.51%

Running experiment 3/10...
q_hat = 0.8364889979362489
Average Prediction Set Size After APS in runs 3: 2.40084
Average Coverage Rate in runs 3: 80.65%

Running experiment 4/10...
q_hat = 0.821812379360199
Average Prediction Set Size After APS in runs 4: 2.33544
Average Coverage Rate in runs 4: 79.14%

Running experiment 5/10...
q_hat = 0.8320026874542237
Average Prediction Set Size After APS in runs 5: 2.37004
Average Coverage Rate in runs 5: 80.14%

Running experiment 6/10...
q_hat = 0.8297326445579529
Average Prediction Set Size After APS in runs 6: 2.36876
Average Coverage Rate in runs 6: 80.17%

Running experiment 7/10...
q_hat = 0.8269782900810242
Aver

# $\alpha$=0.05

In [3]:
# Repeat the SAPS experiment with alpha = 0.05 and lambda_ = 0.05, reusing the
# temperature-scaled model and dataset built in the first cell.
saps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
    lambda_=0.05,
)



SAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.095287239551544
Average Prediction Set Size After APS in runs 1: 7.72548
Average Coverage Rate in runs 1: 95.00%

Running experiment 2/10...
q_hat = 1.0987992405891411
Average Prediction Set Size After APS in runs 2: 7.75004
Average Coverage Rate in runs 2: 95.08%

Running experiment 3/10...
q_hat = 1.0942593634128568
Average Prediction Set Size After APS in runs 3: 7.69232
Average Coverage Rate in runs 3: 94.94%

Running experiment 4/10...
q_hat = 1.07923663854599
Average Prediction Set Size After APS in runs 4: 7.42788
Average Coverage Rate in runs 4: 94.74%

Running experiment 5/10...
q_hat = 1.0870329260826106
Average Prediction Set Size After APS in runs 5: 7.53624
Average Coverage Rate in runs 5: 94.85%

Running experiment 6/10...
q_hat = 1.0888455867767328
Average Prediction Set Size After APS in runs 6: 7.60704
Average Coverage Rate in runs 6: 94.86%

Running experiment 7/10...
q_hat = 1.0883684992790221
Averag

## Result  
$\alpha$=0.1  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **3.07**
- Final Average Coverage: **89.89%**  

$\alpha$=0.2  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **2.36**
- Final Average Coverage: **79.92%**  

$\alpha$=0.05  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **7.66**
- Final Average Coverage: **94.93%**