# $\alpha$=0.1

In [1]:
import os

import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet34_Weights

from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

# Seed the global NumPy / PyTorch generators so the temperature-scaling subset
# (and any downstream sampling that relies on these generators) is identical
# on every Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Preprocess: resize the short side to 256, centre-crop to 224x224, convert to
# a tensor and normalise with the channel mean/std documented for torchvision's
# ImageNet weights.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Root of the ImageNet validation images (ImageFolder expects one
# sub-directory per class). Set IMAGENET_VAL_DIR to run on another machine;
# the original local path is kept as the default.
sorted_val_path = os.environ.get(
    "IMAGENET_VAL_DIR",
    "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val",
)
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Load the pre-trained model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1).to(device)
# A freshly constructed nn.Module is in training mode, where BatchNorm layers
# normalise with per-batch statistics and update their running averages.
# Switch to eval mode BEFORE temperature calibration so the logits used to fit
# the temperature match the ones used at test time and the pre-trained
# statistics are not modified (set_temperature may not switch modes itself).
base_model.eval()

# Temperature scaling: fit the temperature on a random 10% subset.
# NOTE(review): this subset is drawn from `dataset`, which is also passed whole
# to aps_test below, so temperature tuning and evaluation share samples --
# confirm this overlap is acceptable.
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(base_model, temperature=1.2).to(device)
model.set_temperature(train_loader)

# Put the wrapper itself (and all sub-modules) in eval mode for evaluation.
model.eval()

# APS evaluation at alpha=0.1 over 10 runs.
aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Before temperature - NLL: 1.146, ECE: 0.026
Optimal temperature: 1.125
After temperature - NLL: 1.154, ECE: 0.032


APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9002267897129059
Total set size: 371243
Total coverage sets: 22517
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 14.84972
Average Coverage Rate in runs 1: 90.07%

Running experiment 2/10...
q_hat = 0.9015366077423099
Total set size: 365200
Total coverage sets: 22456
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 14.608
Average Coverage Rate in runs 2: 89.82%

Running experiment 3/10...
q_hat = 0.9018255472183228
Total set size: 378249
Total coverage sets: 22576
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 15.12996
Average Coverage Rate in runs 3: 90.30%

Running experiment 4/10...
q_hat = 0.8988027691841126
Total set size: 356434
Total coverage sets: 22483
Total samples amount: 25000
Average Prediction Set Size After

# $\alpha$=0.2

In [2]:
# Repeat the APS evaluation with alpha=0.2, reusing the temperature-scaled
# `model`, `dataset` and `device` defined in the first cell.
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
)



APS Classification, Start!

Running experiment 1/10...
q_hat = 0.7936891078948974
Total set size: 157926
Total coverage sets: 19982
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 6.31704
Average Coverage Rate in runs 1: 79.93%

Running experiment 2/10...
q_hat = 0.7931611657142639
Total set size: 152241
Total coverage sets: 19898
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 6.08964
Average Coverage Rate in runs 2: 79.59%

Running experiment 3/10...
q_hat = 0.796750283241272
Total set size: 161288
Total coverage sets: 20156
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 6.45152
Average Coverage Rate in runs 3: 80.62%

Running experiment 4/10...
q_hat = 0.7933259725570679
Total set size: 152610
Total coverage sets: 19950
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 6.1044
Average Coverage Rate in runs 4: 79.80%

Running experiment 5/10...
q_hat = 0.792654967308044

# $\alpha$=0.05

In [3]:
# Repeat the APS evaluation with alpha=0.05, reusing the temperature-scaled
# `model`, `dataset` and `device` defined in the first cell.
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
)



APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9538080483675003
Total set size: 760897
Total coverage sets: 23755
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 30.43588
Average Coverage Rate in runs 1: 95.02%

Running experiment 2/10...
q_hat = 0.9535224467515946
Total set size: 740446
Total coverage sets: 23747
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 29.61784
Average Coverage Rate in runs 2: 94.99%

Running experiment 3/10...
q_hat = 0.9539184302091598
Total set size: 764335
Total coverage sets: 23782
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 30.5734
Average Coverage Rate in runs 3: 95.13%

Running experiment 4/10...
q_hat = 0.9526998549699783
Total set size: 733665
Total coverage sets: 23752
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 29.3466
Average Coverage Rate in runs 4: 95.01%

Running experiment 5/10...
q_hat = 0.95211724936

## Result  
$\alpha$=0.1  
From the 10 runs above, the following results were obtained:
- Final Average Prediction Set Size: **14.56**
- Final Average Coverage: **89.89%**  

$\alpha$=0.2  
From the 10 runs above, the following results were obtained:
- Final Average Prediction Set Size: **6.26**
- Final Average Coverage: **80.01%**  

$\alpha$=0.05  
From the 10 runs above, the following results were obtained:
- Final Average Prediction Set Size: **29.78**
- Final Average Coverage: **94.96%**