# $\alpha$=0.1

In [1]:
import torch
import torchvision.transforms as transforms              
from torchvision.datasets import ImageFolder
import torchvision.models as models
from torchvision.models import VGG16_BN_Weights
from torch.utils.data import DataLoader
import numpy as np
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

# Seed the global RNGs so the random temperature-scaling subset drawn below is
# the same on every Restart & Run All.
# NOTE(review): aps_test's internals are not visible in this notebook; if it
# splits data through the numpy/torch global RNGs its runs become repeatable
# too -- confirm against src/aps.py.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the ImageNet-pretrained VGG16-BN.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.vgg16_bn(weights=VGG16_BN_Weights.IMAGENET1K_V1).to(device)
# Freshly constructed nn.Modules start in training mode. VGG16-BN contains
# Dropout and BatchNorm layers, so switch to inference mode *before* any logits
# are used to fit the temperature.
base_model.eval()

# Preprocess: resize, center-crop to 224x224, convert to a tensor, and
# normalize with the per-channel mean/std given below.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# NOTE(review): machine-specific absolute path; adjust it to the local copy of
# the class-sorted ImageNet validation folder before running.
sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Temperature scaling: fit the temperature on a random 10% subset of `dataset`.
# NOTE(review): this subset is drawn from the same `dataset` that is passed in
# full to aps_test below, so the two are not disjoint.
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=4)

# Wrap the network; `model` is the name the later cells reuse for aps_test.
model = ModelWithTemperature(base_model, temperature=1.0).to(device)
model.set_temperature(train_loader)
model.eval()

# APS evaluation at alpha = 0.1, repeated over 10 runs.
aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Before temperature - NLL: 1.257, ECE: 0.030
Optimal temperature: 1.030
After temperature - NLL: 1.254, ECE: 0.031


APS Classification, Start!

Running experiment 1/10...
q_hat = 0.918293446302414
Total set size: 300506
Total coverage sets: 22540
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 12.02024
Average Coverage Rate in runs 1: 90.16%

Running experiment 2/10...
q_hat = 0.9176324129104615
Total set size: 291134
Total coverage sets: 22447
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 11.64536
Average Coverage Rate in runs 2: 89.79%

Running experiment 3/10...
q_hat = 0.9181209385395053
Total set size: 301410
Total coverage sets: 22584
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 12.0564
Average Coverage Rate in runs 3: 90.34%

Running experiment 4/10...
q_hat = 0.9167555212974549
Total set size: 294995
Total coverage sets: 22504
Total samples amount: 25000
Average Prediction Set Size After

# $\alpha$=0.2

In [2]:
# Same APS evaluation with alpha = 0.2, reusing the temperature-scaled `model`
# and the full `dataset` from the first cell.
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
)



APS Classification, Start!

Running experiment 1/10...
q_hat = 0.8153685450553895
Total set size: 131336
Total coverage sets: 20055
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 5.25344
Average Coverage Rate in runs 1: 80.22%

Running experiment 2/10...
q_hat = 0.8144441723823548
Total set size: 127035
Total coverage sets: 19953
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 5.0814
Average Coverage Rate in runs 2: 79.81%

Running experiment 3/10...
q_hat = 0.817321240901947
Total set size: 133218
Total coverage sets: 20165
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 5.32872
Average Coverage Rate in runs 3: 80.66%

Running experiment 4/10...
q_hat = 0.8173040628433228
Total set size: 132870
Total coverage sets: 20029
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 5.3148
Average Coverage Rate in runs 4: 80.12%

Running experiment 5/10...
q_hat = 0.8154766917228699

# $\alpha$=0.05

In [3]:
# Same APS evaluation with alpha = 0.05, reusing the temperature-scaled `model`
# and the full `dataset` from the first cell.
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
)



APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9661534935235977
Total set size: 607877
Total coverage sets: 23721
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 24.31508
Average Coverage Rate in runs 1: 94.88%

Running experiment 2/10...
q_hat = 0.9652826011180877
Total set size: 585498
Total coverage sets: 23715
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 23.41992
Average Coverage Rate in runs 2: 94.86%

Running experiment 3/10...
q_hat = 0.9662377178668976
Total set size: 613345
Total coverage sets: 23759
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 24.5338
Average Coverage Rate in runs 3: 95.04%

Running experiment 4/10...
q_hat = 0.9662063241004943
Total set size: 607207
Total coverage sets: 23742
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 24.28828
Average Coverage Rate in runs 4: 94.97%

Running experiment 5/10...
q_hat = 0.9651857048

## Result  
$\alpha$=0.1  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **11.70**
- Final Average Coverage: **89.91%**  

$\alpha$=0.2  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **5.21**
- Final Average Coverage: **80.05%**  

$\alpha$=0.05  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **23.76**
- Final Average Coverage: **94.88%**