# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet18_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

#  Preprocess
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# load pre-trained model 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1).to(device)


# Temperature Scaling
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(model, temperature = 1.0).to(device)
model.set_temperature(train_loader)

model.eval()

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Before temperature - NLL: 1.328, ECE: 0.026
Optimal temperature: 0.987
After temperature - NLL: 1.327, ECE: 0.027
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9223724067211151
Total set size: 374286
Total coverage sets: 22503
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 14.97144
Average Coverage Rate in runs 1: 0.90012

Running experiment 2/10...
q_hat = 0.923025143146515
Total set size: 376731
Total coverage sets: 22484
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 15.06924
Average Coverage Rate in runs 2: 0.89936

Running experiment 3/10...
q_hat = 0.9244899928569795
Total set size: 386622
Total coverage sets: 22649
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 15.46488
Average Coverage Rate in runs 3: 0.90596

Running experiment 4/10...
q_hat = 0.9195575833320617
Total set size: 360605
Total coverage sets: 22416
Total samples amount: 25000
Average Prediction Set Size Aft

# $\alpha$=0.2

In [2]:
aps_test(model, dataset, device, num_runs=10, alpha=0.2)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.8210610032081604
Total set size: 159566
Total coverage sets: 19982
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 6.38264
Average Coverage Rate in runs 1: 0.79928

Running experiment 2/10...
q_hat = 0.8238945841789246
Total set size: 161903
Total coverage sets: 20009
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 6.47612
Average Coverage Rate in runs 2: 0.80036

Running experiment 3/10...
q_hat = 0.8265883088111877
Total set size: 166744
Total coverage sets: 20173
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 6.66976
Average Coverage Rate in runs 3: 0.80692

Running experiment 4/10...
q_hat = 0.8188735246658325
Total set size: 156356
Total coverage sets: 19849
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 6.25424
Average Coverage Rate in runs 4: 0.79396

Running experiment 5/10...
q_hat = 0.82311996221

# $\alpha$=0.05

In [3]:
aps_test(model, dataset, device, num_runs=10, alpha=0.05)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.969964474439621
Total set size: 793046
Total coverage sets: 23768
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 31.72184
Average Coverage Rate in runs 1: 0.95072

Running experiment 2/10...
q_hat = 0.9691448897123337
Total set size: 778389
Total coverage sets: 23756
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 31.13556
Average Coverage Rate in runs 2: 0.95024

Running experiment 3/10...
q_hat = 0.969944953918457
Total set size: 797101
Total coverage sets: 23793
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 31.88404
Average Coverage Rate in runs 3: 0.95172

Running experiment 4/10...
q_hat = 0.9682551681995392
Total set size: 760439
Total coverage sets: 23698
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 30.41756
Average Coverage Rate in runs 4: 0.94792

Running experiment 5/10...
q_hat = 0.968869695

## Result  
$\alpha$=0.1  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **14,94**
- Final Average Coverage: **89.91%**  

$\alpha$=0.2  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **6.51**
- Final Average Coverage: **80.04%**  

$\alpha$=0.05  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **31.08**
- Final Average Coverage: **94.93%**