# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet34_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

#  Preprocess
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# load pre-trained model 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1).to(device)


# Temperature Scaling
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(model, temperature = 1.2).to(device)
model.set_temperature(train_loader)

model.eval()

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Before temperature - NLL: 1.143, ECE: 0.025
Optimal temperature: 1.119
After temperature - NLL: 1.153, ECE: 0.034
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9041442692279817
Total set size: 371081
Total coverage sets: 22518
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 14.84324
Average Coverage Rate in runs 1: 0.90072

Running experiment 2/10...
q_hat = 0.904286527633667
Total set size: 361690
Total coverage sets: 22443
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 14.4676
Average Coverage Rate in runs 2: 0.89772

Running experiment 3/10...
q_hat = 0.9054918169975281
Total set size: 378168
Total coverage sets: 22590
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 15.12672
Average Coverage Rate in runs 3: 0.9036

Running experiment 4/10...
q_hat = 0.902033942937851
Total set size: 352948
Total coverage sets: 22499
Total samples amount: 25000
Average Prediction Set Size After 

# $\alpha$=0.2

In [2]:
aps_test(model, dataset, device, num_runs=10, alpha=0.2)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.7987095355987549
Total set size: 157598
Total coverage sets: 20016
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 6.30392
Average Coverage Rate in runs 1: 0.80064

Running experiment 2/10...
q_hat = 0.7987485647201539
Total set size: 153168
Total coverage sets: 19958
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 6.12672
Average Coverage Rate in runs 2: 0.79832

Running experiment 3/10...
q_hat = 0.8020266532897949
Total set size: 161523
Total coverage sets: 20173
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 6.46092
Average Coverage Rate in runs 3: 0.80692

Running experiment 4/10...
q_hat = 0.797117042541504
Total set size: 150524
Total coverage sets: 19914
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 6.02096
Average Coverage Rate in runs 4: 0.79656

Running experiment 5/10...
q_hat = 0.799058151245

# $\alpha$=0.05

In [3]:
aps_test(model, dataset, device, num_runs=10, alpha=0.05)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9560917168855666
Total set size: 757773
Total coverage sets: 23756
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 30.31092
Average Coverage Rate in runs 1: 0.95024

Running experiment 2/10...
q_hat = 0.9563030153512954
Total set size: 745611
Total coverage sets: 23750
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 29.82444
Average Coverage Rate in runs 2: 0.95

Running experiment 3/10...
q_hat = 0.9567684948444366
Total set size: 771031
Total coverage sets: 23803
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 30.84124
Average Coverage Rate in runs 3: 0.95212

Running experiment 4/10...
q_hat = 0.9544147819280623
Total set size: 721566
Total coverage sets: 23721
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 28.86264
Average Coverage Rate in runs 4: 0.94884

Running experiment 5/10...
q_hat = 0.9546449422

## Result  
$\alpha$=0.1  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **14.51**
- Final Average Coverage: **89.92%**  

$\alpha$=0.2  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **6.23**
- Final Average Coverage: **80.00%**  

$\alpha$=0.05  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **29.73**
- Final Average Coverage: **94.96%**