# $\alpha$=0.1

In [1]:
import torch
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet50_Weights
import numpy as np
import torchvision.transforms as transforms        
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test

#  Reprocess
data_transform = transforms.Compose([
    transforms.CenterCrop(256),
    transforms.Resize(256),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# load pre-trained model 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1).to(device)

# Temperature Scaling
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=2)
model = ModelWithTemperature(model, temperature = 1.0).to(device)
model.set_temperature(train_loader)

model.eval()

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Before temperature - NLL: 1.147, ECE: 0.028
Optimal temperature: 0.988
After temperature - NLL: 1.146, ECE: 0.025
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.927324265241623
Total set size: 382549
Total coverage sets: 22551
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 15.30196
Average Coverage Rate in runs 1: 0.90204

Running experiment 2/10...
q_hat = 0.9275358080863954
Total set size: 382414
Total coverage sets: 22464
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 15.29656
Average Coverage Rate in runs 2: 0.89856

Running experiment 3/10...
q_hat = 0.9284525990486145
Total set size: 381907
Total coverage sets: 22606
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 15.27628
Average Coverage Rate in runs 3: 0.90424

Running experiment 4/10...
q_hat = 0.9237889766693116
Total set size: 362399
Total coverage sets: 22467
Total samples amount: 25000
Average Prediction Set Size Aft

# $\alpha$=0.2

In [2]:
aps_test(model, dataset, device, num_runs=10, alpha=0.2)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.8257336020469666
Total set size: 158579
Total coverage sets: 20011
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 6.34316
Average Coverage Rate in runs 1: 0.80044

Running experiment 2/10...
q_hat = 0.8251468539237976
Total set size: 157130
Total coverage sets: 19893
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 6.2852
Average Coverage Rate in runs 2: 0.79572

Running experiment 3/10...
q_hat = 0.8323890686035156
Total set size: 162441
Total coverage sets: 20252
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 6.49764
Average Coverage Rate in runs 3: 0.81008

Running experiment 4/10...
q_hat = 0.822765588760376
Total set size: 152402
Total coverage sets: 19882
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 6.09608
Average Coverage Rate in runs 4: 0.79528

Running experiment 5/10...
q_hat = 0.8233240365982

# $\alpha$=0.05

In [3]:
aps_test(model, dataset, device, num_runs=10, alpha=0.05)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9711753606796264
Total set size: 784789
Total coverage sets: 23760
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 31.39156
Average Coverage Rate in runs 1: 0.9504

Running experiment 2/10...
q_hat = 0.9703892529010772
Total set size: 768141
Total coverage sets: 23735
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 30.72564
Average Coverage Rate in runs 2: 0.9494

Running experiment 3/10...
q_hat = 0.9714735031127929
Total set size: 782872
Total coverage sets: 23812
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 31.31488
Average Coverage Rate in runs 3: 0.95248

Running experiment 4/10...
q_hat = 0.9688984811306
Total set size: 740031
Total coverage sets: 23681
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 29.60124
Average Coverage Rate in runs 4: 0.94724

Running experiment 5/10...
q_hat = 0.969354724884

## Result  
$\alpha$=0.1  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **14.84**
- Final Average Coverage: **89.93%**  

$\alpha$=0.2  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **6.28**
- Final Average Coverage: **79.97%**  

$\alpha$=0.05  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **30.27**
- Final Average Coverage: **94.90%**