# $\alpha$=0.1

In [1]:
import os

import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import ImageFolder
from torchvision.models import Inception_V3_Weights

from src.aps import aps_test
from src.temperature_scaling import ModelWithTemperature

# Seed the global NumPy / PyTorch generators so that the calibration subset
# drawn below is the same after every "Restart Kernel -> Run All".
# NOTE(review): whether aps_test also becomes repeatable depends on which
# generator it uses internally -- confirm in src/aps.py.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the pre-trained InceptionV3 and move it to the available device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1).to(device)
base_model.eval()  # only use output.logits of Inception's output

# Preprocess: center-crop to 299x299, then resize to 299, convert to a tensor
# and normalize with the ImageNet channel statistics.
# NOTE(review): the torchvision reference preprocessing for these weights is
# Resize(342) followed by CenterCrop(299) (see
# Inception_V3_Weights.IMAGENET1K_V1.transforms()). With the crop applied
# first, the Resize below presumably has nothing left to do. The order is kept
# unchanged so the outputs recorded in this notebook stay comparable --
# confirm whether it is intentional.
data_transform = transforms.Compose([
    transforms.CenterCrop(299),
    transforms.Resize(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Location of the class-sorted ImageNet validation images. Set the
# IMAGENET_VAL_DIR environment variable to run on another machine; the
# default is the original author's local path.
sorted_val_path = os.environ.get(
    "IMAGENET_VAL_DIR",
    "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val",
)
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Temperature scaling: fit the temperature on a random 10% of the images.
# NOTE(review): this subset is drawn from the same `dataset` that is handed to
# aps_test below, so calibration and evaluation images may overlap -- verify
# whether that is acceptable for the reported numbers.
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=4)

# `model` is the temperature-scaled wrapper used by all later cells;
# `base_model` remains the untouched InceptionV3.
model = ModelWithTemperature(base_model, temperature=1.0).to(device)
model.set_temperature(train_loader)
model.eval()

# Run the APS experiment 10 times at alpha = 0.1.
aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Before temperature - NLL: 1.030, ECE: 0.020
Optimal temperature: 0.964
After temperature - NLL: 1.020, ECE: 0.023
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.8887645900249482
Total set size: 1236482
Total coverage sets: 22482
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 49.45928
Average Coverage Rate in runs 1: 0.89928

Running experiment 2/10...
q_hat = 0.8887065351009372
Total set size: 1202018
Total coverage sets: 22363
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 48.08072
Average Coverage Rate in runs 2: 0.89452

Running experiment 3/10...
q_hat = 0.8945301234722137
Total set size: 1311679
Total coverage sets: 22680
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 52.46716
Average Coverage Rate in runs 3: 0.9072

Running experiment 4/10...
q_hat = 0.8877568483352661
Total set size: 1234578
Total coverage sets: 22486
Total samples amount: 25000
Average Prediction Set Size

# $\alpha$=0.2

In [2]:
# Repeat the APS experiment (10 runs) with alpha = 0.2, reusing the
# temperature-scaled `model`, `dataset` and `device` from the first cell.
# The coverage rates printed below are close to 1 - alpha.
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.7930824995040894
Total set size: 388878
Total coverage sets: 20033
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 15.55512
Average Coverage Rate in runs 1: 0.80132

Running experiment 2/10...
q_hat = 0.7890548467636108
Total set size: 359676
Total coverage sets: 19810
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 14.38704
Average Coverage Rate in runs 2: 0.7924

Running experiment 3/10...
q_hat = 0.7969926476478577
Total set size: 395467
Total coverage sets: 20149
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 15.81868
Average Coverage Rate in runs 3: 0.80596

Running experiment 4/10...
q_hat = 0.7928884863853455
Total set size: 393725
Total coverage sets: 19973
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 15.749
Average Coverage Rate in runs 4: 0.79892

Running experiment 5/10...
q_hat = 0.7930426597

# $\alpha$=0.05

In [3]:
# Repeat the APS experiment (10 runs) with alpha = 0.05, reusing the
# temperature-scaled `model`, `dataset` and `device` from the first cell.
# The coverage rates printed below are close to 1 - alpha.
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
)

APS Classification, Start!

Running experiment 1/10...
q_hat = 0.941176050901413
Total set size: 2818821
Total coverage sets: 23737
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 112.75284
Average Coverage Rate in runs 1: 0.94948

Running experiment 2/10...
q_hat = 0.9396757692098617
Total set size: 2706540
Total coverage sets: 23642
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 108.2616
Average Coverage Rate in runs 2: 0.94568

Running experiment 3/10...
q_hat = 0.9430534690618514
Total set size: 2898718
Total coverage sets: 23857
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 115.94872
Average Coverage Rate in runs 3: 0.95428

Running experiment 4/10...
q_hat = 0.9400837689638137
Total set size: 2788565
Total coverage sets: 23740
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 111.5426
Average Coverage Rate in runs 4: 0.9496

Running experiment 5/10...
q_hat = 0.940

# Result  
$\alpha$=0.1  
From the test above, the following results were collected:
- Final Average Prediction Set Size: 49.60
- Final Average Coverage: 89.95%  

$\alpha$=0.2  
From the test above, the following results were collected:
- Final Average Prediction Set Size: 15.44
- Final Average Coverage: 80.08%  

$\alpha$=0.05  
From the test above, the following results were collected:
- Final Average Prediction Set Size: 110.80
- Final Average Coverage: 94.96%