# $\alpha$=0.1

In [2]:
import torch
import torchvision.transforms as transforms              
from torchvision.datasets import CIFAR100      
from torch.utils.data import DataLoader
from src.inception_cifar100 import inceptionv3
from src.temperature_scaling import ModelWithTemperature
import numpy as np
from src.aps import aps_test


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dict_path = "C:\\Users\\jiayang\\ipynb\\trainedModel\\Inception_CIFAR100.pth"
model = inceptionv3()
model.load_state_dict(torch.load(dict_path, map_location=device, weights_only=True))
model.to(device)

# preprocess the images from CIFAR100
data_transform = transforms.Compose([
    transforms.ToTensor(),         
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))  
])

# load data set from CIFAR100
dataset = CIFAR100(root="../../data", train=False, download=True,transform=data_transform)

# Temperature Scaling
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=0.5).to(device)
model.set_temperature(temp_scal_loader)
model.eval()

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Files already downloaded and verified
Before temperature - NLL: 1.394, ECE: 0.126
Optimal temperature: 0.633
After temperature - NLL: 1.890, ECE: 0.208
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9967255115509033
Total set size: 17355
Total coverage sets: 4535
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 3.471
Average Coverage Rate in runs 1: 0.907

Running experiment 2/10...
q_hat = 0.9962914645671844
Total set size: 17108
Total coverage sets: 4495
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 3.4216
Average Coverage Rate in runs 2: 0.899

Running experiment 3/10...
q_hat = 0.9962484061717987
Total set size: 17061
Total coverage sets: 4484
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 3.4122
Average Coverage Rate in runs 3: 0.8968

Running experiment 4/10...
q_hat = 0.9958017051219941
Total set size: 16228
Total coverage sets: 4461
Total samples amount: 5000
Average Predicti

# $\alpha$=0.3

In [1]:
aps_test(model, dataset, device, num_runs=10, alpha=0.3)

Files already downloaded and verified
Before temperature - NLL: 1.399, ECE: 0.124
Optimal temperature: 0.633
After temperature - NLL: 1.897, ECE: 0.207
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.8744838953018188
Total set size: 6495
Total coverage sets: 3519
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.299
Average Coverage Rate in runs 1: 0.7038

Running experiment 2/10...
q_hat = 0.8679374575614928
Total set size: 6442
Total coverage sets: 3485
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.2884
Average Coverage Rate in runs 2: 0.697

Running experiment 3/10...
q_hat = 0.8755259096622466
Total set size: 6495
Total coverage sets: 3493
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.299
Average Coverage Rate in runs 3: 0.6986

Running experiment 4/10...
q_hat = 0.8685945391654968
Total set size: 6358
Total coverage sets: 3459
Total samples amount: 5000
Average Prediction S

# $\alpha$=0.2

In [2]:
aps_test(model, dataset, device, num_runs=10, alpha=0.2)

Files already downloaded and verified
Before temperature - NLL: 1.387, ECE: 0.123
Optimal temperature: 0.633
After temperature - NLL: 1.877, ECE: 0.206
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9648807406425476
Total set size: 9241
Total coverage sets: 4036
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.8482
Average Coverage Rate in runs 1: 0.8072

Running experiment 2/10...
q_hat = 0.9597337484359741
Total set size: 9096
Total coverage sets: 3995
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.8192
Average Coverage Rate in runs 2: 0.799

Running experiment 3/10...
q_hat = 0.9607915282249451
Total set size: 9085
Total coverage sets: 3999
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.817
Average Coverage Rate in runs 3: 0.7998

Running experiment 4/10...
q_hat = 0.9599417567253112
Total set size: 8931
Total coverage sets: 3971
Total samples amount: 5000
Average Prediction 

# $\alpha$=0.05

In [3]:
aps_test(model, dataset, device, num_runs=10, alpha=0.05)

Files already downloaded and verified
Before temperature - NLL: 1.387, ECE: 0.123
Optimal temperature: 0.633
After temperature - NLL: 1.877, ECE: 0.206
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.9997750341892242
Total set size: 31713
Total coverage sets: 4768
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 6.3426
Average Coverage Rate in runs 1: 0.9536

Running experiment 2/10...
q_hat = 0.9997540563344955
Total set size: 31968
Total coverage sets: 4763
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 6.3936
Average Coverage Rate in runs 2: 0.9526

Running experiment 3/10...
q_hat = 0.9996624171733857
Total set size: 29582
Total coverage sets: 4719
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 5.9164
Average Coverage Rate in runs 3: 0.9438

Running experiment 4/10...
q_hat = 0.9997630804777146
Total set size: 31464
Total coverage sets: 4756
Total samples amount: 5000
Average Predi

#  Result
$\alpha$=0.1  
From the above test, following results can be collected :
- Final Average Prediction Set Size: 3.44
- Final Average Coverage: 90.03%   

$\alpha$=0.3  
From the above test, following results can be collected :
- Final Average Prediction Set Size: 1.29
- Final Average Coverage: 69.59%  

$\alpha$=0.2  
From the above test, following results can be collected :
- Final Average Prediction Set Size: 1.80
- Final Average Coverage: 79.79%  

$\alpha$=0.05  
From the above test, following results can be collected :
- Final Average Prediction Set Size: 6.17
- Final Average Coverage: 94.96%