# $\alpha$=0.1

In [1]:
import torch
import numpy as np
import torchvision.transforms as transforms              
from torchvision.datasets import CIFAR10      
from torch.utils.data import DataLoader
from src.temperature_scaling import ModelWithTemperature
from src.aps import aps_test
from src.inception import inception_v3

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): machine-specific absolute path; point this at the local
# checkpoint before running the notebook on another machine.
dict_path = "C:\\Users\\jiayang\\ipynb\\trainedModel\\Inception_CIFAR10.pth"
model = inception_v3(pretrained=True, dict_path=dict_path).to(device)

# preprocess the images from CIFAR10: convert to tensors, then normalize
# each channel with the given (mean, std) triples
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# load images from CIFAR10 (train=False selects the test split)
dataset = CIFAR10(root="../../data", train=False, download=True, transform=data_transform)

# Temperature Scaling
# NOTE(review): the temperature is fit on the same `dataset` object that is
# passed to aps_test below; confirm this overlap is intended.
temp_scal_loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = ModelWithTemperature(model, temperature=5.2).to(device)
# Put the model in inference mode BEFORE fitting the temperature. Previously
# eval() was only called after set_temperature(), so Dropout/BatchNorm layers
# could still be in training mode while the logits used for calibration were
# computed (the recorded "Before temperature" NLL/ECE differ between runs,
# which is consistent with that).
model.eval()
model.set_temperature(temp_scal_loader)
# Re-assert inference mode for the evaluation; a no-op if the mode is unchanged.
model.eval()

aps_test(model, dataset, device, num_runs=10, alpha=0.1)

Loading weights from: C:\Users\jiayang\ipynb\trainedModel\Inception_CIFAR10.pth
Files already downloaded and verified
Before temperature - NLL: 0.426, ECE: 0.055
Optimal temperature: 5.128
After temperature - NLL: 0.767, ECE: 0.336
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.6340731620788576
Total set size: 8999
Total coverage sets: 4491
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 1.7998
Average Coverage Rate in runs 1: 0.8982

Running experiment 2/10...
q_hat = 0.636247330904007
Total set size: 9196
Total coverage sets: 4508
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 1.8392
Average Coverage Rate in runs 2: 0.9016

Running experiment 3/10...
q_hat = 0.6315113365650177
Total set size: 9054
Total coverage sets: 4481
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 1.8108
Average Coverage Rate in runs 3: 0.8962

Running experiment 4/10...
q_hat = 0.6286553025245667
Total set si

# $\alpha$=0.05

In [1]:
# Repeat the APS experiment with alpha=0.05, reusing the `model`, `dataset`
# and `device` objects built in the setup cell.
# (presumably alpha is the target miscoverage rate; the recorded coverage for
# this setting is ~0.95 -- confirm against src.aps)
aps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
)

Loading weights from: C:\Users\jiayang\ipynb\trainedModel\Inception_CIFAR10.pth
Files already downloaded and verified
Before temperature - NLL: 0.357, ECE: 0.029
Optimal temperature: 5.125
After temperature - NLL: 0.889, ECE: 0.410
APS Classification, Start!

Running experiment 1/10...
q_hat = 0.6599989801645278
Total set size: 12291
Total coverage sets: 4752
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 2.4582
Average Coverage Rate in runs 1: 0.9504

Running experiment 2/10...
q_hat = 0.6599436402320862
Total set size: 12363
Total coverage sets: 4746
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 2.4726
Average Coverage Rate in runs 2: 0.9492

Running experiment 3/10...
q_hat = 0.6556271433830263
Total set size: 12162
Total coverage sets: 4721
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 2.4324
Average Coverage Rate in runs 3: 0.9442

Running experiment 4/10...
q_hat = 0.6616239428520203
Total se

## Result
$\alpha$=0.1
- Final Average **Prediction Set Size: 1.81 / 10**
- Final Average **Coverage: 89.92%**  

$\alpha$=0.05
- Final Average **Prediction Set Size: 2.46 / 10**
- Final Average **Coverage: 94.95%**  