In [8]:
import torch
import numpy as np
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms              
from torchvision.datasets import CIFAR10      
from torch.utils.data import DataLoader
from src.temperature_scaling import ModelWithTemperature
from src.aps import split_data_set, aps_scores, aps_classification, eval_aps

# Report the CUDA environment once, then pick the compute device from it
cuda_ready = torch.cuda.is_available()
print("Is CUDA available:", cuda_ready)
print("Device count:", torch.cuda.device_count())
# Only query the device name when a GPU is actually present
print("Device name:", torch.cuda.get_device_name(0) if cuda_ready else "No GPU")
device = torch.device("cuda") if cuda_ready else torch.device("cpu")

# Load the fine-tuned Inception v3 classifier.
# The network is created without pretrained weights and without weight
# initialisation; its parameters come from the checkpoint loaded below.
model = models.inception_v3(pretrained=False, init_weights=False)
# Swap the final fully connected layer for a 10-output head
model.fc = nn.Linear(model.fc.in_features, 10)
model_path = "C:\\Users\\jiayang\\ipynb\\trainedModel\\Inception_V3_CIFAR10.pth"
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint that was saved from a GPU still loads when only the CPU is available
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.eval()  # evaluation mode: the model is only used for inference here
print(f"Model loaded from {model_path}")

# Experiment settings
alpha = 0.1     # error rate
num_runs = 10   # number of times the experiment is repeated

# Preprocessing applied to every CIFAR image
data_transform = transforms.Compose(
    [
        # resize to 299x299, the standard input size of Inception
        transforms.Resize((299, 299)),
        # convert the image to a tensor
        transforms.ToTensor(),
        # per-channel normalization
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

# CIFAR10 with train=False, downloaded into ./data when not already present
dataset = CIFAR10(
    root="./data",
    train=False,
    transform=data_transform,
    download=True,
)

# Construct and evaluate APS prediction sets repeatedly.
# Each run splits the dataset into calibration and test parts, derives the
# threshold q_hat from the calibration scores, builds prediction sets on the
# test part, and records the average set size and coverage of that run.
all_avg_set_sizes = []
all_avg_coverages = []
print("APS Classification, Start!\n")
for i in range(num_runs):
    print(f"Running experiment {i+1}/{num_runs}...")

    # split the dataset; the run index is passed as the random seed
    calib_dataset, test_dataset = split_data_set(dataset, random_seed=i)

    # one loader per part; order is kept fixed (shuffle=False)
    calib_loader = DataLoader(calib_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # calculate q_hat as the (1 - alpha) quantile of the calibration scores.
    # The level is derived from `alpha` so it always agrees with the value
    # handed to aps_scores (previously it was hard-coded as 1 - 0.1).
    # NOTE(review): split conformal prediction is usually stated with the
    # finite-sample level ceil((n + 1) * (1 - alpha)) / n; the plain level is
    # kept here to match the recorded output - confirm which one is intended.
    calib_scores, _ = aps_scores(model, calib_loader, alpha, device)
    q_hat = np.quantile(calib_scores, 1 - alpha)
    print(f"q_hat = {q_hat}")

    # construct APS prediction sets on the test part
    aps, aps_labels, true_labels = aps_classification(model, test_loader, q_hat, device)

    # evaluate APS
    avg_set_size, avg_coverage = eval_aps(aps_labels, true_labels)
    print(f"Average Prediction Set Size After APS in runs {i+1}: {avg_set_size}")
    print(f"Average Coverage Ratein runs {i+1}: {avg_coverage}\n")

    # record the result of the current run
    all_avg_set_sizes.append(avg_set_size)
    all_avg_coverages.append(avg_coverage)

# average the per-run results over all runs
final_avg_set_size = np.mean(all_avg_set_sizes)
final_avg_coverage = np.mean(all_avg_coverages)

print(f"Final Average Prediction Set Size: {final_avg_set_size}")
print(f"Final Average Coverage: {final_avg_coverage}")

Is CUDA available: True
Device count: 1
Device name: NVIDIA GeForce RTX 3060 Ti
Model loaded from C:\Users\jiayang\ipynb\trainedModel\Inception_V3_CIFAR10.pth
Files already downloaded and verified
APS Classification, Start!

Running experiment 1/10...
Samples amount: 10000
q_hat = 0.997151220456562
Total set size: 33556
Total coverage sets: 4368
Total samples amount: 5000
Average Prediction Set Size After APS in runs 1: 6.7112
Average Coverage Ratein runs 1: 0.8736

Running experiment 2/10...
Samples amount: 10000
q_hat = 0.9965794351322568
Total set size: 32425
Total coverage sets: 4281
Total samples amount: 5000
Average Prediction Set Size After APS in runs 2: 6.485
Average Coverage Ratein runs 2: 0.8562

Running experiment 3/10...
Samples amount: 10000
q_hat = 0.9973077357320644
Total set size: 33721
Total coverage sets: 4397
Total samples amount: 5000
Average Prediction Set Size After APS in runs 3: 6.7442
Average Coverage Ratein runs 3: 0.8794

Running experiment 4/10...
Samples a

## Result

- Final Average **Prediction Set Size: 6.60 / 10**
- Final Average **Coverage: 86.72% ($\alpha$=0.1, i.e. target coverage $1-\alpha$ = 90%)**