In [2]:
import torch
import torchvision.transforms as transforms              
from torchvision.datasets import ImageFolder
import torchvision.models as models
from torchvision.models import VGG16_BN_Weights
from torch.utils.data import DataLoader
import numpy as np
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.raps import raps_test

# Load the ImageNet-pretrained VGG16-BN backbone.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
backbone = models.vgg16_bn(weights=VGG16_BN_Weights.IMAGENET1K_V1).to(device)
# nn.Module instances start in training mode. Switch to inference mode *before*
# calibration so Dropout is disabled and BatchNorm uses (and does not update)
# its running statistics while the temperature is being fitted.
backbone.eval()

# Preprocess: resize -> center crop -> tensor -> ImageNet mean/std normalization.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# NOTE(review): absolute, machine-specific path — consider a configurable DATA_DIR.
sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Temperature scaling: fit the temperature on a random 10% subset of the data.
# A seeded, local generator makes the subset (and therefore the fitted
# temperature) reproducible under Restart & Run All without touching NumPy's
# global random state.
CALIBRATION_SEED = 0
rng = np.random.default_rng(CALIBRATION_SEED)
subset_size = len(dataset) // 10
indices = rng.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
# NOTE(review): this subset is drawn from the same `dataset` that is passed to
# raps_test below, so calibration and evaluation samples may overlap — confirm
# this is intended.
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=4)

# Wrap the backbone; `model` is the calibrated model used by the cells below.
model = ModelWithTemperature(backbone, temperature=1.0).to(device)
model.set_temperature(train_loader)
model.eval()

# RAPS evaluation at alpha = 0.1.
raps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.05, k_reg=5)

Before temperature - NLL: 1.187, ECE: 0.027
Optimal temperature: 1.018
After temperature - NLL: 1.186, ECE: 0.030


RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.9596172869205475
Total set size: 87951
Total coverage sets: 22485
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 3.51804
Average Coverage Rate in runs 1: 89.94%

Running experiment 2/10...
q_hat = 0.959666019678116
Total set size: 87436
Total coverage sets: 22541
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 3.49744
Average Coverage Rate in runs 2: 90.16%

Running experiment 3/10...
q_hat = 0.9612351059913635
Total set size: 88710
Total coverage sets: 22565
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 3.5484
Average Coverage Rate in runs 3: 90.26%

Running experiment 4/10...
q_hat = 0.9582522332668305
Total set size: 87952
Total coverage sets: 22469
Total samples amount: 25000
Average Prediction Set Size After APS i


# $\alpha = 0.2$

In [5]:
# RAPS evaluation at alpha = 0.2, repeated over 10 runs.
# NOTE(review): lambda_ and k_reg are presumably tuned per alpha — confirm.
raps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.2,
    lambda_=0.03,
    k_reg=7,
)



RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.8296633720397949
Total set size: 69414
Total coverage sets: 20019
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 2.77656
Average Coverage Rate in runs 1: 80.08%

Running experiment 2/10...
q_hat = 0.8301449537277222
Total set size: 68854
Total coverage sets: 19989
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 2.75416
Average Coverage Rate in runs 2: 79.96%

Running experiment 3/10...
q_hat = 0.8328807711601258
Total set size: 70278
Total coverage sets: 20163
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 2.81112
Average Coverage Rate in runs 3: 80.65%

Running experiment 4/10...
q_hat = 0.8309711933135986
Total set size: 69980
Total coverage sets: 19999
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 2.7992
Average Coverage Rate in runs 4: 80.00%

Running experiment 5/10...
q_hat = 0.8277983903884888


# $\alpha = 0.05$

In [6]:
# RAPS evaluation at alpha = 0.05, repeated over 10 runs.
raps_test(
    model,
    dataset,
    device,
    num_runs=10,
    alpha=0.05,
    lambda_=0.05,
    k_reg=5,
)



RAPS Classification, Start!

Running experiment 1/10...
q_hat = 1.1598649680614463
Total set size: 220544
Total coverage sets: 23725
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 8.82176
Average Coverage Rate in runs 1: 94.90%

Running experiment 2/10...
q_hat = 1.1713940560817717
Total set size: 222055
Total coverage sets: 23762
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 8.8822
Average Coverage Rate in runs 2: 95.05%

Running experiment 3/10...
q_hat = 1.173923176527023
Total set size: 222961
Total coverage sets: 23770
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 8.91844
Average Coverage Rate in runs 3: 95.08%

Running experiment 4/10...
q_hat = 1.1488503038883207
Total set size: 208872
Total coverage sets: 23691
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 8.35488
Average Coverage Rate in runs 4: 94.76%

Running experiment 5/10...
q_hat = 1.15052503943443

## Result  
$\alpha$=0.1  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **3.51**
- Final Average Coverage: **89.92%**  

$\alpha$=0.2  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **2.78**
- Final Average Coverage: **80.03%**  

$\alpha$=0.05  
From the test above, the following results were obtained:
- Final Average Prediction Set Size: **8.84**
- Final Average Coverage: **95.01%**