# $\alpha$=0.1

In [1]:
import torch
import torchvision.transforms as transforms              
from torchvision.datasets import ImageFolder
import torchvision.models as models
from torchvision.models import Inception_V3_Weights
from torch.utils.data import DataLoader
import numpy as np
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.raps import raps_test

# load pre-trained model InceptionV3 and set mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1).to(device)

#  Reprocess: Center Crop and then resize to 299*299
data_transform = transforms.Compose([
    transforms.CenterCrop(299),
    transforms.Resize(299), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Temperature Scaling
model.eval() # only use output.logits of Inception's output
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=4)

model = ModelWithTemperature(model, temperature = 1.0).to(device)
model.set_temperature(train_loader)
model.eval()

raps_test(model, dataset, device, num_runs=10, alpha=0.1, lambda_=0.2, k_reg=4)

Before temperature - NLL: 1.082, ECE: 0.018
Optimal temperature: 0.967
After temperature - NLL: 1.073, ECE: 0.024
RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.9593629121780396
Total set size: 76860
Total coverage sets: 22527
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 3.0744
Average Coverage Rate in runs 1: 0.90108

Running experiment 2/10...
q_hat = 0.9579761445522313
Total set size: 75961
Total coverage sets: 22502
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 3.03844
Average Coverage Rate in runs 2: 0.90008

Running experiment 3/10...
q_hat = 0.959210479259491
Total set size: 76762
Total coverage sets: 22547
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 3.07048
Average Coverage Rate in runs 3: 0.90188

Running experiment 4/10...
q_hat = 0.954694497585297
Total set size: 75009
Total coverage sets: 22410
Total samples amount: 25000
Average Prediction Set Size After APS i

In [1]:
import torch
import torchvision.transforms as transforms              
from torchvision.datasets import ImageFolder
import torchvision.models as models
from torchvision.models import Inception_V3_Weights
from torch.utils.data import DataLoader
import numpy as np
from torch.utils.data import Subset
from src.temperature_scaling import ModelWithTemperature
from src.raps import raps_test

# load pre-trained model InceptionV3 and set mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1).to(device)

#  Reprocess: Center Crop and then resize to 299*299
data_transform = transforms.Compose([
    transforms.CenterCrop(299),
    transforms.Resize(299), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
])

sorted_val_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"
dataset = ImageFolder(root=sorted_val_path, transform=data_transform)

# Temperature Scaling
model.eval() # only use output.logits of Inception's output
subset_size = len(dataset) // 10
indices = np.random.choice(len(dataset), subset_size, replace=False)
subset_dataset = Subset(dataset, indices)
train_loader = DataLoader(subset_dataset, batch_size=32, shuffle=False, num_workers=4)

model = ModelWithTemperature(model, temperature = 1.0).to(device)
model.set_temperature(train_loader)
model.eval()


Before temperature - NLL: 1.057, ECE: 0.025
Optimal temperature: 0.965
After temperature - NLL: 1.047, ECE: 0.029


ModelWithTemperature(
  (model): Inception3(
    (Conv2d_1a_3x3): BasicConv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_2a_3x3): BasicConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_2b_3x3): BasicConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Conv2d_3b_1x1): BasicConv2d(
      (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_4a_3x

# $\alpha$=0.2

In [3]:
raps_test(model, dataset, device, num_runs=10, alpha=0.2, lambda_=0.05, k_reg=9)

RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.8089204430580139
Total set size: 74876
Total coverage sets: 20026
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 2.99504
Average Coverage Rate in runs 1: 0.80104

Running experiment 2/10...
q_hat = 0.8062701344490052
Total set size: 72836
Total coverage sets: 19881
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 2.91344
Average Coverage Rate in runs 2: 0.79524

Running experiment 3/10...
q_hat = 0.8126830577850341
Total set size: 75266
Total coverage sets: 20087
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 3.01064
Average Coverage Rate in runs 3: 0.80348

Running experiment 4/10...
q_hat = 0.8084321856498718
Total set size: 75303
Total coverage sets: 19945
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 3.01212
Average Coverage Rate in runs 4: 0.7978

Running experiment 5/10...
q_hat = 0.808964633941650

# $\alpha$=0.05

In [3]:
raps_test(model, dataset, device, num_runs=10, alpha=0.05, lambda_=0.02, k_reg=15)

RAPS Classification, Start!

Running experiment 1/10...
q_hat = 0.9644444614648818
Total set size: 296017
Total coverage sets: 23815
Total samples amount: 25000
Average Prediction Set Size After APS in runs 1: 11.84068
Average Coverage Rate in runs 1: 0.9526

Running experiment 2/10...
q_hat = 0.9632684409618377
Total set size: 290781
Total coverage sets: 23757
Total samples amount: 25000
Average Prediction Set Size After APS in runs 2: 11.63124
Average Coverage Rate in runs 2: 0.95028

Running experiment 3/10...
q_hat = 0.9637184977531433
Total set size: 293752
Total coverage sets: 23808
Total samples amount: 25000
Average Prediction Set Size After APS in runs 3: 11.75008
Average Coverage Rate in runs 3: 0.95232

Running experiment 4/10...
q_hat = 0.962581181526184
Total set size: 291788
Total coverage sets: 23754
Total samples amount: 25000
Average Prediction Set Size After APS in runs 4: 11.67152
Average Coverage Rate in runs 4: 0.95016

Running experiment 5/10...
q_hat = 0.96150817

## Result  


$\alpha$=0.1  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **3.05**
- Final Average Coverage: **89.97%**

$\alpha$=0.2  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **3.00**
- Final Average Coverage: **80.09%**  

$\alpha$=0.05  
From the above test, following results can be collected :
- Final Average Prediction Set Size: **11.62**
- Final Average Coverage: **95.00%**