## Basic imports

In [11]:
from torchcp.classification.predictors import ClassWisePredictor,ClusterPredictor,SplitPredictor
from torchcp.classification.scores import APS,RAPS,SAPS,THR,Margin
from torchcp.utils import fix_randomness
import torch
from torchvision import datasets, transforms
import dill
import pandas as pd

# Fix the random seed through torchcp's helper so that the random dataset splits below
# start from a known state (which RNGs it seeds is decided by torchcp -- TODO confirm).
fix_randomness(seed=1)

# Use the first CUDA device when one is visible, otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

## Load Dataset

### Fashion MNIST

In [2]:
# Normalisation statistics: a single mean/std value replicated over the three output channels.
_fm_mean = (0.28604063391685486,) * 3
_fm_std = (0.10887365788221359,) * 3


def _fm_pipeline(augmentations=()):
    """Build a Fashion-MNIST transform.

    Order of the steps: 3-channel grayscale, resize to 32x32, the optional
    ``augmentations``, tensor conversion, normalisation.
    """
    steps = [
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((32, 32)),
    ]
    steps.extend(augmentations)
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(_fm_mean, _fm_std))
    return transforms.Compose(steps)


# Training pipeline adds a padded random crop and a random horizontal flip.
transform_train_FM = _fm_pipeline([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
])
# Evaluation pipeline has no augmentation.
transform_test_FM = _fm_pipeline()

# Only the official test split (train=False) is loaded here.
testset_FM = datasets.FashionMNIST(
    root='~/.pytorch/F_MNIST_data/',
    train=False,
    download=True,
    transform=transform_test_FM,
)

# Split the loaded test set into a calibration part and a held-out test part (5000 each).
_fm_split_sizes = [5000, 5000]
cal_dataset_FM, test_dataset_FM = torch.utils.data.random_split(testset_FM, _fm_split_sizes)

# Both loaders iterate in a fixed order with the same batch size.
_fm_loader_kwargs = dict(batch_size=250, shuffle=False)
cal_loader_FM = torch.utils.data.DataLoader(cal_dataset_FM, **_fm_loader_kwargs)
test_loader_FM = torch.utils.data.DataLoader(test_dataset_FM, **_fm_loader_kwargs)

class_names_FM = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

### CIFAR-10

In [3]:
# Channel-wise statistics used to normalise CIFAR images in both pipelines.
_cifar_mean = (0.4914, 0.4822, 0.4465)
_cifar_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: padded random crop and horizontal flip before tensor conversion.
_cifar_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_cifar_mean, _cifar_std),
]
transform_train_Cifar = transforms.Compose(_cifar_train_steps)

# Evaluation pipeline: tensor conversion and normalisation only.
_cifar_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_cifar_mean, _cifar_std),
]
transform_test_Cifar = transforms.Compose(_cifar_test_steps)

# Only the official test split (train=False) is loaded here.
testset_Cifar10 = datasets.CIFAR10(
    root='~/.pytorch/CIFAR10_data/',
    train=False,
    download=True,
    transform=transform_test_Cifar,
)

# Split the loaded test set into a calibration part and a held-out test part (5000 each).
_cifar10_split_sizes = [5000, 5000]
cal_dataset_Cifar10, test_dataset_Cifar10 = torch.utils.data.random_split(
    testset_Cifar10, _cifar10_split_sizes
)

# Both loaders iterate in a fixed order with the same batch size.
_cifar10_loader_kwargs = dict(batch_size=250, shuffle=False)
cal_loader_Cifar10 = torch.utils.data.DataLoader(cal_dataset_Cifar10, **_cifar10_loader_kwargs)
test_loader_cifar10 = torch.utils.data.DataLoader(test_dataset_Cifar10, **_cifar10_loader_kwargs)

Files already downloaded and verified


### CIFAR-100

In [4]:
# CIFAR-100 reuses the evaluation transform defined for CIFAR-10 (transform_test_Cifar).
testset_Cifar100 = datasets.CIFAR100(
    root='~/.pytorch/CIFAR100_data/',
    train=False,
    download=True,
    transform=transform_test_Cifar,
)

# Split the loaded test set into a calibration part and a held-out test part (5000 each).
_cifar100_parts = torch.utils.data.random_split(testset_Cifar100, [5000, 5000])
cal_dataset_Cifar100, test_dataset_Cifar100 = _cifar100_parts

# Build one fixed-order loader per part, in (calibration, test) order.
cal_loader_Cifar100, test_loader_cifar100 = (
    torch.utils.data.DataLoader(part, batch_size=250, shuffle=False)
    for part in (cal_dataset_Cifar100, test_dataset_Cifar100)
)

Files already downloaded and verified


## Conformal Prediction

In [12]:
# NOTE(security): torch.load unpickles arbitrary Python objects (here through dill), which can
# execute code stored in the file. Only load checkpoints that come from a trusted source.
#
# map_location=device places the restored tensors on the device chosen in the setup cell, so a
# checkpoint that was saved on a GPU can also be opened on a CPU-only machine.
model1 = torch.load('model/model1.pth', map_location=device, pickle_module=dill)
resnet1 = torch.load('model/resnet1.pth', map_location=device, pickle_module=dill)
resnet2 = torch.load('model/resnet2.pth', map_location=device, pickle_module=dill)
resnet3 = torch.load('model/resnet3.pth', map_location=device, pickle_module=dill)

In [13]:
def CP(model,cal_loader,test_loader,setname,mdname,alpha=0.05):
    """Run every (score function, predictor) combination and save the metrics as CSV.

    For each score function (THR, APS, RAPS, SAPS, Margin) and each predictor class
    (SplitPredictor, ClassWisePredictor, ClusterPredictor) a predictor is built around
    ``model``, calibrated on ``cal_loader`` and evaluated on ``test_loader``. The
    'Coverage_rate' and 'Average_size' entries of each evaluation result are collected
    in two tables (rows: predictors, columns: scores).

    Args:
        model: model handed to every predictor constructor.
        cal_loader: data loader passed to ``predictor.calibrate``.
        test_loader: data loader passed to ``predictor.evaluate``.
        setname: dataset label; used in the log line and in the CSV file names.
        mdname: model label; used in the log line and in the CSV file names.
        alpha: value forwarded to ``predictor.calibrate`` (the miscoverage level in
            conformal prediction). Defaults to 0.05, the value that was previously
            hard-coded.

    Returns:
        None. Progress is printed, and the tables are written to
        ``table/<mdname>_<setname>_Cr.csv`` (coverage rate) and
        ``table/<mdname>_<setname>_As.csv`` (average set size).
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    columns = ['THR', 'APS', 'RAPS', 'SAPS', 'Margin']
    index = ['SplitPredictor', 'ClassWisePredictor', 'ClusterPredictor']
    df_Cr = pd.DataFrame(columns=columns, index=index)
    df_As = pd.DataFrame(columns=columns, index=index)

    predictors = [SplitPredictor, ClassWisePredictor, ClusterPredictor]
    # Constructor arguments of RAPS and SAPS are kept exactly as in the original experiments.
    score_functions = [THR(),  APS(), RAPS(1, 0), SAPS(0.2), Margin()]
    for score in score_functions:
        score_name = score.__class__.__name__
        for class_predictor in predictors:
            predictor = class_predictor(score, model)
            predictor_name = predictor.__class__.__name__
            predictor.calibrate(cal_loader, alpha)
            print(f"Experiment--Data : {setname}, Model : {mdname}, Score : {score_name}, Predictor : {predictor_name}, Alpha : {alpha}")
            result=predictor.evaluate(test_loader)
            print(result)
            df_Cr.loc[predictor_name,score_name]=result['Coverage_rate']
            df_As.loc[predictor_name,score_name]=result['Average_size']

    # to_csv does not create missing parent directories, so make sure the folder exists.
    os.makedirs("table", exist_ok=True)
    path1="table/"+mdname+"_"+setname+"_Cr.csv"
    path2="table/"+mdname+"_"+setname+"_As.csv"
    df_Cr.to_csv(path1)
    df_As.to_csv(path2)

In [7]:
# Fashion-MNIST experiment with the CNN model.
CP(
    model=model1,
    cal_loader=cal_loader_FM,
    test_loader=test_loader_FM,
    setname="Fashion_MNIST",
    mdname="CNN",
)

Experiment--Data : Fashion_MNIST, Model : CNN, Score : THR, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9414, 'Average_size': 1.1202}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9494, 'Average_size': 1.2172}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : THR, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9406, 'Average_size': 1.1168}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : APS, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9532, 'Average_size': 1.3768}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9446, 'Average_size': 1.367}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : APS, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9528, 'Average_size': 1.387}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : RAPS, Predictor : SplitPredic

In [8]:
# Fashion-MNIST experiment with the first ResNet checkpoint.
CP(
    model=resnet1,
    cal_loader=cal_loader_FM,
    test_loader=test_loader_FM,
    setname="Fashion_MNIST",
    mdname="Resnet50",
)

Experiment--Data : Fashion_MNIST, Model : Resnet50, Score : THR, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.951, 'Average_size': 1.1594}
Experiment--Data : Fashion_MNIST, Model : Resnet50, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9504, 'Average_size': 1.226}
Experiment--Data : Fashion_MNIST, Model : Resnet50, Score : THR, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9512, 'Average_size': 1.16}
Experiment--Data : Fashion_MNIST, Model : Resnet50, Score : APS, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9532, 'Average_size': 1.3582}
Experiment--Data : Fashion_MNIST, Model : Resnet50, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9522, 'Average_size': 1.3982}
Experiment--Data : Fashion_MNIST, Model : Resnet50, Score : APS, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9456, 'Average_size': 1.3366}
Experiment--Data : Fashion_MNIST, Model : Resnet50, Scor

In [9]:
# CIFAR-10 experiment with the second ResNet checkpoint.
CP(
    model=resnet2,
    cal_loader=cal_loader_Cifar10,
    test_loader=test_loader_cifar10,
    setname="Cifar10",
    mdname="Resnet50",
)

Experiment--Data : Cifar10, Model : Resnet50, Score : THR, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9496, 'Average_size': 1.72}
Experiment--Data : Cifar10, Model : Resnet50, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9518, 'Average_size': 1.7732}
Experiment--Data : Cifar10, Model : Resnet50, Score : THR, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9496, 'Average_size': 1.7204}
Experiment--Data : Cifar10, Model : Resnet50, Score : APS, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9502, 'Average_size': 1.9852}
Experiment--Data : Cifar10, Model : Resnet50, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.95, 'Average_size': 2.0462}
Experiment--Data : Cifar10, Model : Resnet50, Score : APS, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9522, 'Average_size': 1.9842}
Experiment--Data : Cifar10, Model : Resnet50, Score : RAPS, Predictor : SplitPredictor, Alph

In [10]:
# CIFAR-100 experiment with the third ResNet checkpoint.
# The model label was misspelled "Renet50"; it is used in the log lines and in the CSV file
# names, so it now matches the "Resnet50" label of the other ResNet runs. Re-run the cell to
# refresh the recorded output and regenerate the tables under the corrected name.
CP(resnet3,cal_loader_Cifar100,test_loader_cifar100,"Cifar100","Resnet50")

Experiment--Data : Cifar100, Model : Renet50, Score : THR, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.953, 'Average_size': 19.8032}
Experiment--Data : Cifar100, Model : Renet50, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9468, 'Average_size': 22.954}
Experiment--Data : Cifar100, Model : Renet50, Score : THR, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.953, 'Average_size': 21.6614}
Experiment--Data : Cifar100, Model : Renet50, Score : APS, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9504, 'Average_size': 20.0052}
Experiment--Data : Cifar100, Model : Renet50, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9516, 'Average_size': 24.3418}
Experiment--Data : Cifar100, Model : Renet50, Score : APS, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9508, 'Average_size': 21.6504}
Experiment--Data : Cifar100, Model : Renet50, Score : RAPS, Predictor : SplitPredicto

In [14]:
# Re-run of the Fashion-MNIST experiment with a smaller test batch size (128 instead of 250).
#
# This cell used to be a full copy of the "Fashion MNIST" cell. Besides duplicating the
# transforms and dataset, the copy called random_split again on the global RNG, so under
# "Restart & Run All" the calibration/test partition silently differed from the one used in
# the first Fashion-MNIST experiment. Only the test loader needs to change, so the existing
# split (test_dataset_FM) and the existing calibration loader (cal_loader_FM) are reused.
test_loader_FM = torch.utils.data.DataLoader(test_dataset_FM, batch_size=128, shuffle=False)

In [15]:
# Fashion-MNIST experiment with the CNN model, using the loaders from the cell above.
CP(
    model=model1,
    cal_loader=cal_loader_FM,
    test_loader=test_loader_FM,
    setname="Fashion_MNIST",
    mdname="CNN",
)

Experiment--Data : Fashion_MNIST, Model : CNN, Score : THR, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9414, 'Average_size': 1.1202}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9494, 'Average_size': 1.2172}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : THR, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9406, 'Average_size': 1.1168}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : APS, Predictor : SplitPredictor, Alpha : 0.05
{'Coverage_rate': 0.9532, 'Average_size': 1.3744}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.05
{'Coverage_rate': 0.9502, 'Average_size': 1.3964}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : APS, Predictor : ClusterPredictor, Alpha : 0.05
{'Coverage_rate': 0.9438, 'Average_size': 1.347}
Experiment--Data : Fashion_MNIST, Model : CNN, Score : RAPS, Predictor : SplitPredi

../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [0,0,0], thread: [96,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [0,0,0], thread: [97,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [0,0,0], thread: [98,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [0,0,0], thread: [99,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [0,0,0], thread: [80,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [0,0,0], thread: [81,0,0] Assertion `-sizes[i] <

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
