In [12]:
!pip install torchcp
import argparse
import os

import torch
import torchvision
import torchvision.transforms as trn
from tqdm import tqdm
from torch.utils.data import random_split
from torchcp.classification.predictors import ClusterPredictor, ClassWisePredictor, SplitPredictor
from torchcp.classification.scores import THR, APS, SAPS, RAPS
from torchcp.classification import Metrics
from torchcp.utils import fix_randomness

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0m

In [2]:
# Manually specified stand-in for command-line arguments
class Args:
    """Plain attribute container that mimics a parsed argparse namespace.

    Every constructor argument is stored unchanged under an attribute of the
    same name.
    """

    def __init__(self, seed, alpha, predictor, score, penalty, kreg, weight, split):
        # Experiment-wide settings.
        self.seed, self.alpha = seed, alpha
        # Names used to select the predictor and the score function.
        self.predictor, self.score = predictor, score
        # Score-function hyperparameters.
        self.penalty, self.kreg, self.weight = penalty, kreg, weight
        # Data split setting.
        self.split = split

args = Args(seed=0, alpha=0.1, predictor="Standard", score="THR", penalty=1, kreg=0, weight=0.2, split="random")

# Fix the random seed before the model is built and the dataset is split.
fix_randomness(seed=args.seed)

model_name = 'ResNet101'

# Load the ImageNet-pretrained model.
# NOTE(review): the IMAGENET1K_V1 classifier head is used as-is, so the model
# scores ImageNet classes while the labels below are CIFAR-10 indices.
# Presumably this is why the recorded prediction sets are so large; confirm
# whether a head fine-tuned on CIFAR-10 was intended.
model = torchvision.models.resnet101(weights="IMAGENET1K_V1", progress=True)
model_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(model_device)
# Inference only: make sure BatchNorm layers use their running statistics.
# Harmless if the predictor also switches the model to eval mode.
model.eval()

# Load dataset (CIFAR-10 test split) with ImageNet-style preprocessing.
transform = trn.Compose([trn.Resize(256),
                         trn.CenterCrop(224),
                         trn.ToTensor(),
                         trn.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                         ])
dataset = torchvision.datasets.CIFAR10(root=os.path.join(os.path.expanduser('~'), "data"), train=False, download=True, transform=transform)

# Split by ratio instead of hard-coded sizes (adjust cal_ratio as needed).
# For a 10000-sample dataset this yields the original 5000/5000 split.
cal_ratio = 0.5
cal_length = int(cal_ratio * len(dataset))
test_length = len(dataset) - cal_length
cal_dataset, test_dataset = torch.utils.data.random_split(dataset, [cal_length, test_length])
cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=False, pin_memory=True)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True)

alpha = args.alpha
print(f"Experiment--Data : CIFAR-10, Model : {model_name}, Score : {args.score}, Predictor : {args.predictor}, Alpha : {alpha}")

# Number of label classes in the dataset (CIFAR-10 has 10), passed to CovGap.
# It was 1000, which disagreed with the labels and with the other cells.
num_classes = 10

# Select the non-conformity score function by name.
if args.score == "THR":
    score_function = THR()
elif args.score == "APS":
    score_function = APS()
elif args.score == "RAPS":
    score_function = RAPS(args.penalty, args.kreg)
elif args.score == "SAPS":
    score_function = SAPS(weight=args.weight)
else:
    raise NotImplementedError(f"Unsupported score function: {args.score!r}")

# Select the conformal predictor by name.
if args.predictor == "Standard":
    predictor = SplitPredictor(score_function, model)
elif args.predictor == "ClassWise":
    predictor = ClassWisePredictor(score_function, model)
elif args.predictor == "Cluster":
    predictor = ClusterPredictor(score_function, model, args.seed)
else:
    raise NotImplementedError(f"Unsupported predictor: {args.predictor!r}")
print(f"The size of calibration set is {len(cal_dataset)}.")
predictor.calibrate(cal_data_loader, alpha)

# Build prediction sets for the held-out test split.
print("Testing examples...")
prediction_sets = []
labels_list = []
with torch.no_grad():
    for examples in tqdm(test_data_loader):
        tmp_x, tmp_label = examples[0], examples[1]
        prediction_sets_batch = predictor.predict(tmp_x)
        prediction_sets.extend(prediction_sets_batch)
        labels_list.append(tmp_label)
test_labels = torch.cat(labels_list)

# Report coverage, average set size and the class-conditional coverage gap.
metrics = Metrics()
print("Evaluating prediction sets...")
print(f"Coverage_rate: {metrics('coverage_rate')(prediction_sets, test_labels)}.")
print(f"Average_size: {metrics('average_size')(prediction_sets, test_labels)}.")
print(f"CovGap: {metrics('CovGap')(prediction_sets, test_labels, alpha, num_classes)}.")


Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /output/.torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:07<00:00, 22.7MB/s] 


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 13269293.38it/s]


Extracting /root/data/cifar-10-python.tar.gz to /root/data
Experiment--Data : CIFAR-10, Model : ResNet101, Score : THR, Predictor : Standard, Alpha : 0.1
The size of calibration set is 5000.
Testing examples...


100%|██████████| 79/79 [00:10<00:00,  7.56it/s]

Etestuating prediction sets...
Coverage_rate: 0.8788.
Average_size: 911.9408.
CovGap: 9.948091821157124.





In [6]:
# Manually specified stand-in for command-line arguments
class Args:
    """Simple settings holder used in place of argparse inside the notebook.

    Each keyword passed to the constructor becomes an instance attribute with
    the same name and value.
    """

    def __init__(self, seed, alpha, predictor, score, penalty, kreg, weight, split):
        # Pair every attribute name with its value and assign them in order.
        settings = (
            ("seed", seed),
            ("alpha", alpha),
            ("predictor", predictor),
            ("score", score),
            ("penalty", penalty),
            ("kreg", kreg),
            ("weight", weight),
            ("split", split),
        )
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)

args = Args(seed=0, alpha=0.1, predictor="Standard", score="THR", penalty=1, kreg=0, weight=0.2, split="random")

# Fix the random seed before the model is built and the dataset is split.
fix_randomness(seed=args.seed)

model_name = 'ResNet101'

# Load the ImageNet-pretrained model.
# NOTE(review): the IMAGENET1K_V1 classifier head is used as-is, so the model
# scores ImageNet classes while the labels below are CIFAR-100 indices.
# Presumably this is why the recorded prediction sets are so large; confirm
# whether a head fine-tuned on CIFAR-100 was intended.
model = torchvision.models.resnet101(weights="IMAGENET1K_V1", progress=True)
model_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(model_device)
# Inference only: make sure BatchNorm layers use their running statistics.
# Harmless if the predictor also switches the model to eval mode.
model.eval()

# Load dataset (CIFAR-100 test split) with ImageNet-style preprocessing.
transform = trn.Compose([trn.Resize(256),
                         trn.CenterCrop(224),
                         trn.ToTensor(),
                         trn.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                         ])
dataset = torchvision.datasets.CIFAR100(root=os.path.join(os.path.expanduser('~'), "data"), train=False, download=True, transform=transform)

# Split by ratio instead of hard-coded sizes (adjust cal_ratio as needed).
# For a 10000-sample dataset this yields the original 5000/5000 split.
cal_ratio = 0.5
cal_length = int(cal_ratio * len(dataset))
test_length = len(dataset) - cal_length
cal_dataset, test_dataset = torch.utils.data.random_split(dataset, [cal_length, test_length])
cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=False, pin_memory=True)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True)

alpha = args.alpha
print(f"Experiment--Data : CIFAR-100, Model : {model_name}, Score : {args.score}, Predictor : {args.predictor}, Alpha : {alpha}")

# Number of label classes in the dataset, passed to the CovGap metric.
num_classes = 100

# Select the non-conformity score function by name.
if args.score == "THR":
    score_function = THR()
elif args.score == "APS":
    score_function = APS()
elif args.score == "RAPS":
    score_function = RAPS(args.penalty, args.kreg)
elif args.score == "SAPS":
    score_function = SAPS(weight=args.weight)
else:
    raise NotImplementedError(f"Unsupported score function: {args.score!r}")

# Select the conformal predictor by name.
if args.predictor == "Standard":
    predictor = SplitPredictor(score_function, model)
elif args.predictor == "ClassWise":
    predictor = ClassWisePredictor(score_function, model)
elif args.predictor == "Cluster":
    predictor = ClusterPredictor(score_function, model, args.seed)
else:
    raise NotImplementedError(f"Unsupported predictor: {args.predictor!r}")
print(f"The size of calibration set is {len(cal_dataset)}.")
predictor.calibrate(cal_data_loader, alpha)

# Build prediction sets for the held-out test split.
print("Testing examples...")
prediction_sets = []
labels_list = []
with torch.no_grad():
    for examples in tqdm(test_data_loader):
        tmp_x, tmp_label = examples[0], examples[1]
        prediction_sets_batch = predictor.predict(tmp_x)
        prediction_sets.extend(prediction_sets_batch)
        labels_list.append(tmp_label)
test_labels = torch.cat(labels_list)

# Report coverage, average set size and the class-conditional coverage gap.
metrics = Metrics()
print("Evaluating prediction sets...")
print(f"Coverage_rate: {metrics('coverage_rate')(prediction_sets, test_labels)}.")
print(f"Average_size: {metrics('average_size')(prediction_sets, test_labels)}.")
print(f"CovGap: {metrics('CovGap')(prediction_sets, test_labels, alpha, num_classes)}.")


Files already downloaded and verified
Experiment--Data : CIFAR-100, Model : ResNet101, Score : THR, Predictor : Standard, Alpha : 0.1
The size of calibration set is 5000.
Testing examples...


100%|██████████| 79/79 [00:09<00:00,  8.21it/s]

Evaluating prediction sets...
Coverage_rate: 0.8914.
Average_size: 922.2136.
CovGap: 10.340776907910568.





In [8]:
# Manually specified stand-in for command-line arguments
class Args:
    """Lightweight replacement for an argparse namespace.

    Stores the eight experiment settings as same-named instance attributes.
    """

    def __init__(self, seed, alpha, predictor, score, penalty, kreg, weight, split):
        # Write all settings into the instance namespace in one step.
        vars(self).update(
            seed=seed,
            alpha=alpha,
            predictor=predictor,
            score=score,
            penalty=penalty,
            kreg=kreg,
            weight=weight,
            split=split,
        )

args = Args(seed=0, alpha=0.1, predictor="Standard", score="THR", penalty=1, kreg=0, weight=0.2, split="random")

# Fix the random seed before the model is built and the dataset is split.
fix_randomness(seed=args.seed)

model_name = 'ResNet101'

# Load the ImageNet-pretrained model.
# NOTE(review): the IMAGENET1K_V1 classifier head is used as-is, so the model
# scores ImageNet classes while the labels below are MNIST digit indices.
# Presumably this is why the recorded prediction sets are so large; confirm
# whether a head fine-tuned on MNIST was intended.
model = torchvision.models.resnet101(weights="IMAGENET1K_V1", progress=True)
model_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(model_device)
# Inference only: make sure BatchNorm layers use their running statistics.
# Harmless if the predictor also switches the model to eval mode.
model.eval()

# Load dataset (MNIST test split) with ImageNet-style preprocessing.
transform = trn.Compose([trn.Resize(256),
                         trn.CenterCrop(224),
                         trn.Grayscale(num_output_channels=3),  # replicate the single channel into three channels
                         trn.ToTensor(),
                         trn.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                         ])
# Use MNIST
dataset = torchvision.datasets.MNIST(root=os.path.join(os.path.expanduser('~'), "data"), train=False, download=True, transform=transform)

# Split by ratio instead of hard-coded sizes (adjust cal_ratio as needed).
# For a 10000-sample dataset this yields the original 5000/5000 split.
cal_ratio = 0.5
cal_length = int(cal_ratio * len(dataset))
test_length = len(dataset) - cal_length
cal_dataset, test_dataset = torch.utils.data.random_split(dataset, [cal_length, test_length])
cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=False, pin_memory=True)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True)

alpha = args.alpha
print(f"Experiment--Data : MNIST, Model : {model_name}, Score : {args.score}, Predictor : {args.predictor}, Alpha : {alpha}")

# Number of label classes in the dataset, passed to the CovGap metric.
num_classes = 10

# Select the non-conformity score function by name.
if args.score == "THR":
    score_function = THR()
elif args.score == "APS":
    score_function = APS()
elif args.score == "RAPS":
    score_function = RAPS(args.penalty, args.kreg)
elif args.score == "SAPS":
    score_function = SAPS(weight=args.weight)
else:
    raise NotImplementedError(f"Unsupported score function: {args.score!r}")

# Select the conformal predictor by name.
if args.predictor == "Standard":
    predictor = SplitPredictor(score_function, model)
elif args.predictor == "ClassWise":
    predictor = ClassWisePredictor(score_function, model)
elif args.predictor == "Cluster":
    predictor = ClusterPredictor(score_function, model, args.seed)
else:
    raise NotImplementedError(f"Unsupported predictor: {args.predictor!r}")
print(f"The size of calibration set is {len(cal_dataset)}.")
predictor.calibrate(cal_data_loader, alpha)

# Build prediction sets for the held-out test split.
print("Testing examples...")
prediction_sets = []
labels_list = []
with torch.no_grad():
    for examples in tqdm(test_data_loader):
        tmp_x, tmp_label = examples[0], examples[1]
        prediction_sets_batch = predictor.predict(tmp_x)
        prediction_sets.extend(prediction_sets_batch)
        labels_list.append(tmp_label)
test_labels = torch.cat(labels_list)

# Report coverage, average set size and the class-conditional coverage gap.
metrics = Metrics()
print("Evaluating prediction sets...")
print(f"Coverage_rate: {metrics('coverage_rate')(prediction_sets, test_labels)}.")
print(f"Average_size: {metrics('average_size')(prediction_sets, test_labels)}.")
print(f"CovGap: {metrics('CovGap')(prediction_sets, test_labels, alpha, num_classes)}.")


Experiment--Data : MNIST, Model : ResNet101, Score : THR, Predictor : Standard, Alpha : 0.1
The size of calibration set is 5000.
Testing examples...


100%|██████████| 79/79 [00:09<00:00,  8.44it/s]

Evaluating prediction sets...
Coverage_rate: 0.9032.
Average_size: 942.0134.
CovGap: 10.83743783916796.





In [9]:
# Manually specified stand-in for command-line arguments
class Args:
    """Notebook-side substitute for parsed command-line options.

    The constructor copies each argument onto the instance under its own name.
    """

    def __init__(self, seed, alpha, predictor, score, penalty, kreg, weight, split):
        # Collect the settings first, then store them on the instance.
        options = {
            "seed": seed,
            "alpha": alpha,
            "predictor": predictor,
            "score": score,
            "penalty": penalty,
            "kreg": kreg,
            "weight": weight,
            "split": split,
        }
        self.__dict__.update(options)

args = Args(seed=0, alpha=0.1, predictor="Standard", score="THR", penalty=1, kreg=0, weight=0.2, split="random")

# Fix the random seed before the model is built and the dataset is split.
fix_randomness(seed=args.seed)

model_name = 'ResNet101'

# Load the ImageNet-pretrained model.
# NOTE(review): the IMAGENET1K_V1 classifier head is used as-is, so the model
# scores ImageNet classes while the labels below are FashionMNIST indices.
# Presumably this is why the recorded prediction sets are so large; confirm
# whether a head fine-tuned on FashionMNIST was intended.
model = torchvision.models.resnet101(weights="IMAGENET1K_V1", progress=True)
model_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(model_device)
# Inference only: make sure BatchNorm layers use their running statistics.
# Harmless if the predictor also switches the model to eval mode.
model.eval()

# Load dataset (FashionMNIST test split) with ImageNet-style preprocessing.
transform = trn.Compose([trn.Resize(256),
                         trn.CenterCrop(224),
                         trn.Grayscale(num_output_channels=3),  # replicate the single channel into three channels
                         trn.ToTensor(),
                         trn.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                         ])
# Use FashionMNIST
dataset = torchvision.datasets.FashionMNIST(root=os.path.join(os.path.expanduser('~'), "data"), train=False, download=True, transform=transform)

# Split by ratio instead of hard-coded sizes (adjust cal_ratio as needed).
# For a 10000-sample dataset this yields the original 5000/5000 split.
cal_ratio = 0.5
cal_length = int(cal_ratio * len(dataset))
test_length = len(dataset) - cal_length
cal_dataset, test_dataset = torch.utils.data.random_split(dataset, [cal_length, test_length])
cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=False, pin_memory=True)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True)

alpha = args.alpha
print(f"Experiment--Data : FashionMNIST, Model : {model_name}, Score : {args.score}, Predictor : {args.predictor}, Alpha : {alpha}")

# Number of label classes in the dataset, passed to the CovGap metric.
num_classes = 10

# Select the non-conformity score function by name.
if args.score == "THR":
    score_function = THR()
elif args.score == "APS":
    score_function = APS()
elif args.score == "RAPS":
    score_function = RAPS(args.penalty, args.kreg)
elif args.score == "SAPS":
    score_function = SAPS(weight=args.weight)
else:
    raise NotImplementedError(f"Unsupported score function: {args.score!r}")

# Select the conformal predictor by name.
if args.predictor == "Standard":
    predictor = SplitPredictor(score_function, model)
elif args.predictor == "ClassWise":
    predictor = ClassWisePredictor(score_function, model)
elif args.predictor == "Cluster":
    predictor = ClusterPredictor(score_function, model, args.seed)
else:
    raise NotImplementedError(f"Unsupported predictor: {args.predictor!r}")
print(f"The size of calibration set is {len(cal_dataset)}.")
predictor.calibrate(cal_data_loader, alpha)

# Build prediction sets for the held-out test split.
print("Testing examples...")
prediction_sets = []
labels_list = []
with torch.no_grad():
    for examples in tqdm(test_data_loader):
        tmp_x, tmp_label = examples[0], examples[1]
        prediction_sets_batch = predictor.predict(tmp_x)
        prediction_sets.extend(prediction_sets_batch)
        labels_list.append(tmp_label)
test_labels = torch.cat(labels_list)

# Report coverage, average set size and the class-conditional coverage gap.
metrics = Metrics()
print("Evaluating prediction sets...")
print(f"Coverage_rate: {metrics('coverage_rate')(prediction_sets, test_labels)}.")
print(f"Average_size: {metrics('average_size')(prediction_sets, test_labels)}.")
print(f"CovGap: {metrics('CovGap')(prediction_sets, test_labels, alpha, num_classes)}.")


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 10401737.11it/s]


Extracting /root/data/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 174264.00it/s]


Extracting /root/data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3117826.84it/s]


Extracting /root/data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 17918902.07it/s]


Extracting /root/data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/data/FashionMNIST/raw

Experiment--Data : FashionMNIST, Model : ResNet101, Score : THR, Predictor : Standard, Alpha : 0.1
The size of calibration set is 5000.
Testing examples...


100%|██████████| 79/79 [00:09<00:00,  8.52it/s]


Evaluating prediction sets...
Coverage_rate: 0.7498.
Average_size: 894.687.
CovGap: 20.33783188070897.


In [13]:
# Manually specified stand-in for command-line arguments
class Args:
    """Holds the experiment options that would otherwise come from argparse.

    All eight constructor arguments are required and are exposed as
    same-named attributes.
    """

    def __init__(self, seed, alpha, predictor, score, penalty, kreg, weight, split):
        # Reproducibility and target error rate.
        self.seed = seed
        self.alpha = alpha

        # Which predictor / score function to build.
        self.predictor = predictor
        self.score = score

        # Hyperparameters forwarded to the score functions.
        self.penalty = penalty
        self.kreg = kreg
        self.weight = weight

        # Data split setting.
        self.split = split

args = Args(seed=0, alpha=0.1, predictor="Standard", score="THR", penalty=1, kreg=0, weight=0.2, split="random")

# Fix the random seed before the model is built and the dataset is split.
fix_randomness(seed=args.seed)

model_name = 'ResNet101'

# Load the ImageNet-pretrained model.
# NOTE(review): the IMAGENET1K_V1 classifier head is used as-is, so the model
# scores ImageNet classes while the labels below are SVHN digit indices.
# Presumably this is why the recorded prediction sets are so large; confirm
# whether a head fine-tuned on SVHN was intended.
model = torchvision.models.resnet101(weights="IMAGENET1K_V1", progress=True)
model_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(model_device)
# Inference only: make sure BatchNorm layers use their running statistics.
# Harmless if the predictor also switches the model to eval mode.
model.eval()

# Load dataset (SVHN test split) with ImageNet-style preprocessing.
transform = trn.Compose([trn.Resize(256),
                         trn.CenterCrop(224),
                         trn.ToTensor(),
                         trn.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                         ])
# Use SVHN
dataset = torchvision.datasets.SVHN(root=os.path.join(os.path.expanduser('~'), "data"), split='test', download=True, transform=transform)

# Split the dataset by ratio; the test split takes whatever the calibration
# split leaves so the two lengths always sum to len(dataset).
cal_ratio = 0.5
cal_length = int(cal_ratio * len(dataset))
test_length = len(dataset) - cal_length
cal_dataset, test_dataset = random_split(dataset, [cal_length, test_length])

cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=False, pin_memory=True)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True)

alpha = args.alpha
print(f"Experiment--Data : SVHN, Model : {model_name}, Score : {args.score}, Predictor : {args.predictor}, Alpha : {alpha}")

# Number of label classes in the dataset, passed to the CovGap metric.
num_classes = 10

# Select the non-conformity score function by name.
if args.score == "THR":
    score_function = THR()
elif args.score == "APS":
    score_function = APS()
elif args.score == "RAPS":
    score_function = RAPS(args.penalty, args.kreg)
elif args.score == "SAPS":
    score_function = SAPS(weight=args.weight)
else:
    raise NotImplementedError(f"Unsupported score function: {args.score!r}")

# Select the conformal predictor by name.
if args.predictor == "Standard":
    predictor = SplitPredictor(score_function, model)
elif args.predictor == "ClassWise":
    predictor = ClassWisePredictor(score_function, model)
elif args.predictor == "Cluster":
    predictor = ClusterPredictor(score_function, model, args.seed)
else:
    raise NotImplementedError(f"Unsupported predictor: {args.predictor!r}")
print(f"The size of calibration set is {len(cal_dataset)}.")
predictor.calibrate(cal_data_loader, alpha)

# Build prediction sets for the held-out test split.
print("Testing examples...")
prediction_sets = []
labels_list = []
with torch.no_grad():
    for examples in tqdm(test_data_loader):
        tmp_x, tmp_label = examples[0], examples[1]
        prediction_sets_batch = predictor.predict(tmp_x)
        prediction_sets.extend(prediction_sets_batch)
        labels_list.append(tmp_label)
test_labels = torch.cat(labels_list)

# Report coverage, average set size and the class-conditional coverage gap.
metrics = Metrics()
print("Evaluating prediction sets...")
print(f"Coverage_rate: {metrics('coverage_rate')(prediction_sets, test_labels)}.")
print(f"Average_size: {metrics('average_size')(prediction_sets, test_labels)}.")
print(f"CovGap: {metrics('CovGap')(prediction_sets, test_labels, alpha, num_classes)}.")


Using downloaded and verified file: /root/data/test_32x32.mat
Experiment--Data : SVHN, Model : ResNet101, Score : THR, Predictor : Standard, Alpha : 0.1
The size of calibration set is 13016.
Testing examples...


100%|██████████| 204/204 [00:30<00:00,  6.66it/s]


Evaluating prediction sets...
Coverage_rate: 0.8999692685925015.
Average_size: 957.0096035648432.
CovGap: 10.822547382309603.
