## Basic importing and Hyperparameters

In [1]:
import os
import time
import os.path as osp

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms
import torchvision
from torch.utils.data import Subset, DataLoader

import matplotlib.pyplot as plt
from PIL import Image
from torchcp.classification.scores import THR, APS, SAPS, RAPS
from torchcp.classification.predictors import SplitPredictor, ClusterPredictor, ClassWisePredictor
from torchcp.classification.loss import ConfTr

BATCH_SIZE = 64

In [2]:
# random seed
SEED = 1 
NUM_CLASS = 10

# Training
BATCH_SIZE = 128
NUM_EPOCHS = 30
EVAL_INTERVAL=1
SAVE_DIR = './log'

# Optimizer
LEARNING_RATE = 1e-3
MOMENTUM = 0.9
STEP=5
GAMMA=0.5


## Choice of Datasets

In [3]:
"""
Change the dset num from 1 to 6 to chose the dataset
1: STL10
2: CIFAR10
3: CIFAR100
4: Stanford Dogs
5: FasthionMINIST
6: UTKFace
"""
dset = 1 # change this num in 1,2,3,4,5,6 to chose dataset.

#### 1. STL10

In [4]:
if dset == 1:
    ## STL10
    transform_cifar10_train = transforms.Compose([
        transforms.Resize(size=128),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_cifar10_test = transforms.Compose([
        # Add a resize operation to match training size
        transforms.Resize(size=32),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    train_set = torchvision.datasets.STL10(root='/data/lab/data', split='train',
                                           download=True, transform=transform_cifar10_train)
    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                             shuffle=True, num_workers=2,pin_memory=True)
    test_set = torchvision.datasets.STL10(root='/data/lab/data', split='test',
                                           download=True, transform=transform_cifar10_test)
    cal_dataset, test_dataset = torch.utils.data.random_split(test_set, [4000, 4000])
    # test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
    #                                          shuffle=False, num_workers=2,pin_memory=True)
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=1600, shuffle=False, pin_memory=True)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1600, shuffle=False, pin_memory=True)



    class_names = ['airplane', 'bird', 'car', 'cat', 'deer', 'dog', 'horse', 'monkey', 'ship', 'truck']


    dataset_name = 'STL10'

    data_loader_size = len(train_dataloader.dataset)
    print(f"Total number of samples in the STL10: {data_loader_size}")

    class_less = True # mark the amount of class number(less or equal than 10 is marked True)

    # Preparing a calibration data and a test data.
    train_data_loader = train_dataloader
    # cal_data_loader = torch.utils.data.DataLoader(train_set, batch_size=1600, shuffle=False, pin_memory=True)
    # test_data_loader = torch.utils.data.DataLoader(test_set, batch_size=1600, shuffle=False, pin_memory=True)

Files already downloaded and verified
Files already downloaded and verified
Total number of samples in the STL10: 5000


#### 2. CIFAR10

In [5]:
if dset == 2:
    # CIFAR10
    transform_cifar10_test = transforms.Compose([
        transforms.Resize(size=32),
        transforms.CenterCrop(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    test_set = torchvision.datasets.CIFAR10(root='/data/lab/data', train=False,
                                           download=True, transform=transform_cifar10_test)
    train_set = torchvision.datasets.CIFAR10(root='/data/lab/data', train=True,
                                           download=True, transform=transform_cifar10_test)

    # resize test and train dataset
    test_set = Subset(test_set, range(10000))##10000
    train_set = Subset(train_set, range(10000))##10000
    # #######################


    # test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
    #                                          shuffle=True, num_workers=2)
    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                             shuffle=True, num_workers=2,pin_memory=True)
    # set calibration and test dataloader
    cal_dataset, test_dataset = torch.utils.data.random_split(test_set, [5000, 5000])
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=1600, shuffle=False, pin_memory=True)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1600, shuffle=False, pin_memory=True)

    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    dataset_name = 'CIFAR10'

    data_loader_size = len(test_data_loader.dataset)
    print(f"Total number of samples in the CIFAR10: {data_loader_size}")

    class_less = True # mark the amount of class number(less or equal than 10 is marked True)

#### 3. CIFAR-100

In [6]:
if dset == 3:
    ## CIFAR100
    transform_cifar100_test = transforms.Compose([
        transforms.Resize(size=32),
        transforms.CenterCrop(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    test_set = torchvision.datasets.CIFAR100(root='/data/lab/data', train=False,
                                           download=True, transform=transform_cifar100_test)
    train_set = torchvision.datasets.CIFAR100(root='/data/lab/data', train=True,
                                           download=True, transform=transform_cifar100_test)

    # resize test and train dataset
    test_set = Subset(test_set, range(10000))##10000
    train_set = Subset(train_set, range(10000))##10000
    # #######################

    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                             shuffle=True, num_workers=2)
    # set calibration and test dataloader
    cal_dataset, test_dataset = torch.utils.data.random_split(test_set, [5000, 5000])
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=1600, shuffle=False, pin_memory=True)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1600, shuffle=False, pin_memory=True)

    class_names = [
        'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle',
        'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle',
        'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
        'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
        'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
        'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree',
        'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose',
        'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar',
        'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout',
        'tulip', 'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman', 'worm'
    ]

    dataset_name = 'CIFAR100'

    data_loader_size = len(test_data_loader.dataset)
    print(f"Total number of samples in the CIFAR100: {data_loader_size}")

    class_less = False # mark the amount of class number(less or equal than 10 is marked True)

#### 4. Stanford Dogs

In [7]:
if dset == 4:
    ##stanford dogs
    size = 128
    transform_stanford_dogs_test = transforms.Compose([
        transforms.Resize(size=size),
        transforms.CenterCrop(size=size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Replace 'root_directory' with the path to the directory containing the Stanford Dogs dataset
    test_set = datasets.ImageFolder(root='/data/lab/data/Images', transform=transform_stanford_dogs_test)
    class_names = test_set.classes

    test_set = Subset(test_set, range(20000))##10000

    cal_dataset, test_dataset, train_set = torch.utils.data.random_split(test_set, [5000, 5000,10000])

    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                             shuffle=True, num_workers=2)
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=256, shuffle=True, pin_memory=True)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=True, pin_memory=True)


    # List of class names for the Stanford Dogs dataset (based on the breed names)

    dataset_name = 'Stanford Dogs'

    data_loader_size = len(test_data_loader.dataset)
    print(f"Total number of samples in the Stanford Dogs dataset: {data_loader_size}")

    class_less = False # mark the amount of class number(less or equal than 10 is marked True)

#### 5. Fashionminist

In [8]:
if dset == 5:
    transform_fashionmnist_test = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize(size=128),
        transforms.CenterCrop(size=(128, 128)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))  # Use FashionMNIST mean and std
    ])

    test_set = torchvision.datasets.FashionMNIST(root='/data/lab/data', 
                                                 download=True, 
                                                 train=False,  # Set train=False for the test set
                                                 transform=transform_fashionmnist_test)
    test_set = Subset(test_set, range(10000))##5000
    train_set = torchvision.datasets.FashionMNIST(root='/data/lab/data', train=True,
                                           download=True, transform=transform_fashionmnist_test)
    train_set = Subset(train_set, range(10000))##5000

    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                             shuffle=True, num_workers=2)


    cal_dataset, test_dataset = torch.utils.data.random_split(test_set, [5000, 5000])
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=256, shuffle=False, pin_memory=True)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=False, pin_memory=True)

    class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

    dataset_name = 'FashionMNIST'

    data_loader_size = len(test_data_loader.dataset)
    print(f"Total number of samples in the FashionMNIST test set: {data_loader_size}")

    class_less = len(class_names) <= 10  # Check if the number of classes is 10 or less


#### 6. UTKFace

In [9]:
if dset == 6:    
    ##UTKFace for RESNET18
    BATCH_SIZE = 256
    size = 128
    transform_stanford_dogs = transforms.Compose([
        transforms.Resize(size=size),
        transforms.CenterCrop(size=size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    root_directory = '/data/lab/data/UTKFace_age'
    full_dataset = datasets.ImageFolder(root=root_directory, transform=transform_stanford_dogs)

    # Define the ratio of train and test split
    train_ratio = 0.6
    test_ratio = 0.2
    cal_ratio = 0.2

    # Calculate the sizes of train and test sets
    train_size = int(train_ratio * len(full_dataset))
    test_size = int(test_ratio * len(full_dataset))
    cal_size = len(full_dataset) - train_size - test_size

    # Split the dataset into train and test sets
    train_set, test_set,cal_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size,cal_size])

    # Create DataLoaders for train and test sets
    train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    test_data_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    cal_data_loader = DataLoader(cal_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

    # List of class names for the Stanford Dogs dataset (based on the breed names)
    class_names = full_dataset.classes

    num_classes = len(class_names)

    dataset_name = 'UTKFace_age'

    print(num_classes)
    # data_loader_size = len(test_dataloader.dataset)
    # print(f"Total number of samples in the UTKFace dataset: {data_loader_size}")

    class_less = False # mark the amount of class number(less or equal than 10 is marked True)

## Model defination

In [10]:
model_name = 'ResNet18'
def train(criterion):
        # resnet18
        model = torchvision.models.resnet18(weights="IMAGENET1K_V1", progress=True)
        num_ftrs = model.fc.in_features

        # 替换模型最后的全连接层
        model.fc = nn.Linear(num_ftrs, len(class_names))

        # 将模型移至GPU（如果可用）
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = model.to(device)

        # 定义损失函数和优化器
        criterion = criterion
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

        # 训练模型
        for epoch in range(5):  # 训练10个epoch
            running_loss = 0.0
            for i, data in enumerate(train_dataloader, 0):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                if i % 100 == 99:    # 每100个batch输出一次损失
                    print(f'Epoch [{epoch + 1}, {i + 1}], Loss: {running_loss / 100:.4f}')
                    running_loss = 0.0
            print(f'Epoch [{epoch + 1}, {i + 1}], Loss: {running_loss / 100:.4f}')


        print('Finished Training')
        return model

## Conformal Prediction

In [None]:
alpha = 0.1
weight = 0.2 ## from 0.02 to 0.6
penalty = 0.001 ## from 0.001 to 0.01

# print basic settings
print(f"Experiment--Data : {dataset_name}, Model : {model_name}, Alpha : {alpha}")
print(f"The size of calibration set is {len(cal_dataset)}.")

# write outcomes to a txt
with open('Wide_Compare.txt', 'a') as file:
            file.write(f"--------------------------------\n")
            file.write(f"Dataset: {dataset_name}\n")
            file.write(f"Model: {model_name}\n")
            file.write(f"Size of classes: {len(class_names)}\n")
            file.write(f"SAPS weight: {weight}\n")
            file.write(f"PAPS penalty: {penalty}\n")
            
t = 0 # recorder of score and predictor

# Options of score function: THR, APS, SAPS, RAPS
# Define a conformal prediction algorithm. Optional: SplitPredictor, ClusterPredictor, ClassWisePredictor
for score in [THR(),APS(),SAPS(weight = weight),RAPS(penalty = penalty)]:
    for predictor in [SplitPredictor
                      ,ClusterPredictor
                      ,ClassWisePredictor]:
        #define predictor and criterion
        predictor1 = predictor(score_function=score)
        criterion = ConfTr(weight=0.01,
                        predictor=predictor1,
                        alpha=0.05,
                        fraction=0.5,
                        loss_type="valid",
                        base_loss_fn=nn.CrossEntropyLoss())
        ####### train #######
        model = train(criterion)
        ####### Calibrate prediction #######
        predictor = predictor(score_function=score,model = model)
        
        # Calibrating the predictor with significance level as 0.1
        predictor.calibrate(cal_data_loader, alpha)
        evaluate = predictor.evaluate(test_data_loader)
        
        # write outcomes to a txt
        with open('Wide_Compare.txt', 'a') as file:
            file.write(f"--------------{t}--------------\n")
            file.write(f"Coverage_rate: {evaluate['Coverage_rate']}\n")
            file.write(f"Average_size: {evaluate['Average_size']}\n")
        
        # print outcomes
        print(f'{t}: {evaluate}')
        if t%3 == 2:
            print('\n')
        t+=1

Experiment--Data : STL10, Model : ResNet18, Alpha : 0.1
The size of calibration set is 4000.
Epoch [1, 40], Loss: 0.6641




Epoch [2, 40], Loss: 0.2558
Epoch [3, 40], Loss: 0.1873
Epoch [4, 40], Loss: 0.1517
Epoch [5, 40], Loss: 0.1356
Finished Training
0: {'Coverage_rate': 0.89575, 'Average_size': 6.099}
Epoch [1, 40], Loss: 0.6601
Epoch [2, 40], Loss: 0.2607
Epoch [3, 40], Loss: 0.1877
Epoch [4, 40], Loss: 0.1547
Epoch [5, 40], Loss: 0.1281
Finished Training
1: {'Coverage_rate': 0.892, 'Average_size': 6.44675}
Epoch [1, 40], Loss: 0.9196




Epoch [2, 40], Loss: 0.5132
Epoch [3, 40], Loss: 0.4511
Epoch [4, 40], Loss: 0.4199
Epoch [5, 40], Loss: 0.4015
Finished Training
2: {'Coverage_rate': 0.903, 'Average_size': 4.74325}


Epoch [1, 40], Loss: 0.6871
Epoch [2, 40], Loss: 0.2586
Epoch [3, 40], Loss: 0.1912
Epoch [4, 40], Loss: 0.1643
Epoch [5, 40], Loss: 0.1489
Finished Training
3: {'Coverage_rate': 0.89575, 'Average_size': 5.70325}
Epoch [1, 40], Loss: 0.6642
Epoch [2, 40], Loss: 0.2540
Epoch [3, 40], Loss: 0.1939
Epoch [4, 40], Loss: 0.1655
Epoch [5, 40], Loss: 0.1450
Finished Training
