### Libraries

In [None]:
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from opacus import PrivacyEngine
from opacus.accountants import create_accountant
from tqdm import tqdm
import shutil
import os, sys, inspect
sys.path.insert(1, os.path.join(sys.path[0], '../'))
import torch
import torchvision as tv
import argparse
import time
import numpy as np
import random
from scipy.stats import binom
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import pickle as pkl
import seaborn as sns
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from scipy.optimize import brentq
from scipy.stats import binom, beta
from scipy.special import softmax
import pdb

### CNN Model

In [None]:
def convnet(num_classes):
    """Build the small GroupNorm CNN used by both training sections.

    The network has four 3x3 convolution stages (3 -> 64 -> 128 -> 192 -> 256
    channels), each followed by ``GroupNorm`` with 8 groups and ``ReLU``.
    The first three stages end with 2x2 average pooling, the last one with
    global average pooling; the pooled 256 features are flattened and mapped
    to ``num_classes`` logits by a linear layer.

    All layers live in one flat ``nn.Sequential`` in a fixed order, so the
    state-dict keys are the positional indices ``0`` ... ``17``.
    """
    widths = (3, 64, 128, 192, 256)
    final_stage = len(widths) - 2

    layers = []
    for stage, (c_in, c_out) in enumerate(zip(widths[:-1], widths[1:])):
        layers.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
        layers.append(nn.GroupNorm(8, c_out))  # GroupNorm with 8 groups
        layers.append(nn.ReLU())
        if stage == final_stage:
            # Last stage: collapse the remaining spatial extent to 1x1.
            layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        else:
            layers.append(nn.AvgPool2d(kernel_size=2, stride=2))

    layers.append(nn.Flatten(start_dim=1, end_dim=-1))
    layers.append(nn.Linear(256, num_classes, bias=True))
    return nn.Sequential(*layers)

### Loading Datasets

In [None]:
# Load the CIFAR-10 datasets.
# Per-channel normalisation shared by the training and the test pipeline.
normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])

# Training pipeline: random augmentation followed by normalisation.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Add color jitter
    transforms.ToTensor(),
    normalize,
])

# Test pipeline: deterministic (no augmentation), so that the reported test
# accuracy and the "best model" checkpoint selection do not depend on random
# flips/crops/jitter applied to the evaluation images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)


train_loader = DataLoader(train_dataset, batch_size=252, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=252, shuffle=False)

### Training and evaluation functions

In [None]:
def train(model, device, train_loader, optimizer, criterion, privacy_engine=None):
    """Run one training epoch over ``train_loader``.

    Parameters:
    model: network to optimise (switched to training mode here).
    device: device the batches are moved to.
    train_loader: iterable of ``(inputs, labels)`` batches.
    optimizer: optimizer stepped once per batch.
    criterion: loss function called as ``criterion(outputs, labels)``.
    privacy_engine: accepted but not used inside this function.

    Returns:
    tuple: ``(mean loss per batch, accuracy in percent)``.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_x, batch_y in tqdm(train_loader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        hard_pred = logits.max(1)[1]  # index of the largest logit per row
        n_seen += batch_y.size(0)
        n_correct += hard_pred.eq(batch_y).sum().item()

    # The loss is averaged over batches, the accuracy over examples.
    return loss_sum / len(train_loader), 100. * n_correct / n_seen

def evaluate(model, device, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Parameters:
    model: network to evaluate (switched to eval mode here).
    device: device the batches are moved to.
    test_loader: iterable of ``(inputs, labels)`` batches.
    criterion: loss function called as ``criterion(outputs, labels)``.

    Returns:
    tuple: ``(mean loss per batch, accuracy in percent)``.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()

            hard_pred = logits.max(1)[1]  # index of the largest logit per row
            n_seen += batch_y.size(0)
            n_correct += hard_pred.eq(batch_y).sum().item()

    # The loss is averaged over batches, the accuracy over examples.
    return loss_sum / len(test_loader), 100. * n_correct / n_seen



### Training and saving the private model

In [None]:


# Intended to expand the range of RDP orders (alphas) for tighter privacy bounds.
# NOTE(review): the accountant returned by create_accountant("rdp") is not bound
# to any name and is not passed to the PrivacyEngine created below, so this
# assignment presumably has no effect on the accounting -- confirm against the
# Opacus API before relying on it.
create_accountant("rdp").alphas = list(range(2, 100)) + [128, 256, 512, 1024]

# Hyperparameters
EPOCHS = 90  # Train epochs
EPSILON = 8  # model privacy budget
DELTA = 1e-5
MAX_GRAD_NORM = 2  # Maximum gradient norm for clipping
INITIAL_LR = 0.01  # Initial learning rate

# Initialize model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = convnet(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=INITIAL_LR, weight_decay=1e-4)  # Using RMSprop with weight decay

# Initialize PrivacyEngine
privacy_engine = PrivacyEngine()

# Wrap model, optimizer and loader for DP training targeting (EPSILON, DELTA)
# over EPOCHS epochs. Note that this REBINDS the module-level names `model`,
# `optimizer` and `train_loader` to the objects returned by Opacus.
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM,
)

# Training loop
best_acc = 0.0

for epoch in range(EPOCHS):
    # Calculate learning rate using cosine schedule.
    # The denominator is EPOCHS + 1, so the rate never decays all the way to 0.
    lr = INITIAL_LR * 0.5 * (1 + np.cos(np.pi * epoch / (EPOCHS + 1)))
    
    # Update the optimizer's learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # Train and evaluate
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, privacy_engine)
    test_loss, test_acc = evaluate(model, device, test_loader, criterion)

    # Compute privacy budget (epsilon) spent so far, as reported by the engine
    epsilon = privacy_engine.get_epsilon(delta=DELTA)
    print(f"Epoch {epoch+1}: LR: {lr:.6f}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, "
          f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%, Epsilon: {epsilon:.4f}")

    # Save the best model (selected by accuracy on the test loader).
    # The state dict is taken from the Opacus-wrapped model; get_model() later
    # strips a "_module." prefix from the keys when loading this file.
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), "best_model_private.pth")  

print(f"Best Test Accuracy: {best_acc:.2f}%")

### Training and saving nonprivate model

In [None]:
# Hyperparameters
EPOCHS = 90  # Number of training epochs
INITIAL_LR = 0.01  # Initial learning rate

# Build a fresh, plain training loader for the non-private run.
# The private-training section rebinds `train_loader` to the loader returned by
# `privacy_engine.make_private_with_epsilon`; reusing that name here would make
# the "non-private" baseline depend on whether (and when) that section was run.
nonprivate_train_loader = DataLoader(train_dataset, batch_size=252, shuffle=True)

# Initialize model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = convnet(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=INITIAL_LR, weight_decay=1e-4)  # Use RMSprop with weight decay


# Training loop
best_acc = 0.0

for epoch in range(EPOCHS):
    # Calculate learning rate using cosine schedule.
    # The denominator is EPOCHS + 1, so the rate never decays all the way to 0.
    lr = INITIAL_LR * 0.5 * (1 + np.cos(np.pi * epoch / (EPOCHS + 1)))

    # Update the optimizer's learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # Train and evaluate
    train_loss, train_acc = train(model, device, nonprivate_train_loader, optimizer, criterion)
    test_loss, test_acc = evaluate(model, device, test_loader, criterion)

    print(f"Epoch {epoch+1}: LR: {lr:.6f}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, "
          f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

    # Save the best model (selected by accuracy on the test loader)
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), "best_model_nonprivate.pth")

print(f"Best Test Accuracy: {best_acc:.2f}%")

## Conformal Prediction Steps

### Helper functions for PCOSQ

In [None]:
def NoisyRC(range_bounds, D, sigma, rng=None):
    """
    Noisy Range Count for float values with Gaussian noise.

    Parameters:
    range_bounds (tuple): A tuple (a, b) representing the closed range [a, b].
    D (array-like): The dataset (anything ``np.asarray`` can convert to floats).
    sigma (float): The standard deviation of the Gaussian noise.
    rng (np.random.Generator, optional): Source of the noise. When omitted,
        the global ``np.random`` state is used.

    Returns:
    int: The noisy count of elements in the range [a, b], floored and
        clamped to be non-negative.
    """
    a, b = range_bounds
    values = np.asarray(D, dtype=float)
    # Vectorised count; avoids a Python-level loop over every element.
    count = int(np.count_nonzero((values >= a) & (values <= b)))
    noise_source = np.random if rng is None else rng
    noisy_count = count + noise_source.normal(0, sigma)
    return max(0, int(np.floor(noisy_count)))  # Ensure non-negative count


def PrivQuant(D, alpha, rho, seed, lower_bound=0, upper_bound=1, delta=1e-10):
    """
    Differentially Private Quantile Approximation Algorithm without integer conversion.

    Runs a noisy binary search over [lower_bound, upper_bound]: at each step
    the number of points in [lower_bound, mid] is released with Gaussian noise
    and compared with the target rank m = ceil((1 - alpha) * (n + 1)).

    Parameters:
    D (array-like): The dataset (anything ``np.asarray`` can convert to floats).
    alpha (float): Miscoverage level; the target quantile level is 1 - alpha.
    rho (float): The privacy parameter (smaller = more private). Must be > 0.
    seed (int or None): Seed of the local random generator that draws the
        noise, so that a given seed reproduces the same output.
    lower_bound (float): Lower bound of the search space.
    upper_bound (float): Upper bound of the search space.
    delta (float): Small positive value to ensure convergence.

    Returns:
    float: A differentially private approximation of the quantile x_{(m)},
        rounded to two decimals.

    Raises:
    ValueError: If ``rho`` or ``delta`` is not positive, or the bounds are
        not ordered.
    """
    if rho <= 0:
        raise ValueError("rho must be > 0.")
    if delta <= 0:
        raise ValueError("delta must be > 0.")
    if not (lower_bound < upper_bound):
        raise ValueError("lower_bound must be smaller than upper_bound.")

    # Convert once so that every range count below is a cheap array operation.
    values = np.asarray(D, dtype=float)
    n = values.size
    max_iterations = max(1, int(np.ceil(np.log2((upper_bound - lower_bound) / delta))))
    sigma = np.sqrt(max_iterations / (2 * rho))  # Noise scale for Gaussian mechanism
    m = int(np.ceil((1 - alpha) * (n + 1)))

    # The noise comes from NumPy, so the seed must drive a NumPy generator
    # (seeding the stdlib `random` module would leave the noise unseeded).
    rng = np.random.default_rng(seed)

    left, right = lower_bound, upper_bound
    for _ in range(max_iterations):
        mid = (left + right) / 2
        c = NoisyRC((lower_bound, mid), values, sigma, rng=rng)

        if c < m:
            left = mid + delta
        else:
            right = mid

    return np.round((left + right) / 2, 2)



### Helper functions for EXPONQ

In [None]:
def get_qtilde(n,alpha,gamma,epsilon,m):
    """Return the adjusted quantile level used by the private quantile routine.

    The level is the sum of a finite-sample term
    ``(n+1)(1-alpha) / (n(1-gamma*alpha))`` and a privacy term
    ``2/(epsilon*n) * log(m/(gamma*alpha))``, capped just below 1.
    """
    finite_sample_term = (n+1)*(1-alpha)/(n*(1-gamma*alpha))
    privacy_term = 2/(epsilon*n)*np.log(m/(gamma*alpha))
    # Keep the level strictly below 1 so that callers can divide by (1 - level).
    return min(finite_sample_term + privacy_term, 1-1e-12)

def generate_scores(n):
    """Draw ``n`` i.i.d. scores uniformly from [0, 1) using the global NumPy RNG."""
    shape = (n,)
    return np.random.uniform(size=shape)

def hist_2_cdf(cumsum, bins, n):
    """Return a function ``t -> value`` built from cumulative counts on ``bins``.

    The returned function gives 1.0 above the second-to-last bin edge, 0.0
    below the second edge, and otherwise ``1 - cumsum[k] / n`` where ``k`` is
    the left insertion index of ``t`` in ``bins``.
    """
    def _cdf(t):
        # Saturate outside the interior bin edges.
        if t > bins[-2]:
            return 1.0
        if t < bins[1]:
            return 0.0
        position = np.searchsorted(bins, t)
        return 1-cumsum[position]/n
    return _cdf

def get_private_quantile(scores, alpha, epsilon, gamma, bins):
    """Sample a private quantile of ``scores`` from the grid ``bins``.

    Every bin edge receives a cost equal to the larger of two rescaled
    cumulative masses (counts up to the edge divided by ``qtilde``, counts
    beyond it divided by ``1 - qtilde``). One edge is then sampled with
    probability proportional to ``exp(-(eps/2) * cost)``, where ``eps`` is
    ``epsilon * min(alpha, 1 - alpha)``.

    Parameters:
    scores: array of calibration scores; only its first dimension is used
        as the sample size.
    alpha (float): miscoverage level.
    epsilon (float): privacy parameter.
    gamma (float): tuning parameter forwarded to ``get_qtilde``.
    bins (np.ndarray): candidate output values.

    Returns:
    One element of ``bins``, drawn with the global NumPy RNG.
    """
    n = scores.shape[0]
    num_bins = bins.shape[0]
    last_index = num_bins - 1
    epsilon_normed = epsilon*min(alpha, 1-alpha)

    # Adjusted quantile level
    qtilde = get_qtilde(n, alpha, gamma, epsilon, num_bins)

    def _clamp_index(idx):
        # Keep bin indices inside [0, last_index]
        return np.maximum(np.minimum(idx, last_index), 0)

    # Snap every score onto the grid
    flat_scores = scores.squeeze()
    snapped = bins[np.minimum(np.digitize(flat_scores, bins), last_index)]

    # Left- and right-closed bin indices of the snapped scores
    idx_open = _clamp_index(np.digitize(snapped, bins))
    idx_closed = _clamp_index(np.digitize(snapped, bins, right=True))

    mass_below = np.bincount(idx_open, minlength=num_bins).cumsum()/qtilde
    mass_above = (n-np.bincount(idx_closed, minlength=num_bins).cumsum())/(1-qtilde)
    cost = np.maximum(mass_below, mass_above)

    probs = softmax(-(epsilon_normed/2)*cost)
    # Renormalise to guard against rounding drift before sampling
    probs = probs/probs.sum()
    return np.random.choice(bins, p=probs)

# The candidate gammas are the two roots of a quadratic.
def get_optimal_gamma(scores,n,alpha,m,epsilon):
    """Pick, between the two quadratic roots, the gamma giving the smaller quantile.

    Both roots of ``a*g**2 + b*g + c = 0`` are clamped into
    ``[1e-12, 1 - 1e-12]``; a private quantile is sampled for each on an
    ``m``-point grid over [0, 1], and the pair ``(gamma, quantile)`` with the
    smaller sampled quantile is returned (the second root wins ties).
    """
    quad_a = alpha**2
    quad_b = - ( alpha*epsilon*(n+1)*(1-alpha)/2 + 2*alpha )
    quad_c = 1
    root_disc = np.sqrt(quad_b**2 - 4*quad_a*quad_c)

    def _into_unit_interval(g):
        return min(max(g,1e-12),1-1e-12)

    gamma1 = _into_unit_interval((-quad_b + root_disc)/(2*quad_a))
    gamma2 = _into_unit_interval((-quad_b - root_disc)/(2*quad_a))

    bins = np.linspace(0,1,m)

    # Evaluation order matters: each call consumes global NumPy randomness.
    q1 = get_private_quantile(scores, alpha, epsilon, gamma1, bins)
    q2 = get_private_quantile(scores, alpha, epsilon, gamma2, bins)

    if q1 < q2:
        return gamma1, q1
    return gamma2, q2

def get_optimal_gamma_m(n, alpha, epsilon):
    """Search a grid of bin counts for the ``(m, gamma)`` giving the smallest quantile.

    Uses ``n`` synthetic uniform scores and 50 log-spaced candidate bin
    counts between 1e4 and 1e6. If no candidate yields a sampled quantile
    below 1, the defaults ``(int(1/alpha), 1)`` are returned.

    Returns:
    tuple: ``(best_m, best_gamma)``.
    """
    bin_count_grid = np.logspace(4,6,50).astype(int)
    synthetic_scores = np.random.rand(n,1)

    chosen_m, chosen_gamma, smallest_q = int(1/alpha), 1, 1
    for m in bin_count_grid:
        gamma, q = get_optimal_gamma(synthetic_scores,n,alpha,m,epsilon)
        if q < smallest_q:
            chosen_m, chosen_gamma, smallest_q = m, gamma, q
    return chosen_m, chosen_gamma

# Quick demo of the helpers above: tune (m, gamma) on synthetic uniform scores,
# then sample one private quantile on a fresh uniform sample and print it.
if __name__ == "__main__":
    n = 5000
    alpha = 0.1
    # Privacy parameter. The original note here read "removal definition, 5 is
    # large; usually we think of epsilon as 1 or 2" -- so 8 is a loose setting.
    epsilon = 8
    mstar, gammastar = get_optimal_gamma_m(n, alpha, epsilon)
    scores = np.random.random((n,))
    q = get_private_quantile(scores, alpha, epsilon, gammastar, np.linspace(0,1,mstar))
    print(f"mstar: {mstar}, gammastar: {gammastar}, qhat: {q}")


In [None]:

def get_conformal_scores(scores, labels):
    """Return, for every row, the score assigned to that row's label.

    Parameters:
    scores: 2-D tensor (or array-like) indexed as ``scores[row, class]``.
    labels: 1-D integer tensor (or array-like) with one class index per row.

    Returns:
    torch.Tensor: 1-D tensor whose ``i``-th entry is ``scores[i, labels[i]]``,
        detached from any autograd graph.
    """
    scores = torch.as_tensor(scores)
    labels = torch.as_tensor(labels).long()
    rows = torch.arange(scores.shape[0])
    # One vectorised gather instead of a Python loop over all rows.
    return scores[rows, labels[:scores.shape[0]]].detach()

def get_shat_from_scores(scores, alpha):
    """Return the (non-private) empirical ``1 - alpha`` quantile of ``scores``."""
    level = 1-alpha
    return np.quantile(scores, level)

def get_shat_from_scores_private(scores, alpha, epsilon, gamma, score_bins):
    """Private counterpart of ``get_shat_from_scores``.

    Thin wrapper that forwards all arguments to ``get_private_quantile``.
    """
    return get_private_quantile(scores, alpha, epsilon, gamma, score_bins)

def platt_logits(calib_dataset, max_iters=10, lr=0.01, epsilon=0.01):
    """Fit a single temperature ``T`` on pre-computed logits (temperature scaling).

    ``T`` starts at 1.3 and is optimised with SGD to minimise the
    cross-entropy of ``logits / T``. Optimisation stops after ``max_iters``
    passes over the data, or earlier once a full pass changes ``T`` by less
    than ``epsilon``.

    Parameters:
    calib_dataset: dataset yielding ``(logits, target)`` pairs.
    max_iters (int): maximum number of passes over the dataset.
    lr (float): SGD learning rate.
    epsilon (float): early-stopping tolerance on the change of ``T`` per pass.

    Returns:
    nn.Parameter: the fitted temperature, a 1-element tensor living on the
        device used for fitting (CUDA when available, else CPU).
    """
    use_cuda = torch.cuda.is_available()
    device = "cuda:0" if use_cuda else "cpu"
    # Pinned memory only helps host-to-GPU copies; skip it on CPU-only machines.
    calib_loader = torch.utils.data.DataLoader(
        calib_dataset, batch_size=1024, shuffle=False, pin_memory=use_cuda)
    nll_criterion = nn.CrossEntropyLoss().to(device)

    T = nn.Parameter(torch.Tensor([1.3]).to(device))

    optimizer = optim.SGD([T], lr=lr)
    for _ in range(max_iters):
        T_old = T.item()
        for x, targets in calib_loader:
            optimizer.zero_grad()
            # Only T is being optimised, so the logits need no gradient.
            out = x.to(device)/T
            loss = nll_criterion(out, targets.long().to(device))
            loss.backward()
            optimizer.step()
        if abs(T_old - T.item()) < epsilon:
            break
    return T

### Helper functions for Lap-Hist

In [None]:
def dp_quantile_noisy_hist(x, q, epsilon, seed, bins=50, domain=(0.0, 1.0), rng=None):
    """
    Differentially private quantile using a Laplace-noised histogram (ε-DP).

    Args:
        x (array-like): data vector (numeric).
        q (float): desired quantile in (0,1).
        epsilon (float): privacy budget for the entire histogram.
        seed (int or None): seed for the random generator created when
            ``rng`` is not supplied; ignored otherwise.
        bins (int): number of fixed, public bins (at least 1).
        domain (tuple): (lo, hi) public bounds for clipping/binning.
        rng: np.random.Generator (optional).

    Returns:
        float: DP quantile estimate (can lie between data points).

    Raises:
        ValueError: if ``x`` is empty, ``q`` is outside (0,1), ``epsilon`` is
            not positive, ``bins`` is smaller than 1, or the domain is not
            ordered.

    Privacy & assumptions:
        - Data are clipped to the public domain (lo, hi).
        - Build a fixed-bin histogram, add Lap(1/ε) noise to each bin count.
        - Because each record contributes to exactly one bin, releasing
          the full noisy histogram is ε-DP under add/remove adjacency.
        - Quantile is computed from the noisy cumulative counts only, i.e.
          it is pure post-processing of the noisy histogram.

    Notes:
        - Works best if a reasonable public domain is known.
        - For stability, negative noisy counts are floored at 0.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        raise ValueError("x must be non-empty.")
    if not (0 < q < 1):
        raise ValueError("q must be in (0,1).")
    if epsilon <= 0:
        raise ValueError("epsilon must be > 0.")
    if bins < 1:
        raise ValueError("bins must be >= 1.")
    if rng is None:
        rng = np.random.default_rng(seed)

    lo, hi = domain
    if not (lo < hi):
        raise ValueError("domain must satisfy lo < hi.")

    # Clip to public domain
    xc = np.clip(x, lo, hi)

    # Fixed public bins
    edges = np.linspace(lo, hi, bins + 1)
    counts, _ = np.histogram(xc, bins=edges)

    # Laplace noise to each bin (scale = 1/ε)
    noise = rng.laplace(loc=0.0, scale=1.0/epsilon, size=bins)
    noisy = np.maximum(counts + noise, 0.0)

    # Cumulative noisy counts
    csum = np.cumsum(noisy)
    if csum[-1] <= 0:
        # Every noisy count was floored to 0 (extremely unlikely unless ε is
        # tiny and n is tiny). The fallback must not look at the raw data --
        # returning e.g. the exact median would break the privacy guarantee --
        # so use the q-quantile of a uniform distribution on the public domain.
        return float(lo + q * (hi - lo))

    target = q * csum[-1]
    j = np.searchsorted(csum, target)  # first bin reaching the target

    j = int(np.clip(j, 0, bins - 1))
    # Linear interpolation within the bin (simple, uniform-within-bin)
    bin_lo, bin_hi = edges[j], edges[j + 1]
    prev = csum[j - 1] if j > 0 else 0.0
    within = (target - prev) / max(noisy[j], 1e-12)
    within = np.clip(within, 0.0, 1.0)
    return float(bin_lo + within * (bin_hi - bin_lo))

## Helper function for the experiment

In [None]:
def trial_precomputed(conformal_scores, raw_scores, alpha, epsilon, gamma, score_bins, num_calib, seed, privateconformal):
    """Run one random calibration/validation split and score every method.

    The examples are shuffled with ``torch.randperm``; the first ``num_calib``
    go to calibration and the rest to validation. All scores are flipped to
    ``1 - score`` before thresholds are computed. A validation example counts
    as covered when its flipped true-label score is below the threshold, and
    its set size is the number of classes whose flipped score is below it.

    The non-private quantile is always evaluated. When ``privateconformal``
    is true, three private thresholds are evaluated as well:
    ``get_shat_from_scores_private`` (with ``epsilon``), ``PrivQuant`` (with
    ``rho = epsilon**2 / 2``) and ``dp_quantile_noisy_hist`` (with ``epsilon``).

    Returns:
    tuple of 12 entries, in this order: coverage for (exponential-mechanism
    quantile, PrivQuant, Lap-hist, non-private), set sizes in the same method
    order, thresholds in the same method order. For a non-private trial the
    private coverages/thresholds are ``np.nan`` and the private sizes are
    empty tensors.
    """
    shuffle = torch.randperm(conformal_scores.shape[0])
    conformal_scores = conformal_scores[shuffle]
    raw_scores = raw_scores[shuffle]

    calib_conformal_scores = 1-conformal_scores[0:num_calib]
    val_conformal_scores = 1-conformal_scores[num_calib:]
    val_raw_scores = 1-raw_scores[num_calib:]

    def _coverage(threshold):
        # Fraction of validation examples whose true label is in the set
        return (val_conformal_scores < threshold).float().mean().item()

    def _set_sizes(threshold):
        # Number of classes included per validation example
        return (val_raw_scores < threshold).sum(dim=1)

    # Non-private baseline (always computed)
    threshold_nonpriv = get_shat_from_scores(calib_conformal_scores, alpha)
    coverage_nonpriv = _coverage(threshold_nonpriv)
    sizes_nonpriv = _set_sizes(threshold_nonpriv)

    if not privateconformal:
        return (
            np.nan, np.nan, np.nan, coverage_nonpriv,
            torch.tensor([]), torch.tensor([]), torch.tensor([]), sizes_nonpriv,
            np.nan, np.nan, np.nan, threshold_nonpriv,
        )

    # Private thresholds; the call order is kept because the first two
    # consume global random state.
    shat = get_shat_from_scores_private(calib_conformal_scores, alpha, epsilon, gamma, score_bins)
    rho = (epsilon**2)/2
    threshold_PrivQuant = PrivQuant(calib_conformal_scores, alpha, rho, seed, lower_bound=0, upper_bound=1, delta=1e-10)
    threshold_Lap_hist = dp_quantile_noisy_hist(calib_conformal_scores, 1-alpha, epsilon, seed)

    return (
        _coverage(shat),
        _coverage(threshold_PrivQuant),
        _coverage(threshold_Lap_hist),
        coverage_nonpriv,
        _set_sizes(shat),
        _set_sizes(threshold_PrivQuant),
        _set_sizes(threshold_Lap_hist),
        sizes_nonpriv,
        shat,
        threshold_PrivQuant,
        threshold_Lap_hist,
        threshold_nonpriv,
    )


### Experiments

In [None]:

# Define the root directory: the folder of the file that inspect reports for
# the currently executing frame. It is used below as the default location of
# the logits cache.
# NOTE(review): when run from a notebook cell the reported "file" may not be a
# real path on disk -- confirm the resulting cache location.
dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# Make the parent of the first sys.path entry importable (same insertion as
# the one performed next to the imports at the top of the file).
sys.path.insert(1, os.path.join(sys.path[0], '../'))






def get_model(private=True):
    """Load a trained ``convnet`` checkpoint and return it in eval mode.

    Parameters:
    private (bool): load the privately trained checkpoint when true,
        otherwise the non-private one.

    Returns:
    The 10-class model with the checkpoint weights, moved to CUDA when
    available (CPU otherwise).
    """
    from collections import OrderedDict

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = convnet(num_classes=10).to(device)

    # TO DO: Put the complete paths to the saved trained private / non-private models
    model_path = ".../best_model_private.pth" if private else ".../best_model_nonprivate.pth"

    # Load the model state dict
    checkpoint = torch.load(model_path, map_location=device)

    # Remove the "_module." prefix from keys if present, keeping the key order
    prefix = "_module."
    cleaned_state = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in checkpoint.items()
    )

    # Load the modified state dict
    model.load_state_dict(cleaned_state)
    model.to(device)
    model.eval()

    return model





# Compute logits and targets
def get_logits_targets(model, loader):
    """Run ``model`` over ``loader`` and collect logits and labels.

    Parameters:
    model: network producing 10 logits per example (10 classes in CIFAR).
    loader: data loader yielding ``(inputs, targets)``; ``loader.dataset``
        must support ``len``.

    Returns:
    torch.utils.data.TensorDataset: pairs ``(logits, label)`` stored on the
        CPU, with labels converted to ``long``.
    """
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    num_examples = len(loader.dataset)
    all_logits = torch.zeros((num_examples, 10))  # 10 classes in CIFAR
    all_labels = torch.zeros((num_examples,))
    model = model.to(device)

    print('Computing logits for model (only happens once).')
    start = 0
    with torch.no_grad():
        for x, targets in tqdm(loader):
            stop = start + x.shape[0]
            # Fill the pre-allocated buffers batch by batch
            all_logits[start:stop, :] = model(x.to(device)).detach().cpu()
            all_labels[start:stop] = targets.cpu()
            start = stop

    # Construct the dataset
    return torch.utils.data.TensorDataset(all_logits, all_labels.long())

# Load logits dataset
def get_logits_dataset(private, datasetname, datasetpath, cache=os.path.join(dirname, '.cache/')):
    """Return the CIFAR-10 test-set logits of a trained model, using a pickle cache.

    Parameters:
    private (bool): which checkpoint to use (see ``get_model``).
    datasetname (str): sub-folder name inside the cache directory.
    datasetpath (str): root folder for the CIFAR-10 download.
    cache (str): cache directory.

    Returns:
    The dataset produced by ``get_logits_targets`` -- either unpickled from
    the cache file or freshly computed and then written to it.
    """
    pickle_name = 'private.pkl' if private else 'nonprivate.pkl'
    fname = os.path.join(cache, datasetname, pickle_name)

    # Cache hit: load and return
    if os.path.exists(fname):
        with open(fname, 'rb') as handle:
            return pkl.load(handle)

    # Cache miss: load the model and run it on the un-augmented test set
    model = get_model(private)

    normalize = tv.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    test_transform = tv.transforms.Compose([tv.transforms.ToTensor(), normalize])
    test_dataset = CIFAR10(root=datasetpath, train=False, download=True, transform=test_transform)
    loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)

    # Get the logits and targets
    dataset_logits = get_logits_targets(model, loader)

    # Save the dataset for later runs
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    with open(fname, 'wb') as handle:
        pkl.dump(dataset_logits, handle, protocol=pkl.HIGHEST_PROTOCOL)

    return dataset_logits

# Fix randomness for reproducibility
def fix_randomness(seed=0):
    """Seed NumPy, PyTorch (CPU and CUDA) and the stdlib ``random`` module."""
    seeders = (np.random.seed, torch.manual_seed, torch.cuda.manual_seed, random.seed)
    for seed_fn in seeders:
        seed_fn(seed)

# Get CIFAR-10 class names
def get_cifar10_classes():
    """Return the ten CIFAR-10 class names as a new list, ordered by class index."""
    return "airplane automobile bird cat deer dog frog horse ship truck".split()

# Experiment function
def experiment(alpha, epsilon, gamma, num_calib, m, seed, cifar10_root, privatemodel, privateconformal, num_trials=None):
    """Run repeated conformal-prediction trials for one (model, conformal) setting.

    Results are cached as a pickled DataFrame under ``.cache/``; when the
    cache file exists it is returned directly and nothing is recomputed.

    Parameters:
    alpha (float): miscoverage level.
    epsilon (float): privacy parameter of the private quantile methods.
    gamma (float): tuning parameter of the exponential-mechanism quantile.
    num_calib (int): number of calibration examples per trial.
    m (int): number of score bins (grid ``np.linspace(0, 1, m)``).
    seed (int): base seed; trial ``i`` uses ``seed + i``.
    cifar10_root (str): root folder of the CIFAR-10 data.
    privatemodel (bool): use the privately trained model.
    privateconformal (bool): also evaluate the private quantile methods.
    num_trials (int, optional): number of random splits. When omitted, the
        module-level variable ``num_trials`` is used (kept for backward
        compatibility with callers that set it as a global).

    Returns:
    pandas.DataFrame: one row per trial with coverages, set sizes and
        thresholds of every method, plus the setting flags.
    """
    score_bins = np.linspace(0, 1, m)
    fname = f'.cache/opt_{privatemodel}_{privateconformal}_{alpha}_{epsilon}_{num_calib}_{m}bins_dataframe.pkl'

    try:
        return pd.read_pickle(fname)
    except FileNotFoundError:
        pass

    if num_trials is None:
        # Backward compatibility: fall back to the global set by the driver code.
        num_trials = globals()["num_trials"]

    dataset_precomputed = get_logits_dataset(privatemodel, 'CIFAR10', cifar10_root)
    print('Dataset loaded')
    T = platt_logits(dataset_precomputed)
    logits, labels = dataset_precomputed.tensors
    scores = (logits / T.cpu()).softmax(dim=1)

    accuracy = (scores.argmax(dim=1) == labels).float().mean()
    print(f"Private model: {privatemodel}, Accuracy: {accuracy}")

    with torch.no_grad():
        conformal_scores = get_conformal_scores(scores, labels)
        local_df_list = []
        for i in tqdm(range(num_trials)):
            # Derive the trial seed from the base seed without mutating it;
            # re-assigning `seed = i + seed` would make the offsets accumulate
            # (seed, seed+1, seed+3, seed+6, ...).
            trial_seed = seed + i
            cvg1, cvg2, cvg3, cvg4, szs1, szs2, szs3, szs4, shat, threshold_PrivQuant, threshold_Lap_hist, threshold_nonpriv = trial_precomputed(
                conformal_scores, scores, alpha, epsilon, gamma, score_bins, num_calib, trial_seed, privateconformal)

            # Store results with consistent structure. Size tensors are wrapped
            # in a one-element list so that each trial becomes a single row.
            dict_local = {
                "NonprivQuant": cvg4,
                "sizes_NonprivQuant": [szs4],
                "Anas et. al": cvg1 if privateconformal else np.nan,
                "PrivQuant": cvg2 if privateconformal else np.nan,
                "Lap_hist": cvg3 if privateconformal else np.nan,
                "sizes_Anas et. al": [szs1] if privateconformal and szs1 is not None else [torch.tensor([])],
                "sizes_PrivQuant": [szs2] if privateconformal and szs2 is not None else [torch.tensor([])],
                "sizes_Lap_hist": [szs3] if privateconformal and szs3 is not None else [torch.tensor([])],
                "$\\hat{s}$": shat if privateconformal else np.nan,
                "$\\hat{q}_$PrivQuant": threshold_PrivQuant if privateconformal else np.nan,
                "Lap_hist_thrs": threshold_Lap_hist if privateconformal else np.nan,
                "$\\alpha$": alpha,
                "$\\epsilon$": epsilon,
                "PrivateConformal": privateconformal,
                "PrivateModel": privatemodel,
            }

            local_df_list.append(pd.DataFrame(dict_local))

        df = pd.concat(local_df_list, axis=0, ignore_index=True)
        os.makedirs('.cache', exist_ok=True)
        df.to_pickle(fname)

    return df


# Remove the working-directory '.cache' folder, i.e. the per-setting DataFrames
# that experiment() stores under '.cache/'; every run therefore recomputes them.
shutil.rmtree('.cache', ignore_errors=True)

# Driver: run the four (conformal, model) settings, or reuse saved results.
if __name__ == "__main__":
    sns.set(palette='pastel', font='serif')
    sns.set_style('white')
    fix_randomness(seed=0)

    save_path = 'df_list_CIFAR_10_results.pkl'

    # Reuse previously saved results when the results file already exists.
    if os.path.exists(save_path):
        with open(save_path, 'rb') as f:
            df_list = pkl.load(f)
    else:
        cifar10_root = './data/cifar10'
        privateconformals = [False, True]
        privatemodels = [False, True]

        alpha = 0.1
        epsilon = 1
        num_calib = 5000
        # NOTE: experiment() reads `num_trials` as a module-level global rather
        # than receiving it as an argument, so this name must stay a global.
        num_trials = 1000
        seed = 123
        # Tune the bin count and gamma of the private quantile once, up front.
        mstar, gammastar = get_optimal_gamma_m(num_calib, alpha, epsilon)

        # One DataFrame per setting, appended in the order (conformal, model):
        # (False, False), (False, True), (True, False), (True, True).
        df_list = []
        for privateconformal in privateconformals:
            for privatemodel in privatemodels:
                df_list.append(
                    experiment(alpha, epsilon, gammastar, num_calib, mstar, seed,
                               cifar10_root=cifar10_root, 
                               privatemodel=privatemodel,
                               privateconformal=privateconformal)
                )

       

### Saving the result

In [None]:
# Persist the list of per-setting DataFrames.
# NOTE: `save_path` and `df_list` are only assigned inside the
# `if __name__ == "__main__":` driver above, so that code must have run first.
with open(save_path, 'wb') as f:
            pkl.dump(df_list, f)

### Processing and saving results to files for plotting

In [None]:
def process_sizes_to_averages(size_series, trials=1000, eval_points=5000):
    """Compute the average set size of every trial.

    The series is exploded into individual size values, missing entries are
    dropped, and the flat sequence is reshaped to ``(trials, eval_points)``
    before averaging each row.

    Returns:
    np.ndarray: ``trials`` averages, or an empty array (after printing a
        warning) when there are no values or their number differs from
        ``trials * eval_points``.
    """
    # Flatten to plain floats (tensor elements are unwrapped with .item())
    processed_sizes = [
        float(entry.item()) if isinstance(entry, torch.Tensor) else float(entry)
        for entry in size_series.explode().dropna()
    ]

    if len(processed_sizes) == 0:
        print("Warning: No valid size data found")
        return np.array([])

    # The reshape below is only meaningful with exactly trials * eval_points values
    expected_points = trials * eval_points
    if len(processed_sizes) != expected_points:
        print(f"Warning: Expected {expected_points} size points, got {len(processed_sizes)}")
        return np.array([])

    try:
        per_trial = np.array(processed_sizes).reshape(trials, eval_points)
        return np.mean(per_trial, axis=1)
    except Exception as e:
        print(f"Error computing averages: {str(e)}")
        return np.array([])

def safe_to_dataframe(data_dict):
    """Build a DataFrame from arrays of possibly different lengths.

    Empty arrays are dropped; the remaining ones are right-padded with NaN
    up to the longest length. An empty DataFrame is returned when there is
    nothing left to store.
    """
    if not data_dict:
        return pd.DataFrame()

    # Keep only the columns that actually contain data
    non_empty = {name: values for name, values in data_dict.items() if len(values) > 0}
    if not non_empty:
        return pd.DataFrame()

    target_len = max(len(values) for values in non_empty.values())

    columns = {}
    for name, values in non_empty.items():
        missing = target_len - len(values)
        columns[name] = np.pad(values, (0, missing), mode='constant', constant_values=np.nan)
    return pd.DataFrame(columns)

def main():
    """Convert the pickled experiment results into per-setting CSV files.

    Reads ``df_list_CIFAR_10_results.pkl`` (a list of DataFrames, one per
    model/conformal setting) and writes, for every setting that has data,
    ``coverage_<setting>.csv`` (coverage per trial and method) and
    ``avg_size_<setting>.csv`` (average set size per trial and method).
    Problems are reported with ``print``; a DataFrame that cannot be processed
    is skipped.
    """
    # Load the data. The file imports the pickle module under the alias `pkl`,
    # so that alias is the name that must be used here.
    # NOTE: unpickling executes arbitrary code -- only load trusted result files.
    try:
        with open('df_list_CIFAR_10_results.pkl', 'rb') as f:
            df_list = pkl.load(f)
    except FileNotFoundError:
        print("Error: Input file not found")
        return
    except pkl.PickleError:
        print("Error: Could not unpickle the file")
        return

    # Setting names mapping, keyed by (private model, private conformal)
    setting_names = {
        (False, False): "NonPrivateModel_NonPrivateConformal",
        (False, True): "NonPrivateModel_PrivateConformal",
        (True, False): "PrivateModel_NonPrivateConformal",
        (True, True): "PrivateModel_PrivateConformal"
    }
    methods = ["NonprivQuant", "Anas et. al", "PrivQuant", "Lap_hist"]

    # Initialize storage
    results = {
        'coverage': {setting: {} for setting in setting_names.values()},
        'size_avg': {setting: {} for setting in setting_names.values()}
    }

    # Process each setting's DataFrame
    for df_idx, df in enumerate(df_list):
        try:
            # Coerce to plain bools so the tuple matches the mapping keys
            private_model = bool(df["PrivateModel"].iloc[0])
            private_conformal = bool(df["PrivateConformal"].iloc[0])
            setting = setting_names[(private_model, private_conformal)]

            # Coverage data
            for method in methods:
                if method in df.columns:
                    cov_data = df[method].dropna().astype(float).values
                    results['coverage'][setting][method] = cov_data

            # Size data - computing averages per trial
            for method in methods:
                size_key = f"sizes_{method}"
                if size_key in df.columns:
                    avg_sizes = process_sizes_to_averages(df[size_key])
                    if len(avg_sizes) > 0:  # Only store if we got valid averages
                        results['size_avg'][setting][method] = avg_sizes
                    else:
                        print(f"No valid size data for {method} in setting {setting} (DF #{df_idx+1})")
        except Exception as e:
            # Best effort: report the problem and carry on with the next DataFrame
            print(f"Error processing dataframe #{df_idx+1}: {str(e)}")
            continue

    # Save coverage data
    for setting in setting_names.values():
        if results['coverage'][setting]:
            df = safe_to_dataframe(results['coverage'][setting])
            if not df.empty:
                df.to_csv(f'coverage_{setting}.csv', index=False)
            else:
                print(f"No coverage data to save for {setting}")

    # Save average size data
    for setting in setting_names.values():
        if results['size_avg'][setting]:
            df = safe_to_dataframe(results['size_avg'][setting])
            if not df.empty:
                df.to_csv(f'avg_size_{setting}.csv', index=False)
            else:
                print(f"No average size data to save for {setting}")

    print("Data processing complete!")

# Run the result post-processing when this code is executed as a script.
if __name__ == "__main__":
    main()