### Libraries

In [1]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import random
import diffprivlib
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from diffprivlib.models import LogisticRegression as DP_LogisticRegression
from diffprivlib.models import GaussianNB as DPGaussianNB  
from diffprivlib.models import DecisionTreeClassifier as DP_DecisionTreeClassifier
from diffprivlib.models import RandomForestClassifier as DP_RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.base import clone
from sklearn.preprocessing import LabelEncoder
import math
from scipy.special import softmax
from sklearn.datasets import make_classification
import scipy.stats as stats
import time
from decimal import Decimal
import pickle
import os

### Nonprivate models

In [2]:
def get_nonprivate_model(model_name, random_state):
    

    if model_name == "Random_forest":
        model= RandomForestClassifier(random_state=random_state)
        
        
    elif model_name == "Decision_tree":
        model=DecisionTreeClassifier()
    
        
    elif model_name == "Naive_bayes":
       model=GaussianNB()
        
    elif model_name == "Logistic_regression":
        model=LogisticRegression(max_iter=5000, random_state=random_state)
    return model

### Private models

In [3]:
def get_private_model(model_name, X_train, y_train, y, epsilon):
    EPSILON =epsilon
  
    classes = np.unique(y)

    if model_name == "Random_forest":
        # Define bounds for each feature (for differential privacy)
        min_bounds = -20
        max_bounds = 20
        bounds = (min_bounds, max_bounds)
        
        # Initialize private Random Forest with differential privacy
        model = DP_RandomForestClassifier(
            epsilon=EPSILON,    # Differential privacy parameter
            n_estimators=127,   # Number of trees in the forest
            max_depth=7,        # Maximum depth of the trees
            bounds=bounds,      # Data bounds for differential privacy
            random_state=42,
            classes=classes
        )
    elif model_name == "Decision_tree":
        # Define bounds for each feature (for differential privacy)
        min_bounds = -20
        max_bounds = 20
        bounds = (min_bounds, max_bounds)
    
        # Initialize private Random Forest with differential privacy
        model = DP_DecisionTreeClassifier(
            epsilon=EPSILON,    # Differential privacy parameter
            max_depth=6,        # Maximum depth of the trees
            bounds=bounds,      # Data bounds for differential privacy
            random_state=42,
            classes=classes
        )
    elif model_name == "Naive_bayes":
        # Define bounds for each feature (for differential privacy)
        min_bounds =-20 
        max_bounds =20 
        bounds = (min_bounds, max_bounds)
        
        # Initialize private Naive Bayes with differential privacy
        model = DPGaussianNB(
            epsilon=EPSILON,    # Differential privacy parameter
            bounds=bounds,      # Data bounds for differential privacy
            random_state=42
        )
    elif model_name == "Logistic_regression":
        # Calculate the data norm (L2 norm)
        data_norm = 20
        
        # Initialize private Logistic Regression
        model = DP_LogisticRegression(
            epsilon=EPSILON,
            data_norm=data_norm,
            random_state=42
        )
    return model    

### Performance evaluation function

In [4]:
# Updated evaluation function to include accuracy
def evaluate_conformal_split(prediction_sets, y_test, y_pred):
    # Calculate coverage
    coverage = np.mean([1 if y_test[i] in prediction_sets[i] else 0 for i in range(len(y_test))])

    # Calculate Efficiency
    ambiguities = [len(pred_set) for pred_set in prediction_sets]
    average_Efficiency = np.mean(ambiguities)

    # Calculate accuracy
    accuracy = np.mean(y_test == y_pred)

    return coverage, average_Efficiency, accuracy

### Helping function for PCOQS method

In [5]:
def NoisyRC(range_bounds, D, sigma):
    """
    Noisy Range Count for float values with Gaussian noise.

    Parameters:
    range_bounds (tuple): A tuple (a, b) representing the range [a, b].
    D (list): The sorted dataset.
    sigma (float): The standard deviation of the Gaussian noise.

    Returns:
    int: The noisy count of elements in the range [a, b].
    """
    a, b = range_bounds
    count = sum(1 for z in D if a <= z <= b)
    noise = np.random.normal(0, sigma)
    noisy_count = count + noise
    return max(0, int(np.floor(noisy_count)))  # Ensure non-negative count

def PCOQS(D, alpha, rho, seed, lower_bound=0, upper_bound=1, delta=1e-10):
    """
    Differentially Private Quantile Approximation Algorithm without integer conversion.

    Parameters:
    D (list): The sorted dataset.
    alpha (float): The quantile level (e.g., 0.5 for median).
    rho (float): The privacy parameter (smaller = more private).
    lower_bound (float): Lower bound of the search space.
    upper_bound (float): Upper bound of the search space.
    delta (float): Small positive value to ensure convergence.

    Returns:
    float: A differentially private approximation of the quantile x_{(m)}.
    """

    
    n = len(D)
    max_iterations = int(np.ceil(np.log2((upper_bound - lower_bound) / delta)))
    sigma = np.sqrt(max_iterations / (2 * rho)) # Noise scale for Gaussian mechanism
    m = int(np.ceil((1 - alpha) * (n + 1)))

    left, right = lower_bound, upper_bound
    random.seed(seed)
    for i in range(max_iterations):
        mid = (left + right) / 2
        c = NoisyRC((lower_bound, mid), D, sigma)
        
        if c < m:
            left = mid + delta
        else:
            right = mid

    return np.round((left + right) / 2, 2)



### Helping function for EXPONQ method

In [6]:
# Optimal gamma is a root.
def get_optimal_gamma(scores,n,alpha,m,epsilon):
    a = alpha**2
    b = - ( alpha*epsilon*(n+1)*(1-alpha)/2 + 2*alpha )
    c = 1
    best_q = 1
    gamma1 = (-b + np.sqrt(b**2 - 4*a*c))/(2*a)
    gamma2 = (-b - np.sqrt(b**2 - 4*a*c))/(2*a)

    gamma1 = min(max(gamma1,1e-12),1-1e-12)
    gamma2 = min(max(gamma2,1e-12),1-1e-12)

    bins = np.linspace(0,1,m)

    q1 = get_private_quantile(scores, alpha, epsilon, gamma1, bins)
    q2 = get_private_quantile(scores, alpha, epsilon, gamma2, bins)

    return (gamma1, q1) if q1 < q2 else (gamma2, q2)

def get_optimal_gamma_m(n, alpha, epsilon):
    candidates_m = np.logspace(4,6,50).astype(int)
    scores = np.random.rand(n,1)
    best_m = int(1/alpha)
    best_gamma = 1
    best_q = 1
    for m in candidates_m:
        gamma, q = get_optimal_gamma(scores,n,alpha,m,epsilon)
        if q < best_q:
            best_q = q
            best_m = m
            best_gamma = gamma
    return best_m, best_gamma

def get_private_quantile(scores, alpha, epsilon, gamma, bins):
    n = scores.shape[0]
    epsilon_normed = epsilon*min(alpha, 1-alpha)
    # Get the quantile
    qtilde = get_qtilde(n, alpha, gamma, epsilon, bins.shape[0])
    scores = scores.squeeze()
    score_to_bin = np.digitize(scores,bins)
    binned_scores = bins[np.minimum(score_to_bin,bins.shape[0]-1)]
    w1 = np.digitize(binned_scores, bins)
    w2 = np.digitize(binned_scores, bins, right=True)
    # Clip bins
    w1 = np.maximum(np.minimum(w1,bins.shape[0]-1),0)
    w2 = np.maximum(np.minimum(w2,bins.shape[0]-1),0)
    lower_mass = np.bincount(w1,minlength=bins.shape[0]).cumsum()/qtilde
    upper_mass = (n-np.bincount(w2,minlength=bins.shape[0]).cumsum())/(1-qtilde)
    w = np.maximum( lower_mass , upper_mass )
    sampling_probabilities = softmax(-(epsilon_normed/2)*w)
    # Check
    sampling_probabilities = sampling_probabilities/sampling_probabilities.sum()
    qhat = np.random.choice(bins,p=sampling_probabilities)
    return qhat

def get_shat_from_scores_private(scores, alpha, epsilon, gamma, score_bins):
    shat = get_private_quantile(scores, alpha, epsilon, gamma, score_bins)
    return shat

def get_qtilde(n,alpha,gamma,epsilon,m):
    qtilde = (n+1)*(1-alpha)/(n*(1-gamma*alpha))+2/(epsilon*n)*np.log(m/(gamma*alpha))
    qtilde = min(qtilde, 1-1e-12)
    return qtilde

### Helping function for Histogram + Laplace Method

In [7]:
def dp_quantile_noisy_hist(x, q, epsilon, seed, domain=(0.0, 1.0), bins=50, rng=None):
    """
    Differentially private quantile using a Laplace-noised histogram (ε-DP).

    Args:
        x (array-like): data vector (numeric).
        q (float): desired quantile in (0,1).
        epsilon (float): privacy budget for the entire histogram.
        domain (tuple): (lo, hi) public bounds for clipping/binning.
        bins (int): number of fixed, public bins.
        rng: np.random.Generator (optional).

    Returns:
        float: DP quantile estimate (can lie between data points).

    Privacy & assumptions:
        - Data are clipped to the public domain (lo, hi).
        - Build a fixed-bin histogram, add Lap(1/ε) noise to each bin count.
        - Because each record contributes to exactly one bin, releasing
          the full noisy histogram is ε-DP under add/remove adjacency.
        - Quantile is computed from the noisy cumulative counts.

    Notes:
        - Works best if a reasonable public domain is known.
        - For stability, negative noisy counts are floored at 0.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        raise ValueError("x must be non-empty.")
    if not (0 < q < 1):
        raise ValueError("q must be in (0,1).")
    if epsilon <= 0:
        raise ValueError("epsilon must be > 0.")
    if rng is None:
        rng = np.random.default_rng(seed)

    lo, hi = domain
    if not (lo < hi):
        raise ValueError("domain must satisfy lo < hi.")

    # Clip to public domain
    xc = np.clip(x, lo, hi)

    # Fixed public bins
    edges = np.linspace(lo, hi, bins + 1)
    #print(f"Bins: {edges}")
    counts, _ = np.histogram(xc, bins=edges)
    #print(f"Counts of histogram: {counts}")

    # Laplace noise to each bin (scale = 1/ε)
    noise = rng.laplace(loc=0.0, scale=1.0/epsilon, size=bins)
    #print(f"Noise for each bin: {noise}")
    noisy = np.maximum(counts + noise, 0.0)
    #print(noisy)

    # Cumulative proportion
    csum = np.cumsum(noisy)
    if csum[-1] <= 0:
        # extremely unlikely unless ε is tiny and n is tiny
        return float(np.median(xc))

    target = q * csum[-1]
    j = np.searchsorted(csum, target)  # first bin reaching the target

    j = int(np.clip(j, 0, bins - 1))
    # Linear interpolation within the bin (simple, uniform-within-bin)
    bin_lo, bin_hi = edges[j], edges[j + 1]
    prev = csum[j - 1] if j > 0 else 0.0
    within = (target - prev) / max(noisy[j], 1e-12)
    within = np.clip(within, 0.0, 1.0)
    return float(bin_lo + within * (bin_hi - bin_lo))

### Data generation

In [8]:
def simulate_normal_classification(
    n_samples_per_class,
    n_features,
    n_classes,
    class_means=None,
    class_covariances=None,
    random_state=None
):
    """
    Simulate classification data directly from Normal distributions.
    
    Parameters:
        n_samples_per_class (int): Number of samples per class.
        n_features (int): Number of features.
        n_classes (int): Number of classes.
        class_means (list of arrays): List of mean vectors for each class. If None, generated randomly.
        class_covariances (list of arrays): List of covariance matrices for each class. If None, identity matrices are used.
        random_state (int): Seed for reproducibility.
    
    Returns:
        X (ndarray): Feature matrix of shape (n_samples, n_features), rounded to 4 decimal places.
        y (ndarray): Class labels of shape (n_samples,), rounded to 4 decimal places.
    """
    np.random.seed(random_state)
    
    X = []
    y = []
    
    # Generate means and covariances if not provided
    if class_means is None:
        class_means = [np.random.uniform(-5, 5, n_features) for _ in range(n_classes)]
    if class_covariances is None:
        class_covariances = [np.eye(n_features) for _ in range(n_classes)]
    
    for class_idx in range(n_classes):
        # Draw samples from the Normal distribution
        samples = np.random.multivariate_normal(
            mean=class_means[class_idx],
            cov=class_covariances[class_idx],
            size=n_samples_per_class
        )
        X.append(samples)
        y.extend([class_idx] * n_samples_per_class)
    
    # Combine data and shuffle
    X = np.vstack(X)
    y = np.array(y)
    indices = np.random.permutation(len(y))
    
    # Round X and y to 4 decimal places
    X = np.round(X[indices], 4)
    y = np.round(y[indices], 4)
    
    return X, y


### Simulation function

In [9]:
def run_simulation(
    private_model,
    private_conformal,
    model_name,
    n_samples_per_class,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    epsilon,
    conformal_epsilon,
    n_trials,
    seed
):
    n_trials = int(n_trials)  # Ensure type safety
    coverage_results_anas = []
    Efficiency_results_anas = []
    informativeness_results_anas = [] 
    coverage_results_PCOQS = []
    Efficiency_results_PCOQS = []
    informativeness_results_PCOQS = []  
    coverage_results_Lap_hist = []
    Efficiency_results_Lap_hist = []
    informativeness_results_Lap_hist = []  
    coverage_results_np = []
    Efficiency_results_np = []
    informativeness_results_np = [] 
    accuracy_results = []

    for i in tqdm(range(n_trials)):
        seed =seed +i
        # Step 1: Generate simulated data
        X, y = simulate_normal_classification(
            n_samples_per_class=n_samples_per_class,
            n_features=n_features,
            n_classes=n_classes,
            class_means=class_means,
            class_covariances=class_covariances,
            random_state=i + 3
        )

        # Step 2: Split data
        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=i + 3)
        X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.4, random_state=i + 3)
        num_calib = len(X_cal)

        # Train model
        if private_model:
            model = get_private_model(model_name, X_test, y_train, y, epsilon)
        else:
            model = get_nonprivate_model(model_name, random_state=42)
        model.fit(X_train, y_train)

        # Compute calibration scores
        prob_cal = model.predict_proba(X_cal)
        scores_cal = np.round(1 - prob_cal[np.arange(len(y_cal)), y_cal], 4)

        if private_conformal:
            # Compute thresholds for EXPONQ method
            mstar, gammastar = get_optimal_gamma_m(num_calib, alpha, conformal_epsilon)
            m = mstar
            gamma = gammastar
            score_bins = np.linspace(0, 1, m)
            shat = get_shat_from_scores_private(scores_cal, alpha, conformal_epsilon, gamma, score_bins)
            threshold_anas = shat

            # Compute threshold for PCOQS
            scores_cal = np.sort(scores_cal)
            rho = conformal_epsilon**2/2
            threshold_PCOQS = PCOQS(scores_cal, alpha, rho, seed) 

            # Compute threshold for Lap_hist
            q = 1- alpha
            threshold_Lap_hist = dp_quantile_noisy_hist(scores_cal, q, conformal_epsilon, seed, domain=(0.0, 1.0), bins=50, rng=None)
        else:
            # Non-private threshold
            threshold = np.quantile(scores_cal, math.ceil((1 - alpha) * (len(scores_cal) + 1) / len(scores_cal)))

        # Compute prediction sets
        prob_test = model.predict_proba(X_test)
        scores_test = 1 - prob_test

        if private_conformal:
            # Prediction sets for EXPONQ
            prediction_sets_anas = [
                np.where(scores <= threshold_anas)[0] for scores in scores_test
            ]
            prediction_sets_anas = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets_anas
            ]
            # Compute informativeness for EXPONQ
            informativeness_anas = np.mean([len(pset) == 1 for pset in prediction_sets_anas])
            informativeness_results_anas.append(informativeness_anas)

            # Prediction sets for PCOQS
            prediction_sets_PCOQS = [
                np.where(scores <= threshold_PCOQS)[0] for scores in scores_test
            ]
            prediction_sets_PCOQS = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets_PCOQS
            ]
            # Compute informativeness for PCOQS
            informativeness_PCOQS = np.mean([len(pset) == 1 for pset in prediction_sets_PCOQS])
            informativeness_results_PCOQS.append(informativeness_PCOQS)

            # Prediction sets for Lap_hist
            prediction_sets_Lap_hist = [
                np.where(scores <= threshold_Lap_hist)[0] for scores in scores_test
            ]
            prediction_sets_Lap_hist = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets_Lap_hist
            ]
            # Compute informativeness for Lap_hist
            informativeness_Lap_hist = np.mean([len(pset) == 1 for pset in prediction_sets_Lap_hist])
            informativeness_results_Lap_hist.append(informativeness_Lap_hist)
        else:
            # Non-private prediction sets
            prediction_sets = [
                np.where(scores <= threshold)[0] for scores in scores_test
            ]
            prediction_sets = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets
            ]
            # Compute informativeness for non-private
            informativeness = np.mean([len(pset) == 1 for pset in prediction_sets])
            informativeness_results_np.append(informativeness)

        # Compute metrics
        if private_conformal:
            # EXPONQ
            coverage_anas = np.mean([y_test[i] in prediction_sets_anas[i] for i in range(len(y_test))])
            coverage_results_anas.append(coverage_anas)
            Efficiency_anas = np.mean([len(pset) for pset in prediction_sets_anas])
            Efficiency_results_anas.append(Efficiency_anas)

            # PCOQS
            coverage_PCOQS = np.mean([y_test[i] in prediction_sets_PCOQS[i] for i in range(len(y_test))])
            coverage_results_PCOQS.append(coverage_PCOQS)
            Efficiency_PCOQS = np.mean([len(pset) for pset in prediction_sets_PCOQS])
            Efficiency_results_PCOQS.append(Efficiency_PCOQS)

            # Lap_hist
            coverage_Lap_hist = np.mean([y_test[i] in prediction_sets_Lap_hist[i] for i in range(len(y_test))])
            coverage_results_Lap_hist.append(coverage_Lap_hist)
            Efficiency_Lap_hist = np.mean([len(pset) for pset in prediction_sets_Lap_hist])
            Efficiency_results_Lap_hist.append(Efficiency_Lap_hist)
        else:
            # Non-private
            coverage = np.mean([y_test[i] in prediction_sets[i] for i in range(len(y_test))])
            coverage_results_np.append(coverage)
            Efficiency = np.mean([len(pset) for pset in prediction_sets])
            Efficiency_results_np.append(Efficiency)

        # Accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_results.append(accuracy)

    # Aggregate results
    results = {}
    if private_conformal:
        results["anas"] = {
            "coverage_mean": np.mean(coverage_results_anas),
            "coverage_std": np.std(coverage_results_anas, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_anas),
            "Efficiency_std": np.std(Efficiency_results_anas, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_anas),
            "informativeness_std": np.std(informativeness_results_anas, ddof=1),
        }
        results["PCOQS"] = {
            "coverage_mean": np.mean(coverage_results_PCOQS),
            "coverage_std": np.std(coverage_results_PCOQS, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_PCOQS),
            "Efficiency_std": np.std(Efficiency_results_PCOQS, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_PCOQS),
            "informativeness_std": np.std(informativeness_results_PCOQS, ddof=1),
        }

        results["Lap_hist"] = {
            "coverage_mean": np.mean(coverage_results_Lap_hist),
            "coverage_std": np.std(coverage_results_Lap_hist, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_Lap_hist),
            "Efficiency_std": np.std(Efficiency_results_Lap_hist, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_Lap_hist),
            "informativeness_std": np.std(informativeness_results_Lap_hist, ddof=1),
        }
    else:
        results["nonprivate"] = {
            "coverage_mean": np.mean(coverage_results_np),
            "coverage_std": np.std(coverage_results_np, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_np),
            "Efficiency_std": np.std(Efficiency_results_np, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_np),
            "informativeness_std": np.std(informativeness_results_np, ddof=1),
        }
    results["accuracy"] = {
        "mean": np.mean(accuracy_results),
        "std": np.std(accuracy_results, ddof=1),
    }

    return results



### Simulation function for different models

In [10]:
def complete_model_conformal_simulation(
    model_sets,
    private_models,
    private_conformals,
    n_samples_per_class,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    epsilon,
    conformal_epsilon,
    n_trials,
    seed
):
    # Results dictionary to store all results
    results = {}

    # Iterate through models, privacy options, and conformal options
    for model_name in tqdm(model_sets):
        model_results = []  # To collect results for each model
        for private_model in private_models:
            for private_conformal in private_conformals:
                # Run simulation for the current configuration
                sim_results = run_simulation(
                    private_model,
                    private_conformal,
                    model_name,
                    n_samples_per_class,
                    n_features,
                    n_classes,
                    class_means,
                    class_covariances,
                    alpha,
                    epsilon,
                    conformal_epsilon,
                    n_trials,
                    seed 
                )

                # Extract results based on the conformal configuration
                if private_conformal:
                    # Add results for EXPONQ
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal} (EXPONQ)",
                        "Coverage": f"{sim_results['anas']['coverage_mean']:.4f} ± {sim_results['anas']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['anas']['Efficiency_mean']:.4f} ± {sim_results['anas']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['anas']['informativeness_mean']:.4f} ± {sim_results['anas']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })
                    # Add results for PCOQS
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal} (PCOQS)",
                        "Coverage": f"{sim_results['PCOQS']['coverage_mean']:.4f} ± {sim_results['PCOQS']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['PCOQS']['Efficiency_mean']:.4f} ± {sim_results['PCOQS']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['PCOQS']['informativeness_mean']:.4f} ± {sim_results['PCOQS']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })

                    # Add results for Lap_hist
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal} (Lap_hist)",
                        "Coverage": f"{sim_results['Lap_hist']['coverage_mean']:.4f} ± {sim_results['Lap_hist']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['Lap_hist']['Efficiency_mean']:.4f} ± {sim_results['Lap_hist']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['Lap_hist']['informativeness_mean']:.4f} ± {sim_results['Lap_hist']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })
                else:
                    # Add results for non-private conformal
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal}",
                        "Coverage": f"{sim_results['nonprivate']['coverage_mean']:.4f} ± {sim_results['nonprivate']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['nonprivate']['Efficiency_mean']:.4f} ± {sim_results['nonprivate']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['nonprivate']['informativeness_mean']:.4f} ± {sim_results['nonprivate']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })

        # Store results for the current model
        results[model_name] = model_results

    # Generate and print results in a structured format
    for model_name, model_results in results.items():
        print(f"\n{'#' * 10} {model_name} {'#' * 10}\n")
        print(f"{'Description':<70} {'Coverage':<20} {'Efficiency':<20} {'Informativeness':<20} {'Accuracy':<20}")
        print("-" * 150)
        for result in model_results:
            print(f"{result['Description']:<70} {result['Coverage']:<20} {result['Efficiency']:<20} {result['Informativeness']:<20} {result['Accuracy']:<20}")
        print("\n")
    
    return results

### Simulation function for different model privacy budget values

In [11]:
def compare_conformal_epsilon(
    model_epsilon_values,
    conformal_epsilon,
    private_models,
    private_conformals,
    n_samples_per_class,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    n_trials,
    seed
):
    results = []  # Use a list to store the results
    for epsilon in model_epsilon_values:
        print("model_epsilon:", epsilon)
        #rho = conformal_epsilon
        output = complete_model_conformal_simulation(
                                                    model_sets,
                                                    private_models,
                                                    private_conformals,
                                                    n_samples_per_class,
                                                    n_features,
                                                    n_classes,
                                                    class_means,
                                                    class_covariances,
                                                    alpha,
                                                    epsilon,
                                                    conformal_epsilon,
                                                    n_trials,
                                                    seed
                                                    )
        # Append results for the current epsilon
        results.append({"model_epsilon": epsilon, "output": output})
    return results


### Running the simulation 

In [12]:
#Private model for different epsilon values and Nonprivate conformal 

model_sets = ["Naive_bayes",  "Random_forest"] #"Logistic_regression",
private_models = [True]
private_conformals = [True]

n_samples_per_class=5000
n_features = 8
n_classes = 2
class_means = [
                np.array([0.8, 0.8, 0.8, 0.8, 0.8,0.8, 0.8, 0.8]), 
                np.array([-1, -1, -1, -1, -1, -1, -1, -1])
                
]
class_covariances = [
    np.eye(8)*7,
    np.eye(8) * 8,
     #np.eye(8) * 15,
]

n_trials =1000
alpha = 0.1
#epsilon = 5
model_epsilon_values = [0.1, 0.5, 1, 2, 5, 10]
#conformal_epsilon_values = [0.1, 0.5, 1, 5, 10,20, 50]
conformal_epsilon = 1
seed = 12

results=compare_conformal_epsilon(
    model_epsilon_values,
    conformal_epsilon,
    private_models,
    private_conformals,
    n_samples_per_class,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    n_trials,
    seed
)


print(results)

model_epsilon: 0.1


  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  0%|                                          | 1/1000 [00:00<16:34,  1.00it/s][A
  0%|                                          | 2/1000 [00:01<14:39,  1.13it/s][A
  0%|▏                                         | 3/1000 [00:02<13:55,  1.19it/s][A
  0%|▏                                         | 4/1000 [00:03<13:40,  1.21it/s][A
  0%|▏                                         | 5/1000 [00:04<13:22,  1.24it/s][A
  1%|▎                                         | 6/1000 [00:04<13:18,  1.25it/s][A
  1%|▎                                         | 7/1000 [00:05<13:17,  1.25it/s][A
  1%|▎                                         | 8/1000 [00:06<13:21,  1.24it/s][A
  1%|▍                                         | 9/1000 [00:07<13:21,  1.24it/s][A
  1%|▍                                        | 10/1000 [00:08<13:15,  1.24it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: True, Private Conformal: True (EXPONQ)    0.9218 ± 0.0115      1.6580 ± 0.0335      0.3420 ± 0.0335      0.6319 ± 0.0139     
Naive_bayes - Private Model: True, Private Conformal: True (PCOQS)     0.8995 ± 0.0102      1.5907 ± 0.0238      0.4093 ± 0.0238      0.6319 ± 0.0139     
Naive_bayes - Private Model: True, Private Conformal: True (Lap_hist)  0.8991 ± 0.0098      1.5895 ± 0.0226      0.4105 ± 0.0226      0.6319 ± 0.0139     



########## Random_forest ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy           

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  0%|                                          | 1/1000 [00:00<13:38,  1.22it/s][A
  0%|                                          | 2/1000 [00:01<13:29,  1.23it/s][A
  0%|▏                                         | 3/1000 [00:02<13:27,  1.23it/s][A
  0%|▏                                         | 4/1000 [00:03<13:32,  1.23it/s][A
  0%|▏                                         | 5/1000 [00:04<13:37,  1.22it/s][A
  1%|▎                                         | 6/1000 [00:04<13:31,  1.22it/s][A
  1%|▎                                         | 7/1000 [00:05<13:30,  1.23it/s][A
  1%|▎                                         | 8/1000 [00:06<13:27,  1.23it/s][A
  1%|▍                                         | 9/1000 [00:07<13:28,  1.23it/s][A
  1%|▍                                        | 10/1000 [00:08<13:25,  1.23it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: True, Private Conformal: True (EXPONQ)    0.9222 ± 0.0110      1.5361 ± 0.0389      0.4639 ± 0.0389      0.7233 ± 0.0110     
Naive_bayes - Private Model: True, Private Conformal: True (PCOQS)     0.8999 ± 0.0096      1.4530 ± 0.0264      0.5470 ± 0.0264      0.7233 ± 0.0110     
Naive_bayes - Private Model: True, Private Conformal: True (Lap_hist)  0.8995 ± 0.0094      1.4516 ± 0.0259      0.5484 ± 0.0259      0.7233 ± 0.0110     



########## Random_forest ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy           

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  0%|                                          | 1/1000 [00:00<14:42,  1.13it/s][A
  0%|                                          | 2/1000 [00:01<14:24,  1.15it/s][A
  0%|▏                                         | 3/1000 [00:02<14:18,  1.16it/s][A
  0%|▏                                         | 4/1000 [00:03<14:08,  1.17it/s][A
  0%|▏                                         | 5/1000 [00:04<14:12,  1.17it/s][A
  1%|▎                                         | 6/1000 [00:05<14:12,  1.17it/s][A
  1%|▎                                         | 7/1000 [00:06<14:12,  1.17it/s][A
  1%|▎                                         | 8/1000 [00:06<14:14,  1.16it/s][A
  1%|▍                                         | 9/1000 [00:07<14:15,  1.16it/s][A
  1%|▍                                        | 10/1000 [00:08<14:10,  1.16it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: True, Private Conformal: True (EXPONQ)    0.9405 ± 0.0366      1.7647 ± 0.1443      0.2353 ± 0.1443      0.6559 ± 0.0303     
Naive_bayes - Private Model: True, Private Conformal: True (PCOQS)     0.9812 ± 0.0409      1.9208 ± 0.1717      0.0792 ± 0.1717      0.6559 ± 0.0303     
Naive_bayes - Private Model: True, Private Conformal: True (Lap_hist)  0.8394 ± 0.0554      1.4575 ± 0.0753      0.5425 ± 0.0753      0.6559 ± 0.0303     



########## Random_forest ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy           

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  0%|                                          | 1/1000 [00:00<14:31,  1.15it/s][A
  0%|                                          | 2/1000 [00:01<14:59,  1.11it/s][A
  0%|▏                                         | 3/1000 [00:02<14:44,  1.13it/s][A
  0%|▏                                         | 4/1000 [00:03<15:10,  1.09it/s][A
  0%|▏                                         | 5/1000 [00:04<14:52,  1.12it/s][A
  1%|▎                                         | 6/1000 [00:05<14:41,  1.13it/s][A
  1%|▎                                         | 7/1000 [00:06<14:40,  1.13it/s][A
  1%|▎                                         | 8/1000 [00:07<14:39,  1.13it/s][A
  1%|▍                                         | 9/1000 [00:07<14:33,  1.13it/s][A
  1%|▍                                        | 10/1000 [00:08<14:22,  1.15it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: True, Private Conformal: True (EXPONQ)    0.9217 ± 0.0116      1.4841 ± 0.0457      0.5159 ± 0.0457      0.7479 ± 0.0125     
Naive_bayes - Private Model: True, Private Conformal: True (PCOQS)     0.8999 ± 0.0100      1.3985 ± 0.0326      0.6015 ± 0.0326      0.7479 ± 0.0125     
Naive_bayes - Private Model: True, Private Conformal: True (Lap_hist)  0.8995 ± 0.0098      1.3971 ± 0.0317      0.6029 ± 0.0317      0.7479 ± 0.0125     



########## Random_forest ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy           

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  0%|                                          | 1/1000 [00:00<15:51,  1.05it/s][A
  0%|                                          | 2/1000 [00:01<15:32,  1.07it/s][A
  0%|▏                                         | 3/1000 [00:02<15:27,  1.08it/s][A
  0%|▏                                         | 4/1000 [00:03<15:26,  1.07it/s][A
  0%|▏                                         | 5/1000 [00:04<15:42,  1.06it/s][A
  1%|▎                                         | 6/1000 [00:05<15:30,  1.07it/s][A
  1%|▎                                         | 7/1000 [00:06<15:12,  1.09it/s][A
  1%|▎                                         | 8/1000 [00:07<15:08,  1.09it/s][A
  1%|▍                                         | 9/1000 [00:08<15:14,  1.08it/s][A
  1%|▍                                        | 10/1000 [00:09<15:18,  1.08it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: True, Private Conformal: True (EXPONQ)    0.9224 ± 0.0117      1.2784 ± 0.0372      0.7216 ± 0.0372      0.8156 ± 0.0095     
Naive_bayes - Private Model: True, Private Conformal: True (PCOQS)     0.9000 ± 0.0099      1.2035 ± 0.0218      0.7965 ± 0.0218      0.8156 ± 0.0095     
Naive_bayes - Private Model: True, Private Conformal: True (Lap_hist)  0.8997 ± 0.0098      1.2026 ± 0.0217      0.7974 ± 0.0217      0.8156 ± 0.0095     



########## Random_forest ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy           

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  0%|                                          | 1/1000 [00:00<14:30,  1.15it/s][A
  0%|                                          | 2/1000 [00:01<15:11,  1.09it/s][A
  0%|▏                                         | 3/1000 [00:02<14:43,  1.13it/s][A
  0%|▏                                         | 4/1000 [00:03<14:50,  1.12it/s][A
  0%|▏                                         | 5/1000 [00:04<14:39,  1.13it/s][A
  1%|▎                                         | 6/1000 [00:05<14:33,  1.14it/s][A
  1%|▎                                         | 7/1000 [00:06<14:30,  1.14it/s][A
  1%|▎                                         | 8/1000 [00:07<14:27,  1.14it/s][A
  1%|▍                                         | 9/1000 [00:07<14:27,  1.14it/s][A
  1%|▍                                        | 10/1000 [00:08<14:24,  1.14it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: True, Private Conformal: True (EXPONQ)    0.9225 ± 0.0116      1.2570 ± 0.0361      0.7430 ± 0.0361      0.8230 ± 0.0093     
Naive_bayes - Private Model: True, Private Conformal: True (PCOQS)     0.9003 ± 0.0098      1.1841 ± 0.0205      0.8159 ± 0.0205      0.8230 ± 0.0093     
Naive_bayes - Private Model: True, Private Conformal: True (Lap_hist)  0.8999 ± 0.0099      1.1831 ± 0.0207      0.8169 ± 0.0207      0.8230 ± 0.0093     



########## Random_forest ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy           




In [2]:
print(results)

NameError: name 'results' is not defined

### Processing results for tabular presentation

In [1]:
def generate_epsilon_tables(results):
    """Generate properly spaced tables comparing EXPONQ vs PCOQS by model_epsilon."""
    # Extract and filter data
    filtered_data = []
    for entry in results:
        epsilon = entry['model_epsilon']
        for model, model_data in entry['output'].items():
            for config in model_data:
                if "Private Conformal: True" in config['Description']:
                    method = "EXPONQ" if "EXPONQ" in config['Description'] else "PCOQS"
                    filtered_data.append({
                        'Model': model,
                        'e_f': epsilon,
                        'Method': method,
                        'Coverage': config['Coverage'],
                        'Efficiency': config['Efficiency'],
                        'Informativeness': config['Informativeness'],
                        'Accuracy': config['Accuracy']
                    })

    df = pd.DataFrame(filtered_data)
    
    # Generate tables for each model
    output = ""
    for model in df['Model'].unique():
        model_df = df[df['Model'] == model]
        
        # Handle duplicates by taking the first occurrence (or you could aggregate)
        model_df = model_df.drop_duplicates(['e_f', 'Method'], keep='first')
        
        # Pivot table
        try:
            table = model_df.pivot(index='e_f', columns='Method',
                                 values=['Coverage', 'Efficiency', 'Informativeness'])
            
            # Flatten multi-index columns
            table.columns = [f"{metric}\n({method})" for metric, method in table.columns]
            
            # Adding Accuracy column (same for both methods)
            accuracy = model_df.drop_duplicates('e_f').set_index('e_f')['Accuracy']
            table['Model\nAccuracy'] = accuracy
            
            # Reorder columns to match requested format
            col_order = [
                'Coverage\n(EXPONQ)', 'Coverage\n(PCOQS)',
                'Efficiency\n(EXPONQ)', 'Efficiency\n(PCOQS)',
                'Informativeness\n(EXPONQ)', 'Informativeness\n(PCOQS)',
                'Model\nAccuracy'
            ]
            table = table[col_order]
            table = table.reset_index()
            
           
            for col in table.columns:
                if col != 'e_f':
                    table[col] = table[col].str.replace(' ± ', ' (', regex=False) + ')'
            
            # Configure pandas display options for better formatting
            pd.set_option('display.max_columns', None)
            pd.set_option('display.width', 1000)
            pd.set_option('display.colheader_justify', 'center')
            
            # Add header
            header = f"\n{model}\n{'='*len(model)}"
            output += f"{header}\n{table.to_string(index=False, justify='center')}\n\n"
        
        except ValueError as e:
            output += f"\nSkipping {model} due to pivot error: {str(e)}\n"
    
    return output

# Generate and print the tables
print(generate_epsilon_tables(results))

NameError: name 'results' is not defined