### Libraries

In [1]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import random
import diffprivlib
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from diffprivlib.models import LogisticRegression as DP_LogisticRegression
from diffprivlib.models import GaussianNB as DPGaussianNB  
from diffprivlib.models import DecisionTreeClassifier as DP_DecisionTreeClassifier
from diffprivlib.models import RandomForestClassifier as DP_RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.base import clone
from sklearn.preprocessing import LabelEncoder
import math
from scipy.special import softmax
from sklearn.datasets import make_classification
import scipy.stats as stats
import time
from decimal import Decimal
import pickle
import os

### Nonprivate models

In [2]:
def get_nonprivate_model(model_name, random_state):
    

    if model_name == "Random_forest":
        model= RandomForestClassifier(random_state=random_state)
        
        
    elif model_name == "Decision_tree":
        model=DecisionTreeClassifier()
    
        
    elif model_name == "Naive_bayes":
       model=GaussianNB()
        
    elif model_name == "Logistic_regression":
        model=LogisticRegression(max_iter=5000, random_state=random_state)
    return model

### Private models

In [3]:
def get_private_model(model_name, X_train, y_train, y, epsilon):
    EPSILON =epsilon
  
    classes = np.unique(y)

    if model_name == "Random_forest":
        # Define bounds for each feature (for differential privacy)
        min_bounds = -20
        max_bounds = 20
        bounds = (min_bounds, max_bounds)
        
        # Initialize private Random Forest with differential privacy
        model = DP_RandomForestClassifier(
            epsilon=EPSILON,    # Differential privacy parameter
            n_estimators=127,   # Number of trees in the forest
            max_depth=7,        # Maximum depth of the trees
            bounds=bounds,      # Data bounds for differential privacy
            random_state=42,
            classes=classes
        )
    elif model_name == "Decision_tree":
        # Define bounds for each feature (for differential privacy)
        min_bounds = -20
        max_bounds = 20
        bounds = (min_bounds, max_bounds)
    
        # Initialize private Random Forest with differential privacy
        model = DP_DecisionTreeClassifier(
            epsilon=EPSILON,    # Differential privacy parameter
            max_depth=6,        # Maximum depth of the trees
            bounds=bounds,      # Data bounds for differential privacy
            random_state=42,
            classes=classes
        )
    elif model_name == "Naive_bayes":
        # Define bounds for each feature (for differential privacy)
        min_bounds =-20 
        max_bounds =20 
        bounds = (min_bounds, max_bounds)
        
        # Initialize private Naive Bayes with differential privacy
        model = DPGaussianNB(
            epsilon=EPSILON,    # Differential privacy parameter
            bounds=bounds,      # Data bounds for differential privacy
            random_state=42
        )
    elif model_name == "Logistic_regression":
        # Calculate the data norm (L2 norm)
        data_norm = 20
        
        # Initialize private Logistic Regression
        model = DP_LogisticRegression(
            epsilon=EPSILON,
            data_norm=data_norm,
            random_state=42
        )
    return model    

### Performance evaluation function

In [4]:
# Updated evaluation function to include accuracy
def evaluate_conformal_split(prediction_sets, y_test, y_pred):
    # Calculate coverage
    coverage = np.mean([1 if y_test[i] in prediction_sets[i] else 0 for i in range(len(y_test))])

    # Calculate Efficiency
    ambiguities = [len(pred_set) for pred_set in prediction_sets]
    average_Efficiency = np.mean(ambiguities)

    # Calculate accuracy
    accuracy = np.mean(y_test == y_pred)

    return coverage, average_Efficiency, accuracy

### Helping function for PCOQS

In [5]:

def NoisyRC(range_bounds, D, sigma):
    """
    Noisy Range Count for float values with Gaussian noise.

    Parameters:
    range_bounds (tuple): A tuple (a, b) representing the range [a, b].
    D (list): The sorted dataset.
    sigma (float): The standard deviation of the Gaussian noise.

    Returns:
    int: The noisy count of elements in the range [a, b].
    """
    a, b = range_bounds
    count = sum(1 for z in D if a <= z <= b)
    noise = np.random.normal(0, sigma)
    noisy_count = count + noise
    return max(0, int(np.floor(noisy_count)))  # Ensure non-negative count

def PCOQS(D, alpha, rho, lower_bound=0, upper_bound=1, delta=1e-10):
    """
    Differentially Private Quantile Approximation Algorithm without integer conversion.

    Parameters:
    D (list): The sorted dataset.
    alpha (float): The quantile level (e.g., 0.5 for median).
    rho (float): The privacy parameter (smaller = more private).
    lower_bound (float): Lower bound of the search space.
    upper_bound (float): Upper bound of the search space.
    delta (float): Small positive value to ensure convergence.

    Returns:
    float: A differentially private approximation of the quantile x_{(m)}.
    """

    
    n = len(D)
    max_iterations = int(np.ceil(np.log2((upper_bound - lower_bound) / delta)))
    sigma = np.sqrt(max_iterations / (2 * rho))  # Noise scale for Gaussian mechanism
    m = int(np.ceil((1 - alpha) * (n + 1)))

    left, right = lower_bound, upper_bound

    for i in range(max_iterations):
        mid = (left + right) / 2
        c = NoisyRC((lower_bound, mid), D, sigma)
        
        if c < m:
            left = mid + delta
        else:
            right = mid

    return np.round((left + right) / 2, 2)

### Helping function for EXPONQ

In [6]:
# Optimal gamma is a root.
def get_optimal_gamma(scores,n,alpha,m,epsilon):
    a = alpha**2
    b = - ( alpha*epsilon*(n+1)*(1-alpha)/2 + 2*alpha )
    c = 1
    best_q = 1
    gamma1 = (-b + np.sqrt(b**2 - 4*a*c))/(2*a)
    gamma2 = (-b - np.sqrt(b**2 - 4*a*c))/(2*a)

    gamma1 = min(max(gamma1,1e-12),1-1e-12)
    gamma2 = min(max(gamma2,1e-12),1-1e-12)

    bins = np.linspace(0,1,m)

    q1 = get_private_quantile(scores, alpha, epsilon, gamma1, bins)
    q2 = get_private_quantile(scores, alpha, epsilon, gamma2, bins)

    return (gamma1, q1) if q1 < q2 else (gamma2, q2)

def get_optimal_gamma_m(n, alpha, epsilon):
    candidates_m = np.logspace(4,6,50).astype(int)
    scores = np.random.rand(n,1)
    best_m = int(1/alpha)
    best_gamma = 1
    best_q = 1
    for m in candidates_m:
        gamma, q = get_optimal_gamma(scores,n,alpha,m,epsilon)
        if q < best_q:
            best_q = q
            best_m = m
            best_gamma = gamma
    return best_m, best_gamma

def get_private_quantile(scores, alpha, epsilon, gamma, bins):
    n = scores.shape[0]
    epsilon_normed = epsilon*min(alpha, 1-alpha)
    # Get the quantile
    qtilde = get_qtilde(n, alpha, gamma, epsilon, bins.shape[0])
    scores = scores.squeeze()
    score_to_bin = np.digitize(scores,bins)
    binned_scores = bins[np.minimum(score_to_bin,bins.shape[0]-1)]
    w1 = np.digitize(binned_scores, bins)
    w2 = np.digitize(binned_scores, bins, right=True)
    # Clip bins
    w1 = np.maximum(np.minimum(w1,bins.shape[0]-1),0)
    w2 = np.maximum(np.minimum(w2,bins.shape[0]-1),0)
    lower_mass = np.bincount(w1,minlength=bins.shape[0]).cumsum()/qtilde
    upper_mass = (n-np.bincount(w2,minlength=bins.shape[0]).cumsum())/(1-qtilde)
    w = np.maximum( lower_mass , upper_mass )
    sampling_probabilities = softmax(-(epsilon_normed/2)*w)
    # Check
    sampling_probabilities = sampling_probabilities/sampling_probabilities.sum()
    qhat = np.random.choice(bins,p=sampling_probabilities)
    return qhat

def get_shat_from_scores_private(scores, alpha, epsilon, gamma, score_bins):
    shat = get_private_quantile(scores, alpha, epsilon, gamma, score_bins)
    return shat

def get_qtilde(n,alpha,gamma,epsilon,m):
    qtilde = (n+1)*(1-alpha)/(n*(1-gamma*alpha))+2/(epsilon*n)*np.log(m/(gamma*alpha))
    qtilde = min(qtilde, 1-1e-12)
    return qtilde

### Data generation function

In [7]:
def simulate_normal_classification(
    n_samples_per_class,
    n_features,
    n_classes,
    class_means=None,
    class_covariances=None,
    random_state=None
):
    """
    Simulate classification data directly from Normal distributions.
    
    Parameters:
        n_samples_per_class (int): Number of samples per class.
        n_features (int): Number of features.
        n_classes (int): Number of classes.
        class_means (list of arrays): List of mean vectors for each class. If None, generated randomly.
        class_covariances (list of arrays): List of covariance matrices for each class. If None, identity matrices are used.
        random_state (int): Seed for reproducibility.
    
    Returns:
        X (ndarray): Feature matrix of shape (n_samples, n_features), rounded to 4 decimal places.
        y (ndarray): Class labels of shape (n_samples,), rounded to 4 decimal places.
    """
    np.random.seed(random_state)
    
    X = []
    y = []
    
    # Generate means and covariances if not provided
    if class_means is None:
        class_means = [np.random.uniform(-5, 5, n_features) for _ in range(n_classes)]
    if class_covariances is None:
        class_covariances = [np.eye(n_features) for _ in range(n_classes)]
    
    for class_idx in range(n_classes):
        # Draw samples from the Normal distribution
        samples = np.random.multivariate_normal(
            mean=class_means[class_idx],
            cov=class_covariances[class_idx],
            size=n_samples_per_class
        )
        X.append(samples)
        y.extend([class_idx] * n_samples_per_class)
    
    # Combine data and shuffle
    X = np.vstack(X)
    y = np.array(y)
    indices = np.random.permutation(len(y))
    
    # Round X and y to 4 decimal places
    X = np.round(X[indices], 4)
    y = np.round(y[indices], 4)
    
    return X, y


### Simulation function

In [8]:
def run_simulation(
    private_model,
    private_conformal,
    model_name,
    n_samples_per_class,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    epsilon,
    conformal_epsilon,
    n_trials
):
    n_trials = int(n_trials)  # Ensure type safety
    coverage_results_EXPONQ = []
    Efficiency_results_EXPONQ = []
    informativeness_results_EXPONQ = []  
    coverage_results_PCOQS = []
    Efficiency_results_PCOQS = []
    informativeness_results_PCOQS = []  
    coverage_results_np = []
    Efficiency_results_np = []
    informativeness_results_np = []  
    accuracy_results = []

    for i in tqdm(range(n_trials)):
        # Step 1: Generate simulated data
        X, y = simulate_normal_classification(
            n_samples_per_class=n_samples_per_class,
            n_features=n_features,
            n_classes=n_classes,
            class_means=class_means,
            class_covariances=class_covariances,
            random_state=i + 3
        )

        # Step 2: Split data
        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=i + 3)
        X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.4, random_state=i + 3)
        num_calib = len(X_cal)

        # Train model
        if private_model:
            model = get_private_model(model_name, X_test, y_train, y, epsilon)
        else:
            model = get_nonprivate_model(model_name, random_state=42)
        model.fit(X_train, y_train)

        # Compute calibration scores
        prob_cal = model.predict_proba(X_cal)
        scores_cal = np.round(1 - prob_cal[np.arange(len(y_cal)), y_cal], 4)

        if private_conformal:
            # Compute thresholds for EXPONQ method
            mstar, gammastar = get_optimal_gamma_m(num_calib, alpha, conformal_epsilon)
            m = mstar
            gamma = gammastar
            score_bins = np.linspace(0, 1, m)
            shat = get_shat_from_scores_private(scores_cal, alpha, conformal_epsilon, gamma, score_bins)
            threshold_EXPONQ = shat

            # Compute threshold for PCOQS
            scores_cal = np.sort(scores_cal)
            rho = conformal_epsilon
            threshold_PCOQS = PCOQS(scores_cal, alpha, rho)  
        else:
            # Non-private threshold
            threshold = np.quantile(scores_cal, math.ceil((1 - alpha) * (len(scores_cal) + 1) / len(scores_cal)))

        # Compute prediction sets
        prob_test = model.predict_proba(X_test)
        scores_test = 1 - prob_test

        if private_conformal:
            # Prediction sets for EXPONQ
            prediction_sets_EXPONQ = [
                np.where(scores <= threshold_EXPONQ)[0] for scores in scores_test
            ]
            prediction_sets_EXPONQ = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets_EXPONQ
            ]
            # Compute informativeness for Ana
            informativeness_EXPONQ = np.mean([len(pset) == 1 for pset in prediction_sets_EXPONQ])
            informativeness_results_EXPONQ.append(informativeness_EXPONQ)

            # Prediction sets for PCOQS
            prediction_sets_PCOQS = [
                np.where(scores <= threshold_PCOQS)[0] for scores in scores_test
            ]
            prediction_sets_PCOQS = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets_PCOQS
            ]
            # Compute informativeness for PCOQS
            informativeness_PCOQS = np.mean([len(pset) == 1 for pset in prediction_sets_PCOQS])
            informativeness_results_PCOQS.append(informativeness_PCOQS)
        else:
            # Non-private prediction sets
            prediction_sets = [
                np.where(scores <= threshold)[0] for scores in scores_test
            ]
            prediction_sets = [
                pset if len(pset) > 0 else [-1] for pset in prediction_sets
            ]
            # Compute informativeness for non-private
            informativeness = np.mean([len(pset) == 1 for pset in prediction_sets])
            informativeness_results_np.append(informativeness)

        # Compute metrics
        if private_conformal:
            # EXPONQ
            coverage_EXPONQ = np.mean([y_test[i] in prediction_sets_EXPONQ[i] for i in range(len(y_test))])
            coverage_results_EXPONQ.append(coverage_EXPONQ)
            Efficiency_EXPONQ = np.mean([len(pset) for pset in prediction_sets_EXPONQ])
            Efficiency_results_EXPONQ.append(Efficiency_EXPONQ)

            # PCOQS
            coverage_PCOQS = np.mean([y_test[i] in prediction_sets_PCOQS[i] for i in range(len(y_test))])
            coverage_results_PCOQS.append(coverage_PCOQS)
            Efficiency_PCOQS = np.mean([len(pset) for pset in prediction_sets_PCOQS])
            Efficiency_results_PCOQS.append(Efficiency_PCOQS)
        else:
            # Non-private
            coverage = np.mean([y_test[i] in prediction_sets[i] for i in range(len(y_test))])
            coverage_results_np.append(coverage)
            Efficiency = np.mean([len(pset) for pset in prediction_sets])
            Efficiency_results_np.append(Efficiency)

        # Accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_results.append(accuracy)

    # Aggregate results
    results = {}
    if private_conformal:
        results["EXPONQ"] = {
            "coverage_mean": np.mean(coverage_results_EXPONQ),
            "coverage_std": np.std(coverage_results_EXPONQ, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_EXPONQ),
            "Efficiency_std": np.std(Efficiency_results_EXPONQ, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_EXPONQ),
            "informativeness_std": np.std(informativeness_results_EXPONQ, ddof=1),
        }
        results["PCOQS"] = {
            "coverage_mean": np.mean(coverage_results_PCOQS),
            "coverage_std": np.std(coverage_results_PCOQS, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_PCOQS),
            "Efficiency_std": np.std(Efficiency_results_PCOQS, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_PCOQS),
            "informativeness_std": np.std(informativeness_results_PCOQS, ddof=1),
        }
    else:
        results["nonprivate"] = {
            "coverage_mean": np.mean(coverage_results_np),
            "coverage_std": np.std(coverage_results_np, ddof=1),
            "Efficiency_mean": np.mean(Efficiency_results_np),
            "Efficiency_std": np.std(Efficiency_results_np, ddof=1),
            "informativeness_mean": np.mean(informativeness_results_np),
            "informativeness_std": np.std(informativeness_results_np, ddof=1),
        }
    results["accuracy"] = {
        "mean": np.mean(accuracy_results),
        "std": np.std(accuracy_results, ddof=1),
    }

    return results



### Simulation for different models

In [9]:
def complete_model_conformal_simulation(
    model_sets,
    private_models,
    private_conformals,
    n_samples_per_class,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    epsilon,
    conformal_epsilon,
    n_trials
):
    # Results dictionary to store all results
    results = {}

    # Iterate through models, privacy options, and conformal options
    for model_name in tqdm(model_sets):
        model_results = []  # To collect results for each model
        for private_model in private_models:
            for private_conformal in private_conformals:
                # Run simulation for the current configuration
                sim_results = run_simulation(
                    private_model,
                    private_conformal,
                    model_name,
                    n_samples_per_class,
                    n_features,
                    n_classes,
                    class_means,
                    class_covariances,
                    alpha,
                    epsilon,
                    conformal_epsilon,
                    n_trials
                )

                # Extract results based on the conformal configuration
                if private_conformal:
                    # Add results for EXPONQ
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal} (EXPONQ)",
                        "Coverage": f"{sim_results['EXPONQ']['coverage_mean']:.4f} ± {sim_results['EXPONQ']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['EXPONQ']['Efficiency_mean']:.4f} ± {sim_results['EXPONQ']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['EXPONQ']['informativeness_mean']:.4f} ± {sim_results['EXPONQ']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })
                    # Add results for PCOQS
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal} (PCOQS)",
                        "Coverage": f"{sim_results['PCOQS']['coverage_mean']:.4f} ± {sim_results['PCOQS']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['PCOQS']['Efficiency_mean']:.4f} ± {sim_results['PCOQS']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['PCOQS']['informativeness_mean']:.4f} ± {sim_results['PCOQS']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })
                else:
                    # Add results for non-private conformal
                    model_results.append({
                        "Description": f"{model_name} - Private Model: {private_model}, Private Conformal: {private_conformal}",
                        "Coverage": f"{sim_results['nonprivate']['coverage_mean']:.4f} ± {sim_results['nonprivate']['coverage_std']:.4f}",
                        "Efficiency": f"{sim_results['nonprivate']['Efficiency_mean']:.4f} ± {sim_results['nonprivate']['Efficiency_std']:.4f}",
                        "Informativeness": f"{sim_results['nonprivate']['informativeness_mean']:.4f} ± {sim_results['nonprivate']['informativeness_std']:.4f}",
                        "Accuracy": f"{sim_results['accuracy']['mean']:.4f} ± {sim_results['accuracy']['std']:.4f}",
                    })

        # Store results for the current model
        results[model_name] = model_results

    # Generate and print results in a structured format
    for model_name, model_results in results.items():
        print(f"\n{'#' * 10} {model_name} {'#' * 10}\n")
        print(f"{'Description':<70} {'Coverage':<20} {'Efficiency':<20} {'Informativeness':<20} {'Accuracy':<20}")
        print("-" * 150)
        for result in model_results:
            print(f"{result['Description']:<70} {result['Coverage']:<20} {result['Efficiency']:<20} {result['Informativeness']:<20} {result['Accuracy']:<20}")
        print("\n")
    
    return results

### Simulation function over different sample sizes

In [10]:
def compare_conformal_over_n(
    model_sets,
    epsilon,
    conformal_epsilon,
    private_models,
    private_conformals,
    n_samples_per_class_set,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    n_trials
):
    results = []  # Use a list to store the results
    for n_samples_per_class in n_samples_per_class_set:
        print("sample_per_class:", n_samples_per_class)
        #rho = conformal_epsilon
        output = complete_model_conformal_simulation(
                                                    model_sets,
                                                    private_models,
                                                    private_conformals,
                                                    n_samples_per_class,
                                                    n_features,
                                                    n_classes,
                                                    class_means,
                                                    class_covariances,
                                                    alpha,
                                                    epsilon,
                                                    conformal_epsilon,
                                                    n_trials
                                                    )
        # Append results for the current sample size
        results.append({"sample_per_class": n_samples_per_class, "output": output})
    return results


### Running the simulation

In [11]:
#Private model for different epsilon values and Nonprivate conformal 

model_sets = ["Naive_bayes",  "Random_forest"] 
private_models = [False, True]
private_conformals = [False, True]

n_samples_per_class_set=[50, 100, 250, 500, 1000, 3000, 5000]
n_features = 8
n_classes = 2
class_means = [
                np.array([0.8, 0.8, 0.8, 0.8, 0.8,0.8, 0.8, 0.8]), 
                np.array([-1, -1, -1, -1, -1, -1, -1, -1])#,
                
]
class_covariances = [
    np.eye(8)*7,
    np.eye(8) * 8,
     
]

n_trials =1000
alpha = 0.1
epsilon =2
#conformal_epsilon_values = [0.1, 0.5, 1, 5, 10,20, 50]
conformal_epsilon = 1


# Save/load logic
results_filename = "NonPriv_model_priv_conform_over_n_values.pkl"

if os.path.exists(results_filename):
    print("Loading saved results...")
    with open(results_filename, "rb") as f:
        results = pickle.load(f)
else:
    print("Running experiments...")
    
    results = compare_conformal_over_n(
    model_sets,
    epsilon,
    conformal_epsilon,
    private_models,
    private_conformals,
    n_samples_per_class_set,
    n_features,
    n_classes,
    class_means,
    class_covariances,
    alpha,
    n_trials
)
    with open(results_filename, "wb") as f:
        pickle.dump(results, f)
    print(f"Results saved to {results_filename}")






Running experiments...
sample_per_class: 50


  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  5%|██                                      | 52/1000 [00:00<00:01, 518.60it/s][A
 10%|████                                   | 104/1000 [00:00<00:01, 495.89it/s][A
 16%|██████▏                                | 160/1000 [00:00<00:01, 523.09it/s][A
 22%|████████▍                              | 217/1000 [00:00<00:01, 538.11it/s][A
 27%|██████████▌                            | 271/1000 [00:00<00:01, 533.86it/s][A
 32%|████████████▋                          | 325/1000 [00:00<00:01, 528.18it/s][A
 38%|██████████████▋                        | 378/1000 [00:00<00:01, 512.98it/s][A
 43%|████████████████▊                      | 430/1000 [00:00<00:01, 503.13it/s][A
 48%|██████████████████▊                    | 481/1000 [00:00<00:01, 496.45it/s][A
 53%|████████████████████▋                  | 531/1000 [00:01<00:00, 488.20it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9601 ± 0.0604      1.5967 ± 0.2426      0.4033 ± 0.2426      0.7854 ± 0.1025     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9794 ± 0.0434      1.7300 ± 0.2179      0.2700 ± 0.2179      0.7854 ± 0.1025     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.8858 ± 0.1118      1.3219 ± 0.2768      0.6781 ± 0.2768      0.7854 ± 0.1025     
Naive_bayes - Private Model: True, Private Conformal: False            0.9673 ± 0.0582      1.8891 ± 0.1456      0.1109 ± 0.1456      0.5867 ± 0.1248     
Naive_bayes - Private Model: True, Pri

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  4%|█▍                                      | 37/1000 [00:00<00:02, 364.19it/s][A
  7%|██▉                                     | 74/1000 [00:00<00:02, 361.61it/s][A
 11%|████▎                                  | 111/1000 [00:00<00:02, 330.61it/s][A
 14%|█████▋                                 | 145/1000 [00:00<00:02, 300.07it/s][A
 18%|██████▉                                | 177/1000 [00:00<00:02, 304.54it/s][A
 21%|████████▎                              | 213/1000 [00:00<00:02, 319.92it/s][A
 25%|█████████▋                             | 248/1000 [00:00<00:02, 328.81it/s][A
 28%|██████████▉                            | 282/1000 [00:00<00:02, 321.95it/s][A
 32%|████████████▎                          | 316/1000 [00:00<00:02, 324.82it/s][A
 35%|█████████████▌                         | 349/1000 [00:01<00:01, 326.16it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9795 ± 0.0318      1.6597 ± 0.1872      0.3403 ± 0.1872      0.8071 ± 0.0709     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9905 ± 0.0207      1.7901 ± 0.1638      0.2099 ± 0.1638      0.8071 ± 0.0709     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.9109 ± 0.0791      1.3173 ± 0.2196      0.6827 ± 0.2196      0.8071 ± 0.0709     
Naive_bayes - Private Model: True, Private Conformal: False            0.9793 ± 0.0322      1.9474 ± 0.0680      0.0526 ± 0.0680      0.5543 ± 0.1039     
Naive_bayes - Private Model: True, Pri

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  3%|█▎                                      | 32/1000 [00:00<00:03, 299.68it/s][A
  6%|██▍                                     | 62/1000 [00:00<00:03, 254.09it/s][A
  9%|███▊                                    | 94/1000 [00:00<00:03, 278.57it/s][A
 13%|████▉                                  | 126/1000 [00:00<00:02, 292.42it/s][A
 16%|██████▏                                | 158/1000 [00:00<00:02, 300.73it/s][A
 19%|███████▍                               | 190/1000 [00:00<00:02, 305.10it/s][A
 22%|████████▋                              | 222/1000 [00:00<00:02, 307.69it/s][A
 25%|█████████▉                             | 254/1000 [00:00<00:02, 306.72it/s][A
 28%|███████████                            | 285/1000 [00:01<00:02, 257.64it/s][A
 31%|████████████▏                          | 314/1000 [00:01<00:02, 265.69it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9918 ± 0.0124      1.7518 ± 0.1315      0.2482 ± 0.1315      0.8183 ± 0.0433     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9967 ± 0.0078      1.8566 ± 0.1158      0.1434 ± 0.1158      0.8183 ± 0.0433     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.9060 ± 0.0470      1.2269 ± 0.1161      0.7731 ± 0.1161      0.8183 ± 0.0433     
Naive_bayes - Private Model: True, Private Conformal: False            0.9919 ± 0.0124      1.9568 ± 0.0480      0.0432 ± 0.0480      0.6289 ± 0.0612     
Naive_bayes - Private Model: True, Pri

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  3%|█▏                                      | 30/1000 [00:00<00:03, 293.13it/s][A
  6%|██▍                                     | 60/1000 [00:00<00:03, 265.96it/s][A
  9%|███▍                                    | 87/1000 [00:00<00:03, 254.08it/s][A
 11%|████▍                                  | 113/1000 [00:00<00:03, 245.01it/s][A
 14%|█████▍                                 | 138/1000 [00:00<00:03, 237.58it/s][A
 16%|██████▎                                | 162/1000 [00:00<00:03, 219.72it/s][A
 19%|███████▎                               | 186/1000 [00:00<00:03, 223.09it/s][A
 21%|████████▏                              | 210/1000 [00:00<00:03, 227.94it/s][A
 23%|█████████                              | 233/1000 [00:01<00:03, 200.49it/s][A
 26%|██████████                             | 258/1000 [00:01<00:03, 212.16it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9957 ± 0.0067      1.8075 ± 0.0993      0.1925 ± 0.0993      0.8230 ± 0.0312     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9984 ± 0.0042      1.8951 ± 0.0876      0.1049 ± 0.0876      0.8230 ± 0.0312     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.9022 ± 0.0326      1.1976 ± 0.0711      0.8024 ± 0.0711      0.8230 ± 0.0312     
Naive_bayes - Private Model: True, Private Conformal: False            0.9991 ± 0.0040      1.9947 ± 0.0189      0.0053 ± 0.0189      0.6266 ± 0.0478     
Naive_bayes - Private Model: True, Pri

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  3%|█                                       | 27/1000 [00:00<00:03, 259.34it/s][A
  5%|██                                      | 53/1000 [00:00<00:05, 169.88it/s][A
  8%|███▏                                    | 80/1000 [00:00<00:04, 202.75it/s][A
 11%|████▏                                  | 107/1000 [00:00<00:04, 222.29it/s][A
 13%|█████▏                                 | 133/1000 [00:00<00:03, 231.80it/s][A
 16%|██████▏                                | 158/1000 [00:00<00:03, 237.19it/s][A
 18%|███████▏                               | 185/1000 [00:00<00:03, 245.89it/s][A
 21%|████████▏                              | 211/1000 [00:00<00:03, 250.06it/s][A
 24%|█████████▏                             | 237/1000 [00:01<00:03, 252.72it/s][A
 26%|██████████▎                            | 264/1000 [00:01<00:02, 256.50it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9978 ± 0.0035      1.8544 ± 0.0751      0.1456 ± 0.0751      0.8253 ± 0.0206     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9836 ± 0.0129      1.6272 ± 0.1483      0.3728 ± 0.1483      0.8253 ± 0.0206     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.9031 ± 0.0216      1.1896 ± 0.0485      0.8104 ± 0.0485      0.8253 ± 0.0206     
Naive_bayes - Private Model: True, Private Conformal: False            0.9987 ± 0.0031      1.9881 ± 0.0209      0.0119 ± 0.0209      0.6972 ± 0.0268     
Naive_bayes - Private Model: True, Pri

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  1%|▍                                         | 9/1000 [00:00<00:11, 87.69it/s][A
  2%|▋                                        | 18/1000 [00:00<00:11, 85.84it/s][A
  3%|█                                        | 27/1000 [00:00<00:11, 86.10it/s][A
  4%|█▍                                       | 36/1000 [00:00<00:11, 86.79it/s][A
  5%|█▉                                       | 46/1000 [00:00<00:10, 90.28it/s][A
  6%|██▎                                     | 59/1000 [00:00<00:09, 103.15it/s][A
  7%|██▉                                     | 73/1000 [00:00<00:08, 112.46it/s][A
  9%|███▍                                    | 86/1000 [00:00<00:07, 117.05it/s][A
 10%|███▉                                    | 98/1000 [00:00<00:08, 103.28it/s][A
 11%|████▎                                   | 109/1000 [00:01<00:09, 95.50it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9994 ± 0.0010      1.9167 ± 0.0431      0.0833 ± 0.0431      0.8256 ± 0.0117     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9382 ± 0.0169      1.3149 ± 0.0795      0.6851 ± 0.0795      0.8256 ± 0.0117     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.9008 ± 0.0124      1.1800 ± 0.0272      0.8200 ± 0.0272      0.8256 ± 0.0117     
Naive_bayes - Private Model: True, Private Conformal: False            1.0000 ± 0.0000      2.0000 ± 0.0000      0.0000 ± 0.0000      0.6949 ± 0.0227     
Naive_bayes - Private Model: True, Pri

  0%|                                                     | 0/2 [00:00<?, ?it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s][A
  1%|▎                                         | 7/1000 [00:00<00:16, 59.33it/s][A
  1%|▌                                        | 14/1000 [00:00<00:15, 62.46it/s][A
  2%|▉                                        | 23/1000 [00:00<00:13, 71.57it/s][A
  3%|█▎                                       | 32/1000 [00:00<00:12, 75.98it/s][A
  4%|█▋                                       | 41/1000 [00:00<00:12, 77.96it/s][A
  5%|██                                       | 49/1000 [00:00<00:12, 78.57it/s][A
  6%|██▍                                      | 58/1000 [00:00<00:11, 79.18it/s][A
  7%|██▋                                      | 67/1000 [00:00<00:11, 79.80it/s][A
  8%|███                                      | 75/1000 [00:00<00:11, 79.83it/s][A
  8%|███▍                                     | 83/1000 [00:01<00:11, 79.51it/s


########## Naive_bayes ##########

Description                                                            Coverage             Efficiency           Informativeness      Accuracy            
------------------------------------------------------------------------------------------------------------------------------------------------------
Naive_bayes - Private Model: False, Private Conformal: False           0.9996 ± 0.0007      1.9348 ± 0.0353      0.0652 ± 0.0353      0.8253 ± 0.0092     
Naive_bayes - Private Model: False, Private Conformal: True (EXPONQ)   0.9227 ± 0.0116      1.2509 ± 0.0360      0.7491 ± 0.0360      0.8253 ± 0.0092     
Naive_bayes - Private Model: False, Private Conformal: True (PCOQS)    0.9006 ± 0.0099      1.1788 ± 0.0201      0.8212 ± 0.0201      0.8253 ± 0.0092     
Naive_bayes - Private Model: True, Private Conformal: False            1.0000 ± 0.0003      1.9992 ± 0.0047      0.0008 ± 0.0047      0.7479 ± 0.0125     
Naive_bayes - Private Model: True, Pri




### Processing the results for tabular presentation

In [12]:


def generate_sample_size_tables(results):
    """Process results into formatted tables showing EXPONQ vs PCOQS metrics by sample size."""
    # Extract and filter data
    filtered_data = []
    for entry in results:
        sample_size = entry['sample_per_class']
        for model, model_data in entry['output'].items():
            for config in model_data:
                if "Private Conformal: True" in config['Description']:
                    method = "EXPONQ" if "EXPONQ" in config['Description'] else "PCOQS"
                    is_private = "Private Model: True" in config['Description']
                    filtered_data.append({
                        'Model': model,
                        'Private': is_private,
                        'SamplePerClass': sample_size,
                        'Method': method,
                        'Coverage': config['Coverage'],
                        'Efficiency': config['Efficiency'],
                        'Informativeness': config['Informativeness']
                    })

    df = pd.DataFrame(filtered_data)
    
    # Generate tables for all combinations
    output = ""
    for model in df['Model'].unique():
        for is_private in [False, True]:
            subset = df[(df['Model'] == model) & (df['Private'] == is_private)]
            
            # Pivot table with error checking
            try:
                table = subset.pivot(index='SamplePerClass', columns='Method',
                                   values=['Coverage', 'Efficiency', 'Informativeness'])
                table.columns = [f"{metric} ({method})" for metric, method in table.columns]
                table = table.reset_index()
                
                # Reorder columns for consistent output
                col_order = ['SamplePerClass'] + \
                          [f"{m} ({method})" for m in ['Coverage', 'Efficiency', 'Informativeness'] 
                           for method in ['EXPONQ', 'PCOQS']]
                table = table[col_order]
                
                # Add header
                privacy = "Private Model" if is_private else "Non-private Model"
                header = f"\n{model} - {privacy}\n{'='*(len(model) + len(privacy) + 3)}"
                output += f"{header}\n{table.to_string(index=False)}\n\n"
                
            except ValueError as e:
                output += f"\nWarning: Could not create table for {model} ({privacy}): {str(e)}\n"
    
    return output

# Generate and print all tables
print(generate_sample_size_tables(results))


Naive_bayes - Non-private Model
 SamplePerClass Coverage (EXPONQ) Coverage (PCOQS) Efficiency (EXPONQ) Efficiency (PCOQS) Informativeness (EXPONQ) Informativeness (PCOQS)
             50   0.9794 ± 0.0434  0.8858 ± 0.1118     1.7300 ± 0.2179    1.3219 ± 0.2768          0.2700 ± 0.2179         0.6781 ± 0.2768
            100   0.9905 ± 0.0207  0.9109 ± 0.0791     1.7901 ± 0.1638    1.3173 ± 0.2196          0.2099 ± 0.1638         0.6827 ± 0.2196
            250   0.9967 ± 0.0078  0.9060 ± 0.0470     1.8566 ± 0.1158    1.2269 ± 0.1161          0.1434 ± 0.1158         0.7731 ± 0.1161
            500   0.9984 ± 0.0042  0.9022 ± 0.0326     1.8951 ± 0.0876    1.1976 ± 0.0711          0.1049 ± 0.0876         0.8024 ± 0.0711
           1000   0.9836 ± 0.0129  0.9031 ± 0.0216     1.6272 ± 0.1483    1.1896 ± 0.0485          0.3728 ± 0.1483         0.8104 ± 0.0485
           3000   0.9382 ± 0.0169  0.9008 ± 0.0124     1.3149 ± 0.0795    1.1800 ± 0.0272          0.6851 ± 0.0795         0.8200 ± 0