In [9]:
import pandas as pd
import numpy as np
from interpret.glassbox.ebm.utils import DPUtils

from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import ShuffleSplit, cross_validate, StratifiedShuffleSplit

from tqdm.notebook import tqdm

# Privacy budgets (epsilon) that the experiment loops iterate over.
EPS_RANGE = [0.5, 1, 2, 4, 8]
# Delta passed to the DP EBM estimators.
DELTA = 1e-6

# Scorer names handed to `cross_validate` (and recorded as the metric name).
CLASSIFICATION_SCORE = 'roc_auc'
REGRESSION_SCORE = 'neg_root_mean_squared_error'

# Resampling protocol shared by every experiment in this notebook.
SEED = 42
N_SPLITS = 25
TEST_SIZE = 0.2
ss = ShuffleSplit(
    n_splits=N_SPLITS,
    test_size=TEST_SIZE,
    random_state=SEED,
)

# Initialize the dataset metadata registry and the (empty) results frames

dataset_meta = dict()
results = pd.DataFrame(
    columns=["dataset", "problem", "learner", "epsilon", "metric_name", "metric_value"]
)
noise_scales = pd.DataFrame(
    columns=["dataset", "problem", "learner", "n_rows", "n_features", "epsilon", "noise_scale"]
)


# Helper Functions 


def make_row(exp, learner_name, epsilon, metric_name, metric_value):
    """Assemble one results-table record as a plain dict.

    Nothing is mutated here: the record is returned and the caller decides
    where to store it. "dataset" and "problem" are copied from `exp`; the
    remaining fields are taken from the other arguments as given.
    """
    return {
        "dataset": exp["dataset"],
        "problem": exp["problem"],
        "learner": learner_name,
        "epsilon": epsilon,
        "metric_name": metric_name,
        "metric_value": metric_value,
    }

def run_experiments(estimator, estimator_name, epsilon, exp, cv, scoring, n_jobs, use_numpy=True, output='dict'):
    """Cross-validate `estimator` on one dataset and report its test scores.

    Parameters
    ----------
    estimator : object
        Estimator handed unchanged to `cross_validate`.
    estimator_name : str
        Label written to the "learner" column when `output` is a DataFrame.
    epsilon : number
        Value written to the "epsilon" column when `output` is a DataFrame.
    exp : mapping
        Must provide 'X' and 'y'; 'dataset' and 'problem' are also read when a
        row is written to `output`.
    cv, scoring, n_jobs
        Forwarded to `cross_validate`. `scoring` is also stored as the metric name.
    use_numpy : bool, default True
        When True, `exp['X']` and `exp['y']` are converted with `.to_numpy()`
        before fitting; when False they are passed through as-is.
    output : 'dict' or pandas.DataFrame, default 'dict'
        'dict' returns the raw `cross_validate` result. A DataFrame gets one
        row appended in place, with the metric formatted as "mean +- std".

    Returns
    -------
    dict or None
        The `cross_validate` result for `output='dict'`; None when a row was
        appended to the DataFrame passed as `output`.

    Raises
    ------
    ValueError
        If `output` is neither 'dict' nor a DataFrame. This is checked before
        any model is fitted so a bad argument does not cost a full CV run.
    """
    append_to_frame = isinstance(output, pd.DataFrame)
    if not append_to_frame and not (isinstance(output, str) and output == 'dict'):
        raise ValueError('unknown output type')

    if use_numpy:
        X, y = exp['X'].to_numpy(), exp['y'].to_numpy()
    else:
        X, y = exp['X'], exp['y']

    # error_score='raise' surfaces fit/score failures instead of recording NaN scores.
    cv_results = cross_validate(estimator, X, y, scoring=scoring, cv=cv, n_jobs=n_jobs, error_score='raise')

    if not append_to_frame:
        return cv_results

    test_scores = cv_results['test_score']
    metric_str = f"{np.mean(test_scores):.3f} +- {np.std(test_scores):.3f}"
    output.loc[len(output)] = make_row(exp, estimator_name, epsilon, scoring, metric_str)
    return None


# Base parameter set for the DPBoost runs; each run copies it and adds
# 'total_budget' (the epsilon being evaluated) before calling lgb.train.
DPBOOST_PARAMETERS = dict(
    boosting_type='gbdt',
    objective='regression',
    metric='rmse',
    num_leaves=31,
    max_depth=6,
    learning_rate=0.1,
    num_iterations=50,
    my_n_trees=50,
    lambda_l2=0.1,
    bagging_freq=1,
    bagging_fraction=0.5,
    max_bin=255,
    boost_method='DPBoost_2level',
    high_level_boost_round=1,
    inner_boost_round=50,
    balance_partition=1,
    geo_clip=1,
    verbose=2,
    num_threads=1,
)



# Run Classification Experiments

In [10]:
from datasets import load_adult, load_telco_churn, load_credit_fraud, load_pneumonia, onehotencode, make_categorical

# Load every classification benchmark once, up front.
adult = load_adult()
telco = load_telco_churn()
credit = load_credit_fraud()
pneumonia = load_pneumonia()

classification_datasets = [adult, telco, pneumonia, credit]
# For a quick single-dataset run, use instead: classification_datasets = [adult]


# Record the frame shape and domain of each dataset under its name.
for dataset in classification_datasets:
    dataset_meta[dataset['dataset']] = {
        'shape': dataset['df'].shape,
        'domain': dataset['domain'],
    }

In [11]:
from diffprivlib.models import LogisticRegression as dpLogReg
from interpret.glassbox import DPExplainableBoostingClassifier
import lightgbm as lgb # DPBoost
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.model_selection import ShuffleSplit, cross_validate


import warnings
warnings.filterwarnings('ignore')

# Classification results frame: one row per (dataset, learner, epsilon).
c_res = pd.DataFrame(
    columns=["dataset", "problem", "learner", "epsilon", "metric_name", "metric_value"]
)

# Fail fast: the manual DPBoost loop below only knows how to compute these two
# metrics. Without this guard an unsupported metric would only surface (as a
# NameError or a stale score) after the expensive EBM / LR runs had finished.
if CLASSIFICATION_SCORE not in ('roc_auc', 'accuracy'):
    raise ValueError(
        f"DPBoost scoring is not implemented for CLASSIFICATION_SCORE={CLASSIFICATION_SCORE!r}"
    )

for exp in tqdm(classification_datasets):
    print(exp['dataset'])
    # NOTE(review): cat_exp is built here but not used in this cell -- the DPBoost
    # runs below train on the one-hot encoded data. Confirm that is intended.
    cat_exp = make_categorical(exp)
    ohe_exp = onehotencode(exp)  # For LogisticRegression and DPBoost below

    for epsilon in tqdm(EPS_RANGE):
        # DP-EBM with classic composition
        dp_ebm = DPExplainableBoostingClassifier(binning='private-quantile', epsilon=epsilon, delta=DELTA, composition='classic')
        run_experiments(dp_ebm, "aEBM-classic", epsilon, exp, ss, CLASSIFICATION_SCORE, n_jobs=-1, output=c_res)

        # DP-EBM with GDP composition
        dp_ebm_gdp = DPExplainableBoostingClassifier(binning='private-quantile', epsilon=epsilon, delta=DELTA, composition='gdp')
        run_experiments(dp_ebm_gdp, "aEBM-gdp", epsilon, exp, ss, CLASSIFICATION_SCORE, n_jobs=-1, output=c_res)

        # Logistic Regression Section
        lr = dpLogReg(epsilon=epsilon)
        run_experiments(lr, "Logistic Regression", epsilon, ohe_exp, ss, CLASSIFICATION_SCORE, n_jobs=-1, output=c_res)

        # DPBoost Section -- trained through lgb.train rather than cross_validate,
        # so the splits of the shared `ss` splitter are iterated manually.
        dpb_train_params = {**DPBOOST_PARAMETERS, 'total_budget': epsilon}  # invariant across splits
        dpb_scores = []
        for train_index, test_index in ss.split(ohe_exp['X'].values):
            X_train = ohe_exp['X'].iloc[train_index]
            y_train = ohe_exp['y'].values[train_index]
            X_test = ohe_exp['X'].iloc[test_index]
            y_test = ohe_exp['y'].values[test_index]

            data = lgb.Dataset(X_train, y_train)
            dp_boost = lgb.train(dpb_train_params, data)
            dpb_preds = dp_boost.predict(X_test)

            if CLASSIFICATION_SCORE == 'roc_auc':
                dpb_score = roc_auc_score(y_test, dpb_preds)
            else:  # 'accuracy' (guaranteed by the guard above)
                # Adapt DPBoost's regression output to class labels: clip to [0, 1], then round.
                dpb_score = accuracy_score(y_test, np.round(np.clip(dpb_preds, 0, 1)))
            dpb_scores.append(dpb_score)

        dpb_results = f"{np.mean(dpb_scores):.3f} +- {np.std(dpb_scores):.3f}"
        c_res.loc[len(c_res)] = make_row(exp, "DPBoost", epsilon, CLASSIFICATION_SCORE, dpb_results)

c_res
# c_res.to_csv('classification_results.csv', index=None)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

adult-income


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


telco-churn


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


pneumonia


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


credit-fraud


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))





Unnamed: 0,dataset,problem,learner,epsilon,metric_name,metric_value
0,adult-income,classification,aEBM-classic,0.5,roc_auc,0.873 +- 0.007
1,adult-income,classification,aEBM-gdp,0.5,roc_auc,0.875 +- 0.005
2,adult-income,classification,Logistic Regression,0.5,roc_auc,0.488 +- 0.125
3,adult-income,classification,DPBoost,0.5,roc_auc,0.558 +- 0.045
4,adult-income,classification,aEBM-classic,1.0,roc_auc,0.880 +- 0.006
...,...,...,...,...,...,...
75,credit-fraud,classification,DPBoost,4.0,roc_auc,0.465 +- 0.142
76,credit-fraud,classification,aEBM-classic,8.0,roc_auc,0.969 +- 0.011
77,credit-fraud,classification,aEBM-gdp,8.0,roc_auc,0.969 +- 0.011
78,credit-fraud,classification,Logistic Regression,8.0,roc_auc,0.546 +- 0.156


In [12]:
# Build non-private versions of each model
from interpret.glassbox import ExplainableBoostingClassifier
from lightgbm import LGBMClassifier 
from sklearn.linear_model import LogisticRegression


epsilon = 100  # Sentinel "epsilon" for the non-private baselines; makes sorting within a dataset easy

# Make the cell safe to re-run: rows appended by a previous execution of this
# cell are dropped first. Otherwise each re-run duplicates the epsilon=100 rows,
# and the later pivot-based export cannot reshape a frame with duplicate
# (dataset, epsilon, learner) entries.
c_res = c_res[c_res['epsilon'] != epsilon].reset_index(drop=True)

for exp in tqdm(classification_datasets):
    print(exp['dataset'])
    cat_exp = make_categorical(exp)  # For LightGBM, which handles "category" dtype columns natively
    ohe_exp = onehotencode(exp)  # For LogisticRegression, which doesn't handle categoricals

    # Non-private EBM, stored under the same learner label as its private
    # counterpart so both share one column in the exported table.
    ebm = ExplainableBoostingClassifier(binning='quantile', outer_bags=1, random_state=SEED)
    run_experiments(ebm, "aEBM-classic", epsilon, exp, ss, CLASSIFICATION_SCORE, n_jobs=-1, output=c_res)

    # Logistic Regression Section
    lr = LogisticRegression(random_state=SEED)
    run_experiments(lr, "Logistic Regression", epsilon, ohe_exp, ss, CLASSIFICATION_SCORE, n_jobs=-1, output=c_res)

    # LightGBM (Non-Private DPBoost); use_numpy=False keeps the DataFrame's categorical dtypes
    lgbm = LGBMClassifier(num_iterations=100, my_n_trees=100)  # Equal to default parameters in LightGBM
    run_experiments(lgbm, "DPBoost", epsilon, cat_exp, ss, CLASSIFICATION_SCORE, n_jobs=-1, use_numpy=False, output=c_res)

c_res.to_csv('classification_results.csv', index=None)
c_res

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

adult-income
telco-churn
pneumonia
credit-fraud



Unnamed: 0,dataset,problem,learner,epsilon,metric_name,metric_value
0,adult-income,classification,aEBM-classic,0.5,roc_auc,0.873 +- 0.007
1,adult-income,classification,aEBM-gdp,0.5,roc_auc,0.875 +- 0.005
2,adult-income,classification,Logistic Regression,0.5,roc_auc,0.488 +- 0.125
3,adult-income,classification,DPBoost,0.5,roc_auc,0.558 +- 0.045
4,adult-income,classification,aEBM-classic,1.0,roc_auc,0.880 +- 0.006
...,...,...,...,...,...,...
87,pneumonia,classification,Logistic Regression,100.0,roc_auc,0.744 +- 0.014
88,pneumonia,classification,DPBoost,100.0,roc_auc,0.836 +- 0.011
89,credit-fraud,classification,aEBM-classic,100.0,roc_auc,0.965 +- 0.011
90,credit-fraud,classification,Logistic Regression,100.0,roc_auc,0.922 +- 0.019


## Regression Experiments

In [13]:
from datasets import load_cal_housing, load_elevators, load_pol, load_wine_quality
from datasets import onehotencode, make_categorical


# Load every regression benchmark once, up front.
cal_housing = load_cal_housing()
elevators = load_elevators()
pol = load_pol()
wine_quality = load_wine_quality()


regression_datasets = [cal_housing, elevators, pol, wine_quality]


# Record the frame shape of each dataset under its name.
# NOTE(review): the classification cell stores {'shape': ..., 'domain': ...} per
# dataset, whereas a bare shape tuple is stored here -- confirm which layout
# downstream consumers of dataset_meta expect.
for dataset in regression_datasets:
    dataset_meta[dataset['dataset']] = dataset['df'].shape

In [14]:
from diffprivlib.models import LinearRegression as dpLinReg  
from interpret.glassbox import DPExplainableBoostingRegressor
import lightgbm as lgb # DPBoost
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
import numpy as np

import warnings
warnings.filterwarnings('ignore')

# Regression results frame: one row per (dataset, learner, epsilon).
r_res = pd.DataFrame(
    columns=["dataset", "problem", "learner", "epsilon", "metric_name", "metric_value"]
)

# Fail fast: the manual DPBoost loop below only computes R^2 or negative RMSE.
# Any other scorer name would otherwise be silently reported as negative RMSE.
if REGRESSION_SCORE not in ('r2', 'neg_root_mean_squared_error'):
    raise ValueError(
        f"DPBoost scoring is not implemented for REGRESSION_SCORE={REGRESSION_SCORE!r}"
    )


for exp in tqdm(regression_datasets):
    print(exp['dataset'])
    # NOTE(review): cat_exp is built here but not used in this cell -- the DPBoost
    # runs below train on the one-hot encoded data. Confirm that is intended.
    cat_exp = make_categorical(exp)
    ohe_exp = onehotencode(exp)  # For LinearRegression and DPBoost below
    for epsilon in tqdm(EPS_RANGE):

        # EBM Section
        dp_ebm = DPExplainableBoostingRegressor(binning='private-quantile', epsilon=epsilon, delta=DELTA, composition='classic')
        run_experiments(dp_ebm, "aEBM-classic", epsilon, exp, ss, REGRESSION_SCORE, n_jobs=-1, output=r_res)

        dp_ebm_gdp = DPExplainableBoostingRegressor(binning='private-quantile', epsilon=epsilon, delta=DELTA, composition='gdp')
        run_experiments(dp_ebm_gdp, "aEBM-gdp", epsilon, exp, ss, REGRESSION_SCORE, n_jobs=-1, output=r_res)

        # Linear Regression Section -- trained on the one-hot encoded data, matching
        # the non-private Linear Regression baseline and the classification setup.
        lr = dpLinReg(epsilon=epsilon)
        run_experiments(lr, "Linear Regression", epsilon, ohe_exp, ss, REGRESSION_SCORE, n_jobs=-1, output=r_res)

        # DPBoost Section -- trained through lgb.train rather than cross_validate,
        # so the splits of the shared `ss` splitter are iterated manually.
        dpb_train_params = {**DPBOOST_PARAMETERS, 'total_budget': epsilon}  # invariant across splits
        dpb_scores = []
        for train_index, test_index in ss.split(ohe_exp['X'].values):
            X_train = ohe_exp['X'].iloc[train_index]
            y_train = ohe_exp['y'].values[train_index]
            X_test = ohe_exp['X'].iloc[test_index]
            y_test = ohe_exp['y'].values[test_index]

            # Scale targets to [-1, 1] per DP boost Github guidance.
            # The scaler is fitted on the training targets only.
            scaler = MinMaxScaler(feature_range=(-1,1))
            _ = scaler.fit(y_train.reshape(-1,1))
            dpb_y_train = scaler.transform(y_train.reshape(-1,1)).reshape(-1)

            data = lgb.Dataset(X_train, dpb_y_train)
            dp_boost = lgb.train(dpb_train_params, data)

            # Convert predictions back into original domain
            dpb_preds = scaler.inverse_transform(dp_boost.predict(X_test).reshape(-1, 1))

            if REGRESSION_SCORE == 'r2':
                dpb_scores.append(r2_score(y_test, dpb_preds))
            else:  # 'neg_root_mean_squared_error' (guaranteed by the guard above)
                dpb_scores.append(-1 * np.sqrt(mean_squared_error(y_test, dpb_preds)))

        dpb_results = f"{np.mean(dpb_scores):.3f} +- {np.std(dpb_scores):.3f}"
        r_res.loc[len(r_res)] = make_row(exp, "DPBoost", epsilon, REGRESSION_SCORE, dpb_results)

r_res
# r_res.to_csv('regression_results.csv', index=None)

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

cal-housing


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


elevators


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


pol


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


wine-quality


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))





Unnamed: 0,dataset,problem,learner,epsilon,metric_name,metric_value
0,cal-housing,regression,aEBM-classic,0.5,neg_root_mean_squared_error,-85652.462 +- 2724.267
1,cal-housing,regression,aEBM-gdp,0.5,neg_root_mean_squared_error,-79967.205 +- 1929.207
2,cal-housing,regression,Linear Regression,0.5,neg_root_mean_squared_error,-111967.283 +- 1080.877
3,cal-housing,regression,DPBoost,0.5,neg_root_mean_squared_error,-383072.269 +- 41952.661
4,cal-housing,regression,aEBM-classic,1.0,neg_root_mean_squared_error,-78527.889 +- 1230.833
...,...,...,...,...,...,...
75,wine-quality,regression,DPBoost,4.0,neg_root_mean_squared_error,-0.946 +- 0.043
76,wine-quality,regression,aEBM-classic,8.0,neg_root_mean_squared_error,-0.751 +- 0.013
77,wine-quality,regression,aEBM-gdp,8.0,neg_root_mean_squared_error,-0.733 +- 0.014
78,wine-quality,regression,Linear Regression,8.0,neg_root_mean_squared_error,-0.839 +- 0.035


In [15]:
# Build non-private versions of each model
from interpret.glassbox import ExplainableBoostingRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression


epsilon = 100  # Sentinel "epsilon" for the non-private baselines; hack so Non-Private is at the bottom

# Make the cell safe to re-run: rows appended by a previous execution of this
# cell are dropped first. Otherwise each re-run duplicates the epsilon=100 rows,
# and the later pivot-based export cannot reshape a frame with duplicate
# (dataset, epsilon, learner) entries.
r_res = r_res[r_res['epsilon'] != epsilon].reset_index(drop=True)

for exp in tqdm(regression_datasets):
    print(exp['dataset'])
    cat_exp = make_categorical(exp)  # For LightGBM, which handles "category" dtype columns natively
    ohe_exp = onehotencode(exp)  # For LinearRegression, which doesn't handle categoricals

    # Non-private EBM, stored under the same learner label as its private
    # counterpart so both share one column in the exported table.
    ebm = ExplainableBoostingRegressor(binning='quantile', outer_bags=1, random_state=SEED)
    run_experiments(ebm, "aEBM-classic", epsilon, exp, ss, REGRESSION_SCORE, n_jobs=-1, output=r_res)

    # Linear Regression Section
    lr = LinearRegression()
    run_experiments(lr, "Linear Regression", epsilon, ohe_exp, ss, REGRESSION_SCORE, n_jobs=-1, output=r_res)

    # Non-Private DPBoost Section. use_numpy=False (as in the classification
    # baseline) passes the DataFrame through unchanged; converting to a NumPy
    # array would discard the categorical dtypes that make_categorical set up.
    lgbm = LGBMRegressor(num_iterations=100, my_n_trees=100)  # Equal to default parameters in LightGBM
    run_experiments(lgbm, "DPBoost", epsilon, cat_exp, ss, REGRESSION_SCORE, n_jobs=-1, use_numpy=False, output=r_res)

r_res.to_csv('regression_results.csv', index=None)
r_res


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

cal-housing
elevators
pol
wine-quality



Unnamed: 0,dataset,problem,learner,epsilon,metric_name,metric_value
0,cal-housing,regression,aEBM-classic,0.5,neg_root_mean_squared_error,-85652.462 +- 2724.267
1,cal-housing,regression,aEBM-gdp,0.5,neg_root_mean_squared_error,-79967.205 +- 1929.207
2,cal-housing,regression,Linear Regression,0.5,neg_root_mean_squared_error,-111967.283 +- 1080.877
3,cal-housing,regression,DPBoost,0.5,neg_root_mean_squared_error,-383072.269 +- 41952.661
4,cal-housing,regression,aEBM-classic,1.0,neg_root_mean_squared_error,-78527.889 +- 1230.833
...,...,...,...,...,...,...
87,pol,regression,Linear Regression,100.0,neg_root_mean_squared_error,-30.464 +- 0.264
88,pol,regression,DPBoost,100.0,neg_root_mean_squared_error,-4.703 +- 0.228
89,wine-quality,regression,aEBM-classic,100.0,neg_root_mean_squared_error,-0.681 +- 0.012
90,wine-quality,regression,Linear Regression,100.0,neg_root_mean_squared_error,-0.759 +- 0.015


# EXPORTS

In [18]:
# Keep only the rows scored with the configured classification metric (instead
# of a hard-coded metric name, so the export follows CLASSIFICATION_SCORE).
df_export = c_res.loc[
    c_res['metric_name'] == CLASSIFICATION_SCORE,
    ['dataset', 'learner', 'epsilon', 'metric_value'],
]
# One row per (dataset, epsilon), one column per learner.
df_export = df_export.set_index(['dataset', 'epsilon']).pivot(columns='learner')
# print() is intentional: the raw LaTeX source is what gets copied out.
print(df_export.to_latex(multirow=True, na_rep="--"))

\begin{tabular}{llllll}
\toprule
            & {} & \multicolumn{4}{l}{metric\_value} \\
            & learner &         DPBoost & Logistic Regression &    aEBM-classic &        aEBM-gdp \\
dataset & epsilon &                 &                     &                 &                 \\
\midrule
\multirow{6}{*}{adult-income} & 0.5   &  0.558 +- 0.045 &      0.488 +- 0.125 &  0.873 +- 0.007 &  0.875 +- 0.005 \\
            & 1.0   &  0.566 +- 0.034 &      0.471 +- 0.111 &  0.880 +- 0.006 &  0.883 +- 0.005 \\
            & 2.0   &  0.629 +- 0.045 &      0.521 +- 0.109 &  0.886 +- 0.005 &  0.887 +- 0.004 \\
            & 4.0   &  0.734 +- 0.019 &      0.549 +- 0.068 &  0.889 +- 0.004 &  0.889 +- 0.004 \\
            & 8.0   &  0.805 +- 0.011 &      0.534 +- 0.070 &  0.890 +- 0.004 &  0.890 +- 0.004 \\
            & 100.0 &  0.928 +- 0.003 &      0.603 +- 0.066 &  0.923 +- 0.003 &              -- \\
\cline{1-6}
\multirow{6}{*}{credit-fraud} & 0.5   &  0.442 +- 0.138 &      0.558 +- 0.076 & 

In [19]:
# Reshape the regression results to one row per (dataset, epsilon) and one
# column per learner, then emit the raw LaTeX source for the table.
df_export = (
    r_res.loc[:, ['dataset', 'learner', 'epsilon', 'metric_value']]
    .set_index(['dataset', 'epsilon'])
    .pivot(columns='learner')
)
print(df_export.to_latex(na_rep="--", multirow=True))

\begin{tabular}{llllll}
\toprule
             & {} & \multicolumn{4}{l}{metric\_value} \\
             & learner &                   DPBoost &        Linear Regression &            aEBM-classic &                aEBM-gdp \\
dataset & epsilon &                           &                          &                         &                         \\
\midrule
\multirow{6}{*}{cal-housing} & 0.5   &  -383072.269 +- 41952.661 &  -111967.283 +- 1080.877 &  -85652.462 +- 2724.267 &  -79967.205 +- 1929.207 \\
             & 1.0   &  -204277.173 +- 19350.722 &  -110241.222 +- 1101.717 &  -78527.889 +- 1230.833 &  -76827.355 +- 1470.026 \\
             & 2.0   &   -122494.015 +- 7066.372 &  -109518.840 +- 1244.588 &  -75491.915 +- 1404.387 &  -74573.102 +- 1152.561 \\
             & 4.0   &    -96336.577 +- 3043.443 &  -108882.248 +- 1370.641 &  -73967.071 +- 1028.464 &  -73754.682 +- 1022.504 \\
             & 8.0   &    -90029.722 +- 2508.968 &  -107815.624 +- 1460.346 &  -73327.390 +- 1118.09