## Load Data

In [1]:
import sys

# Put the parent directory on the import path so the project packages imported
# below can be resolved when the notebook is run from its own folder.
# Guarded so that re-running this cell does not pile up duplicate entries.
if "../" not in sys.path:
    sys.path.append("../")

from federated_gbdt.models.gbdt.private_gbdt import PrivateGBDT
from experiments.experiment_helpers.data_loader import DataLoader
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

import numpy as np

In [2]:
# Loader object from the project's experiment helpers.
dataloader = DataLoader()

# Only "connect_4" is requested; index 0 picks the entry for that single dataset
# (presumably results come back in request order -- confirm against DataLoader).
# Default is 70/30 split
X_train, X_test, y_train, y_test = dataloader.load_datasets(
    ["connect_4"],
    return_dict=False,
)[0]

In [3]:
# Sanity check: dimensions of the training feature matrix.
X_train.shape

(47289, 42)

In [4]:
# Peek at the raw training labels to see how the classes are encoded.
y_train

array([2, 2, 1, ..., 2, 2, 2])

In [5]:
# List the distinct label values present in the training set.
np.unique(y_train)

array([0, 1, 2])

In [6]:
# One-hot encode the test labels so they can be scored column-by-column against
# the per-class probabilities returned by predict_proba.
# The class list is derived from the training labels instead of being hard-coded:
# np.unique returns the labels sorted, and label_binarize emits one column per
# entry of `classes`, in the order given.
y_test_onehot = label_binarize(y_test, classes=np.unique(y_train))

## XGBoost Training (No DP)

In [7]:
# Non-private baseline: per the section title, epsilon=0 means no DP is applied.
xgb_model = PrivateGBDT(num_trees=100, epsilon=0).fit(X_train, y_train)

# Per-class probabilities for the held-out set; print the shape as a quick check.
y_pred = xgb_model.predict_proba(X_test)
print(y_pred.shape)

# ROC AUC of the predicted probabilities against the one-hot test labels.
roc_auc_score(y_true=y_test_onehot, y_score=y_pred)

(20268, 3)


0.907745991798139

## DP-XGBoost (FEVERLESS)

In [8]:
# Differentially private model: privacy budget epsilon=3 with the
# "gaussian_cdp" DP method; all other settings are left at their defaults.
# NOTE(review): no random seed is set here, so the score below may not
# reproduce exactly across runs -- confirm whether PrivateGBDT exposes a seed.
dp_xgb_model = PrivateGBDT(
    num_trees=100,
    epsilon=3,
    dp_method="gaussian_cdp",
).fit(X_train, y_train)

# Per-class probabilities for the held-out set.
y_pred = dp_xgb_model.predict_proba(X_test)

# ROC AUC of the predicted probabilities against the one-hot test labels.
roc_auc_score(y_true=y_test_onehot, y_score=y_pred)

0.6525901702496729

## DP-TR XGBoost

In [9]:
# Same privacy budget and DP method as the previous model, but with the
# "totally_random" split method and the "uniform" sketch type.
# NOTE(review): no random seed is set here, so the score below may not
# reproduce exactly across runs -- confirm whether PrivateGBDT exposes a seed.
dp_tr_model = PrivateGBDT(
    num_trees=100,
    epsilon=3,
    split_method="totally_random",
    sketch_type="uniform",
    dp_method="gaussian_cdp",
).fit(X_train, y_train)

# Per-class probabilities for the held-out set.
y_pred = dp_tr_model.predict_proba(X_test)

# ROC AUC of the predicted probabilities against the one-hot test labels.
roc_auc_score(y_true=y_test_onehot, y_score=y_pred)

0.7821243499339423