In [1]:
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from pydp.ml.mechanisms.sklearn_pipeline import LaplaceMechanism
from pydp.ml.util.accountant import BudgetAccountant

In [5]:
# TOY DATASET

# Draw a synthetic classification problem; the fixed seed makes it repeatable
X, y = make_classification(random_state=0)

# Hold out a test partition, again with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0
)

## Simple Sklearn Pipeline Example with Laplace Mechanism

In [4]:
# Chain the three stages: standardize, apply the Laplace mechanism, classify
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('laplace', LaplaceMechanism()),  # constructed with its default arguments
    ('nb', GaussianNB()),
])

# Fit the Naive Bayes pipeline (local DP) on the training split, then
# display its score on the held-out split (fit returns the pipeline itself)
pipe.fit(X_train, y_train).score(X_test, y_test)

0.48

## Configuring Epsilon and Sensitivity Params

In [6]:
# Build the stages in the order the pipeline runs them
scaler = StandardScaler()
# Laplace mechanism with explicit settings: epsilon = 0.1, sensitivity = 0.5
laplace = LaplaceMechanism(epsilon=0.1, sensitivity=0.5)
# Gaussian Naive Bayes estimator
nb = GaussianNB()

# Wire the stages together
pipe = Pipeline(steps=[
    ('scaler', scaler),
    ('laplace', laplace),
    ('nb', nb),
])

# Fit the Naive Bayes pipeline (local DP) on the training split, then
# display its score on the held-out split (fit returns the pipeline itself)
pipe.fit(X_train, y_train).score(X_test, y_test)

0.52

## Sensitivity as a Function Instead of a Number

In [7]:
def sensitivity(x):
    """Sensitivity for numerical data: value range divided by (count + 1).

    Args:
        x: Sized collection of numbers that supports ``max``, ``min`` and
           ``len``. Presumably the values the mechanism is about to
           perturb -- TODO confirm against ``LaplaceMechanism``.

    Returns:
        ``(max(x) - min(x)) / (len(x) + 1)``.
    """
    return (max(x) - min(x)) / (len(x) + 1)


# Set laplace mechanism with epsilon 0.1; sensitivity is passed as a callable
laplace = LaplaceMechanism(epsilon=0.1, sensitivity=sensitivity)

# Initialize scaler and Naive Bayes estimator
scaler = StandardScaler()
nb = GaussianNB()

# Create the pipeline
pipe = Pipeline([('scaler', scaler), ('laplace', laplace), ('nb', nb)])

# Train Naive Bayes model with local DP
pipe.fit(X_train, y_train)

# Get model score
pipe.score(X_test, y_test)

0.64

## Budget Accountant

In [8]:
# Set a privacy budget accountant.
# NOTE(review): 10000 is passed positionally -- presumably the total budget
# the accountant allows; confirm against the BudgetAccountant signature.
accountant = BudgetAccountant(10000)

# `sensitivity` is the range-based function for numerical data defined in the
# "Sensitivity as a Function" section above; it is reused here rather than
# being defined a second time.

# Set laplace mechanism with epsilon, sensitivity function, and accountant
laplace = LaplaceMechanism(epsilon=0.1, sensitivity=sensitivity, accountant=accountant)

# Initialize scaler and Naive Bayes estimator
scaler = StandardScaler()
nb = GaussianNB()

# Create the pipeline
pipe = Pipeline([('scaler', scaler), ('laplace', laplace), ('nb', nb)])

# Train Naive Bayes model with local DP
pipe.fit(X_train, y_train)

# Get model score
pipe.score(X_test, y_test)

0.36