-
-
Notifications
You must be signed in to change notification settings - Fork 138
/
sklearn_pipeline.py
135 lines (95 loc) · 4.59 KB
/
sklearn_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numbers
from ..util.accountant import BudgetAccountant
from .laplace import Laplace
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
class LaplaceMechanism(BaseEstimator, TransformerMixin):
    """
    An SKLearn Pipeline operator for applying differentially private noise
    addition using the Laplace mechanism.

    Paper link: https://link.springer.com/content/pdf/10.1007/11681878_14.pdf
    """

    def __init__(self, epsilon=1.0, sensitivity=1, accountant=None):
        r"""
        Check that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready
        to be used.

        Parameters
        ----------
        epsilon : float or int
            The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have
            `epsilon > 0`.
        sensitivity : float, int or callable
            The sensitivity of the mechanism. A number must satisfy `sensitivity > 0`. A callable is invoked by
            `sensitivity_calculation` with a 1-dimensional array (one feature column with one row removed) and its
            return value is used as the sensitivity for that row.
        accountant : BudgetAccountant, optional
            Accountant to keep track of privacy budget. Resolved through `BudgetAccountant.load_default`.

        Attributes
        ----------
        epsilon
            Privacy budget to calculate noise.
        sensitivity
            Sensitivity of the mechanism to calculate noise.
        accountant
            Accountant to keep track of privacy budget.
        laplace
            `Laplace` instance used when `sensitivity` is a number; None when `sensitivity` is a callable.

        Raises
        ------
        TypeError
            If epsilon is not a number, or sensitivity is not a number or a callable.
        ValueError
            If epsilon is not strictly positive, or sensitivity is a number but not strictly positive.
        """
        if not isinstance(epsilon, numbers.Number):
            raise TypeError(f"Epsilon must be a number. Got type {type(epsilon)}.")

        if epsilon <= 0:
            raise ValueError("Epsilon must be at least larger than 0.")

        self.epsilon = epsilon

        sensitivity_is_number = isinstance(sensitivity, numbers.Number)

        if not sensitivity_is_number and not callable(sensitivity):
            raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.")

        if sensitivity_is_number and sensitivity <= 0:
            raise ValueError("Sensitivity must be at least larger than 0.")

        self.sensitivity = sensitivity
        self.accountant = BudgetAccountant.load_default(accountant)

        # A shared Laplace instance is only useful for a fixed numeric sensitivity; with a callable the
        # sensitivity differs per data point, so a fresh mechanism is built for every value instead.
        self.laplace = None if callable(sensitivity) else Laplace()

    def sensitivity_calculation(self, X):
        """
        Add Laplace noise to every value of the dataset, using the callable `sensitivity` to derive a separate
        sensitivity for each value from the remaining rows of the same feature column.

        The privacy budget `epsilon` is checked before, and spent after, each row is processed.

        Parameters
        ----------
        X : numpy.array
            Dataset in the form of a 2-dimensional array. It is not modified.

        Returns
        -------
        noised : numpy.array
            Floating-point copy of `X` with differentially private noise added.
        """
        # Work on a float copy: writing the noise back into the caller's array would mutate pipeline input and,
        # for integer arrays, silently truncate the noised values.
        noised = np.array(X, dtype=float)
        n_feature = noised.shape[-1]
        n_data = noised.shape[0]

        for data_idx in range(n_data):
            self.accountant.check(self.epsilon, 0)

            for feature_idx in range(n_feature):
                # Feature column with the current data point removed. Rows processed earlier contribute their
                # already-noised values.
                feature = np.concatenate((noised[:data_idx, feature_idx], noised[data_idx + 1:, feature_idx]))

                # Sensitivity for this data point, as computed by the user-supplied callable
                sensitivity_ = self.sensitivity(feature)

                # Fresh Laplace mechanism configured for this data point
                laplace = Laplace().set_epsilon(self.epsilon).set_sensitivity(sensitivity_)

                # Replace the data point with its noised version
                noised[data_idx, feature_idx] = laplace.randomise(noised[data_idx, feature_idx])

            self.accountant.spend(self.epsilon, 0)

        return noised

    def fit(self, X, y=None):
        """
        No-op required by the scikit-learn transformer interface.

        Parameters
        ----------
        X : array-like
            Ignored.
        y : array-like, optional
            Ignored.

        Returns
        -------
        self : LaplaceMechanism
        """
        return self

    def transform(self, X, y=None):
        """
        Return a noised version of `X`.

        With a callable `sensitivity` the work is delegated to `sensitivity_calculation`; with a numeric
        `sensitivity` every element of `X` is passed through a single Laplace mechanism.

        Parameters
        ----------
        X : numpy.array
            Data to add noise to.
        y : array-like, optional
            Ignored.

        Returns
        -------
        noised_array : numpy.array
            Array of the same shape as `X` with Laplace noise added.
        """
        # Dispatch on the current value of `sensitivity` rather than on `self.laplace`, which is fixed at
        # construction time and goes stale if `sensitivity` is changed afterwards (e.g. via `set_params`).
        if callable(self.sensitivity):
            return self.sensitivity_calculation(X)

        if self.laplace is None:
            self.laplace = Laplace()

        self.laplace.set_epsilon(self.epsilon).set_sensitivity(self.sensitivity)

        # Explicit `otypes` lets np.vectorize handle empty input, which otherwise raises ValueError.
        # NOTE(review): assumes `Laplace.randomise` returns a float -- confirm.
        vector_randomise = np.vectorize(self.laplace.randomise, otypes=[float])
        return vector_randomise(X)