Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions pydp/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from .._pydp import _algorithms


class MetaAlgorithm:
    """
    Base wrapper that forwards calls to a compiled algorithm from `_algorithms`.

    The binding is chosen by name: the concrete subclass name followed by a
    dtype suffix ("Int" for dtype="int", "Double" for dtype="float"), so a
    subclass named `BoundedMean` with dtype="float" looks up
    `_algorithms.BoundedMeanDouble`.
    """

    def __init__(self, **kwargs):
        """
        Looks up and instantiates the binding for this class and dtype.

        `dtype` is required and is consumed here; all remaining keyword arguments are forwarded to the binding constructor, except `lower_bound` / `upper_bound` when they are None.

        Raises KeyError when `dtype` is missing, RuntimeError when it is not "int" or "float", and AttributeError when `_algorithms` has no attribute with the resulting name.
        """
        dtype = kwargs.pop("dtype")

        # Delete bound params if they are not set to avoid conflicts with builder
        for bound in ("lower_bound", "upper_bound"):
            if bound in kwargs and kwargs[bound] is None:
                kwargs.pop(bound)

        binded_class = f"{self.__class__.__name__}{self.__map_dtype_str(dtype)}"
        class_ = getattr(_algorithms, binded_class)

        self.dtype = dtype
        self.__algorithm = class_(**kwargs)
        self.epsilon = self.__algorithm.epsilon

    @staticmethod
    def __map_dtype_str(dtype):
        """
        Maps the Python-side dtype name to the suffix used in binding class names.

        Raises RuntimeError for anything other than "int" or "float".
        """
        if dtype == "int":
            return "Int"
        elif dtype == "float":
            return "Double"
        else:
            raise RuntimeError(f"dtype: {dtype} is not supported")

    def privacy_budget_left(self):
        """
        Returns the remaining privacy budget.
        """
        return self.__algorithm.privacy_budget_left()

    def memory_used(self):
        """
        Returns the memory currently used by the algorithm in bytes.
        """
        return self.__algorithm.memory_used()

    # NOTE: the parameter name `list` shadows the builtin; it is kept so that
    # existing keyword callers (`add_entries(list=...)`) keep working.
    def add_entries(self, list):
        """
        Adds multiple inputs to the algorithm.
        """
        return self.__algorithm.add_entries(list)

    def add_entry(self, value):
        """
        Adds one input to the algorithm.
        """
        return self.__algorithm.add_entry(value)

    # NOTE: `list` shadows the builtin; kept for keyword-call compatibility.
    def result(self, list):
        """
        Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output.

        Consumes 100% of the privacy budget.
        """
        return self.__algorithm.result(list)

    def partial_result(self, privacy_budget=None, noise_interval_level=None):
        """
        Gets the algorithm result.

        The default call consumes the remaining privacy budget.

        When `privacy_budget` (defined on [0,1]) is set, it consumes only the `privacy_budget` amount of budget.

        `noise_interval_level` provides the confidence level of the noise confidence interval, which may be included in the algorithm output. When it is given without `privacy_budget`, the remaining privacy budget is used.
        """

        if privacy_budget is None:
            if noise_interval_level is None:
                return self.__algorithm.partial_result()

            # Previously the confidence level was silently dropped on this
            # path. Spend the remaining budget explicitly (the documented
            # default) so the level can be forwarded alongside it.
            privacy_budget = self.__algorithm.privacy_budget_left()

        if noise_interval_level is None:
            return self.__algorithm.partial_result(privacy_budget)

        return self.__algorithm.partial_result(privacy_budget, noise_interval_level)

    def reset(self):
        """
        Resets the algorithm to a state in which it has received no input. After Reset is called, the algorithm should only consider input added after the last Reset call when providing output.
        """
        return self.__algorithm.reset()

    def serialize(self):
        """
        Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged.

        Returns empty summary for algorithms for which serialize is unimplemented.
        """
        return self.__algorithm.serialize()

    def merge(self, summary):
        """
        Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty.
        """
        return self.__algorithm.merge(summary)

    def noise_confidence_interval(self, confidence_level, privacy_budget):
        """
        Returns the confidence_level confidence interval of noise added within the algorithm with specified privacy budget, using epsilon and other relevant, algorithm-specific parameters (e.g. bounds) provided by the constructor.

        This metric may be used to gauge the error rate introduced by the noise.

        If the returned value is <x,y>, then the noise added has a confidence_level chance of being in the domain [x,y].

        By default, NoiseConfidenceInterval() returns an error. Algorithms for which a confidence interval can feasibly be calculated override this and output the relevant value.

        Conservatively, we do not release the error rate for algorithms whose confidence intervals rely on input size.
        """
        return self.__algorithm.noise_confidence_interval(
            confidence_level, privacy_budget
        )


class Algorithm(MetaAlgorithm):
    """
    Wrapper for algorithms that are configured only by `epsilon` and `dtype`.
    """

    def __init__(self, epsilon=1.0, dtype="int"):
        """
        Forwards `epsilon` and `dtype` to `MetaAlgorithm.__init__` as keywords.
        """
        params = {"epsilon": epsilon, "dtype": dtype}
        super().__init__(**params)


class BoundedAlgorithm(MetaAlgorithm):
    """
    Wrapper for algorithms that additionally accept optional input bounds.
    """

    def __init__(self, epsilon=1.0, lower_bound=None, upper_bound=None, dtype="int"):
        """
        Forwards all four settings to `MetaAlgorithm.__init__` as keywords.

        Bounds left as None are passed through as None; the base constructor removes them before calling the binding.
        """
        params = {
            "epsilon": epsilon,
            "lower_bound": lower_bound,
            "upper_bound": upper_bound,
            "dtype": dtype,
        }
        super().__init__(**params)
16 changes: 14 additions & 2 deletions pydp/algorithms/laplacian/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
from .bounded_algorithms import (
BoundedMean,
BoundedSum,
BoundedStandardDeviation,
BoundedVariance,
)
from .count import Count
from .boundedalgorithms import BoundedMean, BoundedSum, BoundedStandardDeviation, BoundedVariance

__all__ = ["Count", "BoundedMean", "BoundedSum", "BoundedStandardDeviation", "BoundedVariance"]

# Public names of this package (what `from ... import *` exposes), listed alphabetically.
__all__ = [
"BoundedMean",
"BoundedStandardDeviation",
"BoundedSum",
"BoundedVariance",
"Count",
]
17 changes: 17 additions & 0 deletions pydp/algorithms/laplacian/bounded_algorithms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from ..algorithm import BoundedAlgorithm


class BoundedMean(BoundedAlgorithm):
    """
    Bounded mean algorithm.

    Adds no behaviour of its own; everything is inherited from `BoundedAlgorithm`.
    """


class BoundedSum(BoundedAlgorithm):
    """
    Bounded sum algorithm.

    Adds no behaviour of its own; everything is inherited from `BoundedAlgorithm`.
    """


class BoundedStandardDeviation(BoundedAlgorithm):
    """
    Bounded standard deviation algorithm.

    Adds no behaviour of its own; everything is inherited from `BoundedAlgorithm`.
    """


class BoundedVariance(BoundedAlgorithm):
    """
    Bounded variance algorithm.

    Adds no behaviour of its own; everything is inherited from `BoundedAlgorithm`.
    """
29 changes: 0 additions & 29 deletions pydp/algorithms/laplacian/boundedalgorithms.py

This file was deleted.

13 changes: 3 additions & 10 deletions pydp/algorithms/laplacian/count.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
from ..._pydp import _algorithms
from .helper import map_type_str
from ..algorithm import Algorithm

class Count:
def __init__(self, epsilon=1.0, dtype="int"):
class_ = getattr(_algorithms, f"Count{map_type_str(dtype)}")

self.dtype = dtype
self.__algorithm = class_(epsilon)

def result(self, list):
return self.__algorithm.result(list)
class Count(Algorithm):
    """
    Count algorithm.

    Adds no behaviour of its own; everything is inherited from `Algorithm`.
    """
7 changes: 0 additions & 7 deletions pydp/algorithms/laplacian/helper.py

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def read(fname):
include_package_data=True,
keywords="pydp",
name="python-dp",
package_data={"_pydp": ["_pydp.so"],},
package_data={"pydp": ["_pydp.so"],},
packages=find_packages(exclude=["tests"]), # need to check this
setup_requires=setup_requirements,
test_suite="tests",
Expand Down
46 changes: 20 additions & 26 deletions src/bindings/PyDP/pydp_lib/algorithm_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ template <typename T, class Algorithm>
class AlgorithmBuilder {
public:
std::unique_ptr<Algorithm> build(double epsilon,
// std::optional<double> delta = std::nullopt,
// std::optional<double> delta = std::nullopt,
std::optional<T> lower_bound = std::nullopt,
std::optional<T> upper_bound = std::nullopt,
std::optional<int> l0_sensitivity = std::nullopt,
Expand Down Expand Up @@ -78,34 +78,30 @@ class AlgorithmBuilder {
// Constructors
if constexpr (is_bounded_algorithm<T, Algorithm>()) {
// Explicit bounds constructor
pyself.def(
py::init([this](double epsilon, T lower_bound, T upper_bound,
int l0_sensitivity, int linf_sensitivity) {
return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity,
linf_sensitivity);
}),
py::arg("epsilon"), py::arg("lower_bound"),
py::arg("upper_bound"), py::arg("l0_sensitivity") = 1,
py::arg("linf_sensitivity") = 1);
pyself.def(py::init([this](double epsilon, T lower_bound, T upper_bound,
int l0_sensitivity, int linf_sensitivity) {
return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity,
linf_sensitivity);
}),
py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"),
py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1);
}

// No bounds constructor
pyself.def(py::init([this](double epsilon, int l0_sensitivity,
int linf_sensitivity) {
return this->build(epsilon, std::nullopt /*lower_bound*/,
std::nullopt /*upper_bound*/, l0_sensitivity,
linf_sensitivity);
}),
py::arg("epsilon"), py::arg("l0_sensitivity") = 1,
py::arg("linf_sensitivity") = 1);
pyself.def(
py::init([this](double epsilon, int l0_sensitivity, int linf_sensitivity) {
return this->build(epsilon, std::nullopt /*lower_bound*/,
std::nullopt /*upper_bound*/, l0_sensitivity,
linf_sensitivity);
}),
py::arg("epsilon"), py::arg("l0_sensitivity") = 1,
py::arg("linf_sensitivity") = 1);

// Getters
pyself.def_property_readonly("epsilon", &Algorithm::GetEpsilon);

pyself.def("privacy_budget_left", &Algorithm::RemainingPrivacyBudget);

pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget);

pyself.def("memory_used", &Algorithm::MemoryUsed);

// Input data
Expand All @@ -123,7 +119,7 @@ class AlgorithmBuilder {
throw std::runtime_error(result.status().error_message());
}

return dp::GetValue<double>(result.ValueOrDie());
return dp::GetValue<T>(result.ValueOrDie());
});

pyself.def("partial_result", [](Algorithm& pythis) {
Expand All @@ -133,7 +129,7 @@ class AlgorithmBuilder {
throw std::runtime_error(result.status().error_message());
}

return dp::GetValue<double>(result.ValueOrDie());
return dp::GetValue<T>(result.ValueOrDie());
});

pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) {
Expand All @@ -147,7 +143,7 @@ class AlgorithmBuilder {
throw std::runtime_error(result.status().error_message());
}

return dp::GetValue<double>(result.ValueOrDie());
return dp::GetValue<T>(result.ValueOrDie());
});

pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget,
Expand All @@ -162,12 +158,10 @@ class AlgorithmBuilder {
throw std::runtime_error(result.status().error_message());
}

return dp::GetValue<double>(result.ValueOrDie());
return dp::GetValue<T>(result.ValueOrDie());
});

// Other methods
pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget);

pyself.def("reset", &Algorithm::Reset);

pyself.def("serialize", &Algorithm::Serialize);
Expand Down
16 changes: 9 additions & 7 deletions tests/algorithms/test_bounded_mean.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import pytest
import pydp as dp
from pydp.algorithms.laplacian import BoundedMean


def test_c_api():
def test_python_api():
a = [2, 4, 6, 8]

mean_algorithm = dp.BoundedMean(1.0, 1, 9)
mean_algorithm = BoundedMean(
epsilon=1.0, lower_bound=1, upper_bound=9, dtype="float"
)
assert 1.0 <= mean_algorithm.result(a) <= 9.0


def test_bounded_mean():
bm1 = dp.BoundedMean(3.4, 1, 2)
assert isinstance(bm1, dp.BoundedMean)
bm1 = BoundedMean(epsilon=3.4, lower_bound=1, upper_bound=2, dtype="float")
assert isinstance(bm1, BoundedMean)
assert isinstance(bm1.result([1.5, 2, 2.5]), float)

bm2 = dp.BoundedMean(3.4)
assert isinstance(bm2, dp.BoundedMean)
bm2 = BoundedMean(epsilon=3.4, dtype="int")
assert isinstance(bm2, BoundedMean)
# assert isinstance(bm2.result([1.5, 2, 2.5]), float)


Expand Down
6 changes: 3 additions & 3 deletions tests/algorithms/test_bounded_standard_deviation.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import pydp as dp
from pydp.algorithms.laplacian import BoundedStandardDeviation


class TestBoundedStandardDeviation:
def test_c_api(self):
def test_python_api(self):
example_data = [1, 5, 7, 9, 13]
epsilon = 1.0
lower_bound, upper_bound = 0, 15
bsd = dp.BoundedStandardDeviation(epsilon, lower_bound, upper_bound)
bsd = BoundedStandardDeviation(epsilon, lower_bound, upper_bound, dtype="float")
result = bsd.result(example_data)
assert type(result) is float and result >= 0
assert result <= (upper_bound - lower_bound) / 2
6 changes: 3 additions & 3 deletions tests/algorithms/test_bounded_sum.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import pytest
import pydp as dp
from pydp.algorithms.laplacian import BoundedSum


class TestBoundedSum:
def test_c_api_sanity_check(self):
def test_pythons_api_sanity_check(self):
a = [1, 2, 3, 4]

sum_algorithm = dp.BoundedSum(1.0, 0, 10)
sum_algorithm = BoundedSum(1.0, 0, 10, dtype="float")
result = sum_algorithm.result(a)
# The result value is -16.0
# Google library tests make use of ZeroNoiseMechanism
Expand Down
Loading