diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py new file mode 100644 index 00000000..cfbc2bb1 --- /dev/null +++ b/pydp/algorithms/algorithm.py @@ -0,0 +1,130 @@ +from .._pydp import _algorithms + + +class MetaAlgorithm: + def __init__(self, **kwargs): + dtype = kwargs.pop("dtype") + + # Delete bound params if they are not set to avoid conflicts with builder + if "lower_bound" in kwargs and kwargs["lower_bound"] is None: + kwargs.pop("lower_bound") + if "upper_bound" in kwargs and kwargs["upper_bound"] is None: + kwargs.pop("upper_bound") + + binded_class = f"{self.__class__.__name__}{self.__map_dtype_str(dtype)}" + class_ = getattr(_algorithms, binded_class) + + self.dtype = dtype + self.__algorithm = class_(**kwargs) + self.epsilon = self.__algorithm.epsilon + + @staticmethod + def __map_dtype_str(dtype): + if dtype == "int": + return "Int" + elif dtype == "float": + return "Double" + else: + raise RuntimeError(f"dtype: {dtype} is not supported") + + def privacy_budget_left(self): + """ + Returns the remaining privacy budget. + """ + return self.__algorithm.privacy_budget_left() + + def memory_used(self): + """ + Returns the memory currently used by the algorithm in bytes. + """ + return self.__algorithm.memory_used() + + def add_entries(self, list): + """ + Adds multiple inputs to the algorithm. + """ + return self.__algorithm.add_entries(list) + + def add_entry(self, value): + """ + Adds one input to the algorithm. + """ + return self.__algorithm.add_entry(value) + + def result(self, list): + """ + Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output. + + Consumes 100% of the privacy budget. + """ + return self.__algorithm.result(list) + + def partial_result(self, privacy_budget=None, noise_interval_level=None): + """ + Gets the algorithm result. + + The default call consumes the remaining privacy budget. 
+ + When `privacy_budget` (defined on [0,1]) is set, it consumes only the `privacy_budget` amount of budget. + + `noise_interval_level` provides the confidence level of the noise confidence interval, which may be included in the algorithm output. + """ + + if privacy_budget is None: + return self.__algorithm.partial_result() + + if noise_interval_level is None: + return self.__algorithm.partial_result(privacy_budget) + + return self.__algorithm.partial_result(privacy_budget, noise_interval_level) + + def reset(self): + """ + Resets the algorithm to a state in which it has received no input. After Reset is called, the algorithm should only consider input added after the last Reset call when providing output. + """ + return self.__algorithm.reset() + + def serialize(self): + """ + Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged. + + Returns empty summary for algorithms for which serialize is unimplemented. + """ + return self.__algorithm.serialize() + + def merge(self, summary): + """ + Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty. + """ + return self.__algorithm.merge(summary) + + def noise_confidence_interval(self, confidence_level, privacy_budget): + """ + Returns the confidence_level confidence interval of noise added within the algorithm with specified privacy budget, using epsilon and other relevant, algorithm-specific parameters (e.g. bounds) provided by the constructor. + + This metric may be used to gauge the error rate introduced by the noise. + + If the returned value is <x,y>, then the noise added has a confidence_level chance of being in the domain [x,y]. + + By default, NoiseConfidenceInterval() returns an error. 
Algorithms for which a confidence interval can feasibly be calculated override this and output the relevant value. + + Conservatively, we do not release the error rate for algorithms whose confidence intervals rely on input size. + """ + return self.__algorithm.noise_confidence_interval( + confidence_level, privacy_budget + ) + + +class Algorithm(MetaAlgorithm): + def __init__(self, epsilon=1.0, dtype="int"): + super().__init__(epsilon=epsilon, dtype=dtype) + + +class BoundedAlgorithm(MetaAlgorithm): + def __init__(self, epsilon=1.0, lower_bound=None, upper_bound=None, dtype="int"): + super().__init__( + epsilon=epsilon, + lower_bound=lower_bound, + upper_bound=upper_bound, + dtype=dtype, + ) diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py index 3526ae4c..ded0980e 100644 --- a/pydp/algorithms/laplacian/__init__.py +++ b/pydp/algorithms/laplacian/__init__.py @@ -1,4 +1,16 @@ +from .bounded_algorithms import ( + BoundedMean, + BoundedSum, + BoundedStandardDeviation, + BoundedVariance, +) from .count import Count -from .boundedalgorithms import BoundedMean, BoundedSum, BoundedStandardDeviation, BoundedVariance -__all__ = ["Count", "BoundedMean", "BoundedSum", "BoundedStandardDeviation", "BoundedVariance"] + +__all__ = [ + "BoundedMean", + "BoundedStandardDeviation", + "BoundedSum", + "BoundedVariance", + "Count", +] diff --git a/pydp/algorithms/laplacian/bounded_algorithms.py b/pydp/algorithms/laplacian/bounded_algorithms.py new file mode 100644 index 00000000..dbcc03bb --- /dev/null +++ b/pydp/algorithms/laplacian/bounded_algorithms.py @@ -0,0 +1,17 @@ +from ..algorithm import BoundedAlgorithm + + +class BoundedMean(BoundedAlgorithm): + pass + + +class BoundedSum(BoundedAlgorithm): + pass + + +class BoundedStandardDeviation(BoundedAlgorithm): + pass + + +class BoundedVariance(BoundedAlgorithm): + pass diff --git a/pydp/algorithms/laplacian/boundedalgorithms.py b/pydp/algorithms/laplacian/boundedalgorithms.py deleted file 
mode 100644 index f766c22e..00000000 --- a/pydp/algorithms/laplacian/boundedalgorithms.py +++ /dev/null @@ -1,29 +0,0 @@ -from ..._pydp import _algorithms -from .helper import map_type_str - -class Bounded: - def __init__(self, algorithm, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): - class_ = getattr(_algorithms, f"{algorithm}{map_type_str(dtype)}") - - self.dtype = dtype - self.__algorithm = class_(epsilon, lower_bound, upper_bound) - - def result(self, list): - return self.__algorithm.result(list) - -class BoundedMean(Bounded): - def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): - super().__init__("BoundedMean", epsilon, lower_bound, upper_bound, dtype) - - -class BoundedSum(Bounded): - def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): - super().__init__("BoundedSum", epsilon, lower_bound, upper_bound, dtype) - -class BoundedStandardDeviation(Bounded): - def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): - super().__init__("BoundedStandardDeviation", epsilon, lower_bound, upper_bound, dtype) - -class BoundedVariance(Bounded): - def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): - super().__init__("BoundedVariance", epsilon, lower_bound, upper_bound, dtype) \ No newline at end of file diff --git a/pydp/algorithms/laplacian/count.py b/pydp/algorithms/laplacian/count.py index 17ca85ab..e8b8a722 100644 --- a/pydp/algorithms/laplacian/count.py +++ b/pydp/algorithms/laplacian/count.py @@ -1,12 +1,5 @@ -from ..._pydp import _algorithms -from .helper import map_type_str +from ..algorithm import Algorithm -class Count: - def __init__(self, epsilon=1.0, dtype="int"): - class_ = getattr(_algorithms, f"Count{map_type_str(dtype)}") - self.dtype = dtype - self.__algorithm = class_(epsilon) - - def result(self, list): - return self.__algorithm.result(list) +class Count(Algorithm): + pass diff --git 
a/pydp/algorithms/laplacian/helper.py b/pydp/algorithms/laplacian/helper.py deleted file mode 100644 index ac244adf..00000000 --- a/pydp/algorithms/laplacian/helper.py +++ /dev/null @@ -1,7 +0,0 @@ -def map_type_str(type): - if type == "int": - return "Int" - elif type == "float": - return "Double" - else: - raise RuntimeError(f"dtype: {dtype} is not supported") \ No newline at end of file diff --git a/setup.py b/setup.py index 89ddb1ca..ebbd6b7c 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def read(fname): include_package_data=True, keywords="pydp", name="python-dp", - package_data={"_pydp": ["_pydp.so"],}, + package_data={"pydp": ["_pydp.so"],}, packages=find_packages(exclude=["tests"]), # need to check this setup_requires=setup_requirements, test_suite="tests", diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index ffc19fd3..45d5cc99 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -28,7 +28,7 @@ template class AlgorithmBuilder { public: std::unique_ptr build(double epsilon, - // std::optional delta = std::nullopt, + // std::optional delta = std::nullopt, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, std::optional l0_sensitivity = std::nullopt, @@ -78,34 +78,30 @@ class AlgorithmBuilder { // Constructors if constexpr (is_bounded_algorithm()) { // Explicit bounds constructor - pyself.def( - py::init([this](double epsilon, T lower_bound, T upper_bound, - int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity, - linf_sensitivity); - }), - py::arg("epsilon"), py::arg("lower_bound"), - py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, - py::arg("linf_sensitivity") = 1); + pyself.def(py::init([this](double epsilon, T lower_bound, T upper_bound, + int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, 
lower_bound, upper_bound, l0_sensitivity, + linf_sensitivity); + }), + py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), + py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); } // No bounds constructor - pyself.def(py::init([this](double epsilon, int l0_sensitivity, - int linf_sensitivity) { - return this->build(epsilon, std::nullopt /*lower_bound*/, - std::nullopt /*upper_bound*/, l0_sensitivity, - linf_sensitivity); - }), - py::arg("epsilon"), py::arg("l0_sensitivity") = 1, - py::arg("linf_sensitivity") = 1); + pyself.def( + py::init([this](double epsilon, int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, std::nullopt /*lower_bound*/, + std::nullopt /*upper_bound*/, l0_sensitivity, + linf_sensitivity); + }), + py::arg("epsilon"), py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); // Getters pyself.def_property_readonly("epsilon", &Algorithm::GetEpsilon); pyself.def("privacy_budget_left", &Algorithm::RemainingPrivacyBudget); - pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget); - pyself.def("memory_used", &Algorithm::MemoryUsed); // Input data @@ -123,7 +119,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis) { @@ -133,7 +129,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) { @@ -147,7 +143,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget, @@ -162,12 +158,10 @@ class AlgorithmBuilder { 
throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); // Other methods - pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget); - pyself.def("reset", &Algorithm::Reset); pyself.def("serialize", &Algorithm::Serialize); diff --git a/tests/algorithms/test_bounded_mean.py b/tests/algorithms/test_bounded_mean.py index c42f2bd1..7c5e40a8 100644 --- a/tests/algorithms/test_bounded_mean.py +++ b/tests/algorithms/test_bounded_mean.py @@ -1,21 +1,23 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import BoundedMean -def test_c_api(): +def test_python_api(): a = [2, 4, 6, 8] - mean_algorithm = dp.BoundedMean(1.0, 1, 9) + mean_algorithm = BoundedMean( + epsilon=1.0, lower_bound=1, upper_bound=9, dtype="float" + ) assert 1.0 <= mean_algorithm.result(a) <= 9.0 def test_bounded_mean(): - bm1 = dp.BoundedMean(3.4, 1, 2) - assert isinstance(bm1, dp.BoundedMean) + bm1 = BoundedMean(epsilon=3.4, lower_bound=1, upper_bound=2, dtype="float") + assert isinstance(bm1, BoundedMean) assert isinstance(bm1.result([1.5, 2, 2.5]), float) - bm2 = dp.BoundedMean(3.4) - assert isinstance(bm2, dp.BoundedMean) + bm2 = BoundedMean(epsilon=3.4, dtype="int") + assert isinstance(bm2, BoundedMean) # assert isinstance(bm2.result([1.5, 2, 2.5]), float) diff --git a/tests/algorithms/test_bounded_standard_deviation.py b/tests/algorithms/test_bounded_standard_deviation.py index addb9825..bb550572 100644 --- a/tests/algorithms/test_bounded_standard_deviation.py +++ b/tests/algorithms/test_bounded_standard_deviation.py @@ -1,12 +1,12 @@ -import pydp as dp +from pydp.algorithms.laplacian import BoundedStandardDeviation class TestBoundedStandardDeviation: - def test_c_api(self): + def test_python_api(self): example_data = [1, 5, 7, 9, 13] epsilon = 1.0 lower_bound, upper_bound = 0, 15 - bsd = dp.BoundedStandardDeviation(epsilon, lower_bound, upper_bound) + bsd = 
BoundedStandardDeviation(epsilon, lower_bound, upper_bound, dtype="float") result = bsd.result(example_data) assert type(result) is float and result >= 0 assert result <= (upper_bound - lower_bound) / 2 diff --git a/tests/algorithms/test_bounded_sum.py b/tests/algorithms/test_bounded_sum.py index 73bc1e6a..c69c5e2c 100644 --- a/tests/algorithms/test_bounded_sum.py +++ b/tests/algorithms/test_bounded_sum.py @@ -1,12 +1,12 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import BoundedSum class TestBoundedSum: - def test_c_api_sanity_check(self): + def test_pythons_api_sanity_check(self): a = [1, 2, 3, 4] - sum_algorithm = dp.BoundedSum(1.0, 0, 10) + sum_algorithm = BoundedSum(1.0, 0, 10, dtype="float") result = sum_algorithm.result(a) # The result value is -16.0 # Google library tests make use of ZeroNoiseMechanism diff --git a/tests/algorithms/test_bounded_variance.py b/tests/algorithms/test_bounded_variance.py index c0688767..5bd074ac 100644 --- a/tests/algorithms/test_bounded_variance.py +++ b/tests/algorithms/test_bounded_variance.py @@ -1,4 +1,4 @@ -import pydp as dp +from pydp.algorithms.laplacian import BoundedVariance class TestBoundedVariance: @@ -6,7 +6,7 @@ def test_basic(self): example_data = [1, 5, 7, 9, 13] epsilon = 1.0 lower_bound, upper_bound = 0, 16 - bv = dp.BoundedVariance(epsilon, lower_bound, upper_bound) + bv = BoundedVariance(epsilon, lower_bound, upper_bound, dtype="float") result = bv.result(example_data) assert type(result) is float and result >= 0 # Popoviciu's inequality on variances: diff --git a/tests/algorithms/test_count.py b/tests/algorithms/test_count.py index 99a396a5..a289a721 100644 --- a/tests/algorithms/test_count.py +++ b/tests/algorithms/test_count.py @@ -1,17 +1,17 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import Count -@pytest.mark.parametrize("input_class", [dp.CountInt, dp.CountDouble]) +@pytest.mark.parametrize("dtype_in", ["int", "float"]) class TestPercentile: - def 
test_basic(self, input_class): + def test_basic(self, dtype_in): c = [1, 2, 3, 4, 2, 3] - count = input_class(1.7) + count = Count(epsilon=1.7, dtype=dtype_in) count.result(c) - def test_repeated_result(self, input_class): + def test_repeated_result(self, dtype_in): c = [1, 2, 3, 4, 2, 3] - count = input_class(1.7) + count = Count(epsilon=1.7, dtype=dtype_in) count.add_entries(c) count.partial_result(0.5) @@ -48,8 +48,8 @@ def test_memory(self, input_class): class TestCountDataTypes: def test_count_datatypes(self): - count = dp.CountInt(2.0) - assert isinstance(count, dp.CountInt) + count = Count(2.0, dtype="int") + assert isinstance(count, Count) countae = count.add_entry(2) assert isinstance(countae, type(None))