From 2b01e529b5851170ede8e1ec8df0769536380364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Wed, 5 Aug 2020 19:10:03 +0100 Subject: [PATCH 01/47] Disable distributions code --- src/bindings/PyDP/algorithms/distributions.cpp | 16 ++++++++-------- tests/algorithms/test_distributions.py | 6 ++++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/bindings/PyDP/algorithms/distributions.cpp b/src/bindings/PyDP/algorithms/distributions.cpp index 5f1b7891..359df583 100644 --- a/src/bindings/PyDP/algorithms/distributions.cpp +++ b/src/bindings/PyDP/algorithms/distributions.cpp @@ -1,5 +1,4 @@ // Provides bindings for distributions - #include "pybind11/pybind11.h" #include "algorithms/distributions.h" @@ -19,7 +18,7 @@ void declareLaplaceDistribution(py::module &m) { py::arg("scale") = 1.0, R"pbdoc( Samples the Laplacian distribution Laplace(u, scale*b). - + Parameters ---------- scale @@ -28,7 +27,7 @@ void declareLaplaceDistribution(py::module &m) { laplace_dist.def("get_diversity", &dpi::LaplaceDistribution::GetDiversity, R"pbdoc( Returns the parameter defining this distribution, often labeled b. - + )pbdoc"); laplace_dist.attr("__doc__") = "Draws samples from the Laplacian distribution."; } @@ -43,12 +42,13 @@ void declareGaussianDistribution(py::module &m) { Parameters ---------- scale - A factor to scale stddev. + A factor to scale stddev. )pbdoc") .def_property_readonly("stddev", &dpi::GaussianDistribution::Stddev, R"pbdoc(Returns stddev)pbdoc"); } +/* void declareGeometricDistribution(py::module &m) { py::class_ geometric_dist(m, "GeometricDistribution"); geometric_dist.attr("__module__") = "pydp"; @@ -64,15 +64,15 @@ void declareGeometricDistribution(py::module &m) { R"pbdoc(Returns lambda. Where p = 1 - e^-lambda)pbdoc"); geometric_dist.attr("__doc__") = R"pbdoc(Draws samples from the geometric distribution of probability - \math{p = 1 - e^{-\lambda}}, i.e. 
the number of bernoulli trial failures before the - first success where the success probability is as defined above. lambda must + \math{p = 1 - e^{-\lambda}}, i.e. the number of bernoulli trial failures +before the first success where the success probability is as defined above. lambda must be positive. If the result would be higher than the maximum int64_t, returns the maximum int64_t, which means that users should be careful around the edges of their distribution)pbdoc"; } - +*/ void init_algorithms_distributions(py::module &m) { declareLaplaceDistribution(m); declareGaussianDistribution(m); - declareGeometricDistribution(m); + // declareGeometricDistribution(m); } diff --git a/tests/algorithms/test_distributions.py b/tests/algorithms/test_distributions.py index c68cde34..daf593ba 100644 --- a/tests/algorithms/test_distributions.py +++ b/tests/algorithms/test_distributions.py @@ -5,6 +5,8 @@ from itertools import accumulate import math +pytestmark = pytest.mark.skip(reason="distributions fail to compile") + k_num_samples = 10000000 k_num_geometric_samples = 1000000 k_gaussian_samples = 1000000 @@ -15,7 +17,7 @@ def skew(samples: List[float], mu: float, sigma: float): """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc and we don't want to pull the test files in. I'm assuming it'll be moved to third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. - Until then this should suffice. #FIXME: when possible we can fix this. + Until then this should suffice. #FIXME: when possible we can fix this. """ skew = list( accumulate(samples, lambda lhs, rhs: lhs + (rhs - mu) * (rhs - mu) * (rhs - mu)) @@ -27,7 +29,7 @@ def kurtosis(samples: List[float], mu: float, var: float): """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc and we don't want to pull the test files in. 
I'm assuming it'll be moved to third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. - Until then this should suffice. #FIXME: when possible we can fix this. + Until then this should suffice. #FIXME: when possible we can fix this. """ kurt = list( accumulate(samples, lambda lhs, rhs: lhs + ((rhs - mu) * (rhs - mu)) ** 2) From 6ef9cda3954bde50da0b47e110af318bf6364f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Wed, 5 Aug 2020 19:12:21 +0100 Subject: [PATCH 02/47] First proof of concept for internal aux class --- src/bindings/PyDP/algorithms/count.cpp | 9 +++---- .../PyDP/pydp_lib/algorithm_builder.hpp | 25 +++++++++++++++++++ tests/algorithms/test_count.py | 7 +----- third_party/differential-privacy | 2 +- 4 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 src/bindings/PyDP/pydp_lib/algorithm_builder.hpp diff --git a/src/bindings/PyDP/algorithms/count.cpp b/src/bindings/PyDP/algorithms/count.cpp index d4a30ff6..62d3c811 100644 --- a/src/bindings/PyDP/algorithms/count.cpp +++ b/src/bindings/PyDP/algorithms/count.cpp @@ -5,6 +5,8 @@ #include "algorithms/count.h" +#include "../pydp_lib/algorithm_builder.hpp" + using namespace std; namespace py = pybind11; @@ -12,14 +14,11 @@ namespace dp = differential_privacy; template void declareCount(py::module& m, string const& suffix) { - using count_builder = typename dp::Count::Builder; + using builder = typename dp::python::AlgorithmBuilder>; py::class_> count(m, ("Count" + suffix).c_str()); count.attr("__module__") = "pydp"; - count.def(py::init([]() { return count_builder().Build().ValueOrDie(); })) - .def(py::init([](double epsilon) { - return count_builder().SetEpsilon(epsilon).Build().ValueOrDie(); - })) + count.def(py::init([](double epsilon) { return builder().Build(epsilon); })) .def("add_entry", &dp::Count::AddEntry) .def("add_entries", [](dp::Count& obj, std::vector& v) { diff --git 
a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp new file mode 100644 index 00000000..9b7e7298 --- /dev/null +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -0,0 +1,25 @@ +#ifndef PYDP_LIB_ALGORITHM_H_ +#define PYDP_LIB_ALGORITHM_H_ + +#include "algorithms/algorithm.h" +#include "algorithms/numerical-mechanisms.h" + +namespace differential_privacy { +namespace python { + +template +class AlgorithmBuilder { + public: + std::unique_ptr Build(double epsilon) { + return typename Algorithm::Builder() + .SetLaplaceMechanism(absl::make_unique()) + .SetEpsilon(epsilon) + .Build() + .ValueOrDie(); + } +}; + +} // namespace python +} // namespace differential_privacy + +#endif // PYDP_LIB_ALGORITHM_H_ diff --git a/tests/algorithms/test_count.py b/tests/algorithms/test_count.py index 3988c941..5d27f861 100644 --- a/tests/algorithms/test_count.py +++ b/tests/algorithms/test_count.py @@ -5,18 +5,13 @@ @pytest.mark.parametrize("input_class", [dp.CountInt, dp.CountDouble]) class TestPercentile: def test_basic(self, input_class): - c = [1, 2, 3, 4, 2, 3] - count = input_class() - count.result(c) - - def test_basic_epsilon(self, input_class): c = [1, 2, 3, 4, 2, 3] count = input_class(1.7) count.result(c) def test_repeated_result(self, input_class): c = [1, 2, 3, 4, 2, 3] - count = input_class() + count = input_class(1.7) count.add_entries(c) count.partial_result(0.5) diff --git a/third_party/differential-privacy b/third_party/differential-privacy index 0b0a5c23..b7f4c39d 160000 --- a/third_party/differential-privacy +++ b/third_party/differential-privacy @@ -1 +1 @@ -Subproject commit 0b0a5c2315d84a6a7b1ff34591e33ec11680891e +Subproject commit b7f4c39d9f73d67b34cdbd1b8483e5f72072fc73 From 958d57f0e6291c7eadc4ef082613dea185c5e94c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 6 Aug 2020 12:07:02 +0100 Subject: [PATCH 03/47] Update Count test --- tests/algorithms/test_count.py | 
24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/algorithms/test_count.py b/tests/algorithms/test_count.py index 5d27f861..99a396a5 100644 --- a/tests/algorithms/test_count.py +++ b/tests/algorithms/test_count.py @@ -48,23 +48,21 @@ def test_memory(self, input_class): class TestCountDataTypes: def test_count_datatypes(self): - ci1 = dp.CountInt() - ci2 = dp.CountInt(2.0) - assert isinstance(ci1, dp.CountInt) - assert isinstance(ci2, dp.CountInt) - - ci2ae = ci2.add_entry(2) - assert isinstance(ci2ae, type(None)) - ci2aes = ci2.add_entries([4, 6, 8]) - assert isinstance(ci2aes, type(None)) - mem = ci2.memory_used() + count = dp.CountInt(2.0) + assert isinstance(count, dp.CountInt) + + countae = count.add_entry(2) + assert isinstance(countae, type(None)) + countaes = count.add_entries([4, 6, 8]) + assert isinstance(countaes, type(None)) + mem = count.memory_used() assert isinstance(mem, int) - par = ci2.partial_result() + par = count.partial_result() assert isinstance(par, int) # TODO - # par2 = ci2.partial_result(1.0) + # par2 = count.partial_result(1.0) # assert isinstance(par2, int) - res = ci2.result([2]) + res = count.result([2]) assert isinstance(res, int) From 6885fb3b43148b7af3030b491b41e276e8cbd9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 6 Aug 2020 18:35:00 +0100 Subject: [PATCH 04/47] Refactor bounded functions --- src/bindings/BUILD | 2 +- .../PyDP/algorithms/bounded_functions.cpp | 134 ++++++------------ .../PyDP/pydp_lib/algorithm_builder.hpp | 61 ++++++++ 3 files changed, 103 insertions(+), 94 deletions(-) diff --git a/src/bindings/BUILD b/src/bindings/BUILD index 76bbd6e6..3fa8691d 100644 --- a/src/bindings/BUILD +++ b/src/bindings/BUILD @@ -29,4 +29,4 @@ pybind_extension( "@google_dp//algorithms:order-statistics", "@google_dp//proto:util-lib" ], -) \ No newline at end of file +) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp 
b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 8e878336..d42240de 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -1,111 +1,59 @@ // Provides bindings for Bounded Functions -#include "../../c/c_api.h" - -#include "../pydp_lib/casting.hpp" // our caster helper library -#include "../pydp_lib/helper_class.hpp" // Dummy helper class - #include "pybind11/complex.h" #include "pybind11/functional.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -using namespace std; +#include "algorithms/algorithm.h" +#include "algorithms/bounded-mean.h" +#include "algorithms/bounded-standard-deviation.h" +#include "algorithms/bounded-sum.h" +#include "algorithms/bounded-variance.h" -namespace py = pybind11; +#include "../pydp_lib/algorithm_builder.hpp" +#include "../pydp_lib/casting.hpp" // our caster helper library -class BoundedMeanDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedMean(obj, l); - } -}; - -class BoundedSumDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedSum(obj, l); - } -}; - -class BoundedStandardDeviationDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedStandardDeviation(obj, l); - } -}; +using namespace std; -class BoundedVarianceDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedVariance(obj, l); - } -}; +namespace py = pybind11; +namespace dp = differential_privacy; -void declareBoundedMean(py::module& m) { - py::class_ bld(m, "BoundedMean"); +template +void declareBoundedAlgorithm(py::module& m) { + using builder = typename dp::python::AlgorithmBuilder; + py::class_ bld(m, builder().get_algorithm_name().c_str()); bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - 
py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &BoundedMeanDummy::Result); - bld.def_property("l0_sensitivity", &BoundedMeanDummy::get_l0_sensitivity, - &BoundedMeanDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &BoundedMeanDummy::get_linf_sensitivity, - &BoundedMeanDummy::set_linf_sensitivity); + bld.def(py::init([](double epsilon, T lower_bound, T upper_bound, int l0_sensitivity, + int linf_sensitivity) { + py::print("Building with bounds"); + return builder().BuildWithBounds(epsilon, lower_bound, upper_bound, + l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), + py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); + bld.def(py::init([](double epsilon, int l0_sensitivity, int linf_sensitivity) { + py::print("Building without bounds"); + return builder().BuildWithoutBounds(epsilon, l0_sensitivity, + linf_sensitivity); + }), + py::arg("epsilon"), py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); + bld.def("result", [](Algorithm& obj, std::vector& v) { + return dp::GetValue(obj.Result(v.begin(), v.end()).ValueOrDie()); + }); } -void declareBoundedSum(py::module& m) { - py::class_ cls(m, "BoundedSum"); - cls.attr("__module__") = "pydp"; - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def("result", &BoundedSumDummy::Result); - cls.def_property("l0_sensitivity", &BoundedSumDummy::get_l0_sensitivity, - &BoundedSumDummy::set_l0_sensitivity); - cls.def_property("linf_sensitivity", &BoundedSumDummy::get_linf_sensitivity, - &BoundedSumDummy::set_linf_sensitivity); -} +void init_algorithms_bounded_functions(py::module& m) { + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); -void declareBoundedStandardDeviation(py::module& m) { - py::class_ cls(m, 
"BoundedStandardDeviation"); - cls.attr("__module__") = "pydp"; - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def("result", &BoundedStandardDeviationDummy::Result); - cls.def_property("l0_sensitivity", &BoundedStandardDeviationDummy::get_l0_sensitivity, - &BoundedStandardDeviationDummy::set_l0_sensitivity); - cls.def_property("linf_sensitivity", - &BoundedStandardDeviationDummy::get_linf_sensitivity, - &BoundedSumDummy::set_linf_sensitivity); -} + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); -void declareBoundedVariance(py::module& m) { - py::class_ cls(m, "BoundedVariance"); - cls.attr("__module__") = "pydp"; - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def("result", &BoundedVarianceDummy::Result); - cls.def_property("l0_sensitivity", &BoundedVarianceDummy::get_l0_sensitivity, - &BoundedVarianceDummy::set_l0_sensitivity); - cls.def_property("linf_sensitivity", &BoundedVarianceDummy::get_linf_sensitivity, - &BoundedVarianceDummy::set_linf_sensitivity); -} + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); -void init_algorithms_bounded_functions(py::module& m) { - declareBoundedMean(m); - declareBoundedSum(m); - declareBoundedStandardDeviation(m); - declareBoundedVariance(m); + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); } diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 9b7e7298..c76b1d83 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -2,8 +2,14 @@ #define PYDP_LIB_ALGORITHM_H_ #include "algorithms/algorithm.h" +#include "algorithms/bounded-mean.h" +#include "algorithms/bounded-standard-deviation.h" +#include "algorithms/bounded-sum.h" +#include 
"algorithms/bounded-variance.h" #include "algorithms/numerical-mechanisms.h" +namespace dp = differential_privacy; + namespace differential_privacy { namespace python { @@ -17,6 +23,61 @@ class AlgorithmBuilder { .Build() .ValueOrDie(); } + + std::unique_ptr BuildWithBounds(double epsilon, T lower_bound, + T upper_bound, int l0_sensitivity = 1, + int linf_sensitivity = 1) { + return typename Algorithm::Builder() + .SetLaplaceMechanism(absl::make_unique()) + .SetEpsilon(epsilon) + .SetLower(lower_bound) + .SetUpper(upper_bound) + .SetMaxPartitionsContributed(l0_sensitivity) + .SetMaxContributionsPerPartition(linf_sensitivity) + .Build() + .ValueOrDie(); + } + + std::unique_ptr BuildWithoutBounds(double epsilon, int l0_sensitivity = 1, + int linf_sensitivity = 1) { + return typename Algorithm::Builder() + .SetLaplaceMechanism(absl::make_unique()) + .SetEpsilon(epsilon) + .SetMaxPartitionsContributed(l0_sensitivity) + .SetMaxContributionsPerPartition(linf_sensitivity) + .Build() + .ValueOrDie(); + } + + std::string get_algorithm_name() { + // Set the suffix string + std::string suffix = ""; + // TODO: Change to mapping function + if (typeid(T) == typeid(int)) { + suffix = "Int"; + } else if (typeid(T) == typeid(double)) { + suffix = "Double"; + } else { + throw std::runtime_error("Binding error - Only int and double types supported"); + } + + // Set the algorithm name string + std::string name = ""; + // TODO: Change to mapping function + if (typeid(Algorithm) == typeid(dp::BoundedMean)) { + name = "BoundedMean"; + } else if (typeid(Algorithm) == typeid(dp::BoundedSum)) { + name = "BoundedSum"; + } else if (typeid(Algorithm) == typeid(dp::BoundedStandardDeviation)) { + name = "BoundedStandardDeviation"; + } else if (typeid(Algorithm) == typeid(dp::BoundedVariance)) { + name = "BoundedVariance"; + } else { + throw std::runtime_error(std::string("Binding error - Unsupported algorithm: ") + + std::string(typeid(Algorithm).name())); + } + return (name + suffix); + } }; } 
// namespace python From e08c9cafca6e5cb5eee9abbc3aff316934b65200 Mon Sep 17 00:00:00 2001 From: benjamindev Date: Fri, 7 Aug 2020 07:36:40 +0000 Subject: [PATCH 05/47] Added a more compact name type lookup. We might need to validate the combinations as sum maybe not supported. --- .../PyDP/pydp_lib/algorithm_builder.hpp | 35 +++++-------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index c76b1d83..a4b74005 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -49,34 +49,17 @@ class AlgorithmBuilder { .ValueOrDie(); } + std::map type_to_name = {{typeid(double), "Double"}, + {typeid(int), "Int"}}; + std::map algorithm_to_name = { + {typeid(dp::BoundedMean), "BoundedMean"}, + {typeid(dp::BoundedSum), "BoundedSum"}, + {typeid(dp::BoundedStandardDeviation), "BoundedStandardDeviation"}, + {typeid(dp::BoundedVariance), "BoundedVariance"}}; + std::string get_algorithm_name() { // Set the suffix string - std::string suffix = ""; - // TODO: Change to mapping function - if (typeid(T) == typeid(int)) { - suffix = "Int"; - } else if (typeid(T) == typeid(double)) { - suffix = "Double"; - } else { - throw std::runtime_error("Binding error - Only int and double types supported"); - } - - // Set the algorithm name string - std::string name = ""; - // TODO: Change to mapping function - if (typeid(Algorithm) == typeid(dp::BoundedMean)) { - name = "BoundedMean"; - } else if (typeid(Algorithm) == typeid(dp::BoundedSum)) { - name = "BoundedSum"; - } else if (typeid(Algorithm) == typeid(dp::BoundedStandardDeviation)) { - name = "BoundedStandardDeviation"; - } else if (typeid(Algorithm) == typeid(dp::BoundedVariance)) { - name = "BoundedVariance"; - } else { - throw std::runtime_error(std::string("Binding error - Unsupported algorithm: ") + - std::string(typeid(Algorithm).name())); - } - return (name 
+ suffix); + return (algorithm_to_name[typeid(Algorithm)] + type_to_name[typeid(T)]); } }; From acd5f45ff522a5bf8eed3f18f271fdd8d53ab9e6 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sat, 8 Aug 2020 03:20:06 +0530 Subject: [PATCH 06/47] added more epsilon property and budget --- .../PyDP/algorithms/bounded_functions.cpp | 29 ++++++++++++++++++- .../PyDP/pydp_lib/algorithm_builder.hpp | 8 +---- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index d42240de..78466f1a 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -32,6 +32,7 @@ void declareBoundedAlgorithm(py::module& m) { }), py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); + bld.def(py::init([](double epsilon, int l0_sensitivity, int linf_sensitivity) { py::print("Building without bounds"); return builder().BuildWithoutBounds(epsilon, l0_sensitivity, @@ -39,8 +40,34 @@ void declareBoundedAlgorithm(py::module& m) { }), py::arg("epsilon"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); + + // TODO: can't get it work + // bld.def_property_readonly("l0_sensitivity", [](Algorithm& obj){ + // return obj.GetMaxPartitionsContributed(); + // }); + // bld.def_property_readonly("linf_sensitivity", [](Algorithm& obj){ + // return obj.SetMaxContributionsPerPartition(); + // }); + + bld.def("privacy_budget_left", + [](Algorithm& obj) { return obj.RemainingPrivacyBudget(); }); + + // TODO + // bld.def("add_entries", [](Algorithm& obj, std::vector& v) { + // return obj.AddEntries(v.begin(), v.end()).ValueOrDie(); + // }); + + // bld.def("partial_result", [](Algorithm& obj) { + // return dp::GetValue(obj.PartialResult().ValueOrDie()); + // }) + // bld.def("partial_result", [](Algorithm& obj, double privacy_budget) { + // return 
dp::GetValue(obj.PartialResult(privacy_budget).ValueOrDie()); + // }) + + bld.def_property_readonly("epsilon", [](Algorithm& obj) { return obj.GetEpsilon(); }); + bld.def("result", [](Algorithm& obj, std::vector& v) { - return dp::GetValue(obj.Result(v.begin(), v.end()).ValueOrDie()); + return dp::GetValue(obj.Result(v.begin(), v.end()).ValueOrDie()); }); } diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index a4b74005..6123ea4d 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -17,18 +17,13 @@ template class AlgorithmBuilder { public: std::unique_ptr Build(double epsilon) { - return typename Algorithm::Builder() - .SetLaplaceMechanism(absl::make_unique()) - .SetEpsilon(epsilon) - .Build() - .ValueOrDie(); + return typename Algorithm::Builder().SetEpsilon(epsilon).Build().ValueOrDie(); } std::unique_ptr BuildWithBounds(double epsilon, T lower_bound, T upper_bound, int l0_sensitivity = 1, int linf_sensitivity = 1) { return typename Algorithm::Builder() - .SetLaplaceMechanism(absl::make_unique()) .SetEpsilon(epsilon) .SetLower(lower_bound) .SetUpper(upper_bound) @@ -41,7 +36,6 @@ class AlgorithmBuilder { std::unique_ptr BuildWithoutBounds(double epsilon, int l0_sensitivity = 1, int linf_sensitivity = 1) { return typename Algorithm::Builder() - .SetLaplaceMechanism(absl::make_unique()) .SetEpsilon(epsilon) .SetMaxPartitionsContributed(l0_sensitivity) .SetMaxContributionsPerPartition(linf_sensitivity) From 05eeccfe5c447df4657156d8582a0d3b1a969aad Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sat, 8 Aug 2020 17:52:43 +0530 Subject: [PATCH 07/47] added error handling in initialization --- .../PyDP/algorithms/bounded_functions.cpp | 1 + .../PyDP/pydp_lib/algorithm_builder.hpp | 34 +++++++++++++++---- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp 
b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 78466f1a..3866219e 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -10,6 +10,7 @@ #include "algorithms/bounded-standard-deviation.h" #include "algorithms/bounded-sum.h" #include "algorithms/bounded-variance.h" +#include "base/statusor.h" #include "../pydp_lib/algorithm_builder.hpp" #include "../pydp_lib/casting.hpp" // our caster helper library diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 6123ea4d..82c16f75 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -7,6 +7,7 @@ #include "algorithms/bounded-sum.h" #include "algorithms/bounded-variance.h" #include "algorithms/numerical-mechanisms.h" +#include "base/statusor.h" namespace dp = differential_privacy; @@ -17,30 +18,49 @@ template class AlgorithmBuilder { public: std::unique_ptr Build(double epsilon) { - return typename Algorithm::Builder().SetEpsilon(epsilon).Build().ValueOrDie(); + base::StatusOr> obj; + obj = typename Algorithm::Builder().SetEpsilon(epsilon).Build().ValueOrDie(); + if (obj.ok()){ + return std::move(obj.ValueOrDie()); + } + else{ + throw std::runtime_error(obj.status().error_message()); + } } std::unique_ptr BuildWithBounds(double epsilon, T lower_bound, T upper_bound, int l0_sensitivity = 1, int linf_sensitivity = 1) { - return typename Algorithm::Builder() + base::StatusOr> obj; + obj = typename Algorithm::Builder() .SetEpsilon(epsilon) .SetLower(lower_bound) .SetUpper(upper_bound) .SetMaxPartitionsContributed(l0_sensitivity) .SetMaxContributionsPerPartition(linf_sensitivity) - .Build() - .ValueOrDie(); + .Build(); + if (obj.ok()){ + return std::move(obj.ValueOrDie()); + } + else{ + throw std::runtime_error(obj.status().error_message()); + } } std::unique_ptr BuildWithoutBounds(double epsilon, int l0_sensitivity = 1, int 
linf_sensitivity = 1) { - return typename Algorithm::Builder() + base::StatusOr> obj; + obj = typename Algorithm::Builder() .SetEpsilon(epsilon) .SetMaxPartitionsContributed(l0_sensitivity) .SetMaxContributionsPerPartition(linf_sensitivity) - .Build() - .ValueOrDie(); + .Build(); + if (obj.ok()){ + return std::move(obj.ValueOrDie()); + } + else{ + throw std::runtime_error(obj.status().error_message()); + } } std::map type_to_name = {{typeid(double), "Double"}, From bcceec786744aa0a6c23b5cdd369ae38478f8768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sat, 8 Aug 2020 13:41:41 +0100 Subject: [PATCH 08/47] Refactor builders error check --- .../PyDP/algorithms/bounded_functions.cpp | 1 - .../PyDP/pydp_lib/algorithm_builder.hpp | 58 +++++++++---------- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 3866219e..78466f1a 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -10,7 +10,6 @@ #include "algorithms/bounded-standard-deviation.h" #include "algorithms/bounded-sum.h" #include "algorithms/bounded-variance.h" -#include "base/statusor.h" #include "../pydp_lib/algorithm_builder.hpp" #include "../pydp_lib/casting.hpp" // our caster helper library diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 82c16f75..cc847b75 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -18,49 +18,49 @@ template class AlgorithmBuilder { public: std::unique_ptr Build(double epsilon) { - base::StatusOr> obj; - obj = typename Algorithm::Builder().SetEpsilon(epsilon).Build().ValueOrDie(); - if (obj.ok()){ - return std::move(obj.ValueOrDie()); - } - else{ + base::StatusOr> obj = + typename 
Algorithm::Builder().SetEpsilon(epsilon).Build().ValueOrDie(); + + if (!obj.ok()) { throw std::runtime_error(obj.status().error_message()); } + + return std::move(obj.ValueOrDie()); } std::unique_ptr BuildWithBounds(double epsilon, T lower_bound, T upper_bound, int l0_sensitivity = 1, int linf_sensitivity = 1) { - base::StatusOr> obj; - obj = typename Algorithm::Builder() - .SetEpsilon(epsilon) - .SetLower(lower_bound) - .SetUpper(upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - if (obj.ok()){ - return std::move(obj.ValueOrDie()); - } - else{ + base::StatusOr> obj = + typename Algorithm::Builder() + .SetEpsilon(epsilon) + .SetLower(lower_bound) + .SetUpper(upper_bound) + .SetMaxPartitionsContributed(l0_sensitivity) + .SetMaxContributionsPerPartition(linf_sensitivity) + .Build(); + + if (!obj.ok()) { throw std::runtime_error(obj.status().error_message()); - } + } + + return std::move(obj.ValueOrDie()); } std::unique_ptr BuildWithoutBounds(double epsilon, int l0_sensitivity = 1, int linf_sensitivity = 1) { - base::StatusOr> obj; - obj = typename Algorithm::Builder() - .SetEpsilon(epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - if (obj.ok()){ - return std::move(obj.ValueOrDie()); - } - else{ + base::StatusOr> obj = + typename Algorithm::Builder() + .SetEpsilon(epsilon) + .SetMaxPartitionsContributed(l0_sensitivity) + .SetMaxContributionsPerPartition(linf_sensitivity) + .Build(); + + if (!obj.ok()) { throw std::runtime_error(obj.status().error_message()); } + + return std::move(obj.ValueOrDie()); } std::map type_to_name = {{typeid(double), "Double"}, From e6069b02be4b31fa73d91f99bea8a9a1173b3ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sat, 8 Aug 2020 15:42:38 +0100 Subject: [PATCH 09/47] Bind more functions --- .../PyDP/algorithms/bounded_functions.cpp | 40 ++++++++++++++----- 
src/bindings/PyDP/algorithms/count.cpp | 8 +++- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 78466f1a..859f088f 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -52,22 +52,40 @@ void declareBoundedAlgorithm(py::module& m) { bld.def("privacy_budget_left", [](Algorithm& obj) { return obj.RemainingPrivacyBudget(); }); - // TODO - // bld.def("add_entries", [](Algorithm& obj, std::vector& v) { - // return obj.AddEntries(v.begin(), v.end()).ValueOrDie(); - // }); + bld.def("add_entries", [](Algorithm& obj, std::vector& v) { + obj.AddEntries(v.begin(), v.end()); + }); + + bld.def("partial_result", [](Algorithm& obj) { + auto result = obj.PartialResult(); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); + }); - // bld.def("partial_result", [](Algorithm& obj) { - // return dp::GetValue(obj.PartialResult().ValueOrDie()); - // }) - // bld.def("partial_result", [](Algorithm& obj, double privacy_budget) { - // return dp::GetValue(obj.PartialResult(privacy_budget).ValueOrDie()); - // }) + bld.def("partial_result", [](Algorithm& obj, double privacy_budget) { + auto result = obj.PartialResult(privacy_budget); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); + }); bld.def_property_readonly("epsilon", [](Algorithm& obj) { return obj.GetEpsilon(); }); bld.def("result", [](Algorithm& obj, std::vector& v) { - return dp::GetValue(obj.Result(v.begin(), v.end()).ValueOrDie()); + auto result = obj.Result(v.begin(), v.end()); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); }); } diff --git a/src/bindings/PyDP/algorithms/count.cpp 
b/src/bindings/PyDP/algorithms/count.cpp index 62d3c811..55909a56 100644 --- a/src/bindings/PyDP/algorithms/count.cpp +++ b/src/bindings/PyDP/algorithms/count.cpp @@ -31,7 +31,13 @@ void declareCount(py::module& m, string const& suffix) { .def("memory_used", &dp::Count::MemoryUsed) .def("result", [](dp::Count& obj, std::vector& v) { - return dp::GetValue(obj.Result(v.begin(), v.end()).ValueOrDie()); + auto result = obj.Result(v.begin(), v.end()); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); }) .def("partial_result", [](dp::Count& obj) { From b9c881c58b11412d17644b34a503e45d83153864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sat, 8 Aug 2020 20:38:20 +0100 Subject: [PATCH 10/47] Unify helper build function --- .../PyDP/algorithms/bounded_functions.cpp | 16 ++--- src/bindings/PyDP/algorithms/count.cpp | 2 + .../PyDP/pydp_lib/algorithm_builder.hpp | 63 ++++++++----------- 3 files changed, 33 insertions(+), 48 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 859f088f..78cf748a 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -27,27 +27,21 @@ void declareBoundedAlgorithm(py::module& m) { bld.def(py::init([](double epsilon, T lower_bound, T upper_bound, int l0_sensitivity, int linf_sensitivity) { py::print("Building with bounds"); - return builder().BuildWithBounds(epsilon, lower_bound, upper_bound, - l0_sensitivity, linf_sensitivity); + return builder().Build(epsilon, lower_bound, upper_bound, l0_sensitivity, + linf_sensitivity); }), py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); bld.def(py::init([](double epsilon, int l0_sensitivity, int linf_sensitivity) { py::print("Building without bounds"); - 
return builder().BuildWithoutBounds(epsilon, l0_sensitivity, - linf_sensitivity); + return builder().Build(epsilon, nullopt, nullopt, l0_sensitivity, + linf_sensitivity); }), py::arg("epsilon"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); - // TODO: can't get it work - // bld.def_property_readonly("l0_sensitivity", [](Algorithm& obj){ - // return obj.GetMaxPartitionsContributed(); - // }); - // bld.def_property_readonly("linf_sensitivity", [](Algorithm& obj){ - // return obj.SetMaxContributionsPerPartition(); - // }); + bld.def_property_readonly("epsilon", [](Algorithm& obj) { return obj.GetEpsilon(); }); bld.def("privacy_budget_left", [](Algorithm& obj) { return obj.RemainingPrivacyBudget(); }); diff --git a/src/bindings/PyDP/algorithms/count.cpp b/src/bindings/PyDP/algorithms/count.cpp index 55909a56..75d81b69 100644 --- a/src/bindings/PyDP/algorithms/count.cpp +++ b/src/bindings/PyDP/algorithms/count.cpp @@ -29,6 +29,8 @@ void declareCount(py::module& m, string const& suffix) { //.def("serialize", &dp::Count::Serialize) //.def("merge", &dp::Count::Merge) .def("memory_used", &dp::Count::MemoryUsed) + .def_property_readonly("epsilon", + [](dp::Count& obj) { return obj.GetEpsilon(); }) .def("result", [](dp::Count& obj, std::vector& v) { auto result = obj.Result(v.begin(), v.end()); diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index cc847b75..32139ff7 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -14,48 +14,37 @@ namespace dp = differential_privacy; namespace differential_privacy { namespace python { +template +constexpr bool is_bounded_algorithm() { + return std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + template class AlgorithmBuilder { public: - std::unique_ptr Build(double epsilon) { - base::StatusOr> obj = - typename 
Algorithm::Builder().SetEpsilon(epsilon).Build().ValueOrDie(); - - if (!obj.ok()) { - throw std::runtime_error(obj.status().error_message()); + std::unique_ptr Build(double epsilon, + std::optional lower_bound = std::nullopt, + std::optional upper_bound = std::nullopt, + std::optional l0_sensitivity = std::nullopt, + std::optional linf_sensitivity = std::nullopt) { + auto builder = typename Algorithm::Builder(); + + builder.SetEpsilon(epsilon); + + if (l0_sensitivity.has_value()) + builder.SetMaxPartitionsContributed(l0_sensitivity.value()); + if (linf_sensitivity.has_value()) + builder.SetMaxContributionsPerPartition(linf_sensitivity.value()); + + if constexpr (is_bounded_algorithm()) { + if (lower_bound.has_value()) builder.SetLower(lower_bound.value()); + if (upper_bound.has_value()) builder.SetUpper(upper_bound.value()); } - return std::move(obj.ValueOrDie()); - } - - std::unique_ptr BuildWithBounds(double epsilon, T lower_bound, - T upper_bound, int l0_sensitivity = 1, - int linf_sensitivity = 1) { - base::StatusOr> obj = - typename Algorithm::Builder() - .SetEpsilon(epsilon) - .SetLower(lower_bound) - .SetUpper(upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - - if (!obj.ok()) { - throw std::runtime_error(obj.status().error_message()); - } - - return std::move(obj.ValueOrDie()); - } - - std::unique_ptr BuildWithoutBounds(double epsilon, int l0_sensitivity = 1, - int linf_sensitivity = 1) { - base::StatusOr> obj = - typename Algorithm::Builder() - .SetEpsilon(epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - + base::StatusOr> obj = builder.Build(); if (!obj.ok()) { throw std::runtime_error(obj.status().error_message()); } From ad2472518e077dcfd9db424adc68b5e80953a501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sun, 9 Aug 2020 10:54:49 +0100 Subject: [PATCH 11/47] Allow 
setting delta param --- src/bindings/PyDP/algorithms/bounded_functions.cpp | 6 +++--- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 78cf748a..67f706d7 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -27,15 +27,15 @@ void declareBoundedAlgorithm(py::module& m) { bld.def(py::init([](double epsilon, T lower_bound, T upper_bound, int l0_sensitivity, int linf_sensitivity) { py::print("Building with bounds"); - return builder().Build(epsilon, lower_bound, upper_bound, l0_sensitivity, - linf_sensitivity); + return builder().Build(epsilon, nullopt, lower_bound, upper_bound, + l0_sensitivity, linf_sensitivity); }), py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); bld.def(py::init([](double epsilon, int l0_sensitivity, int linf_sensitivity) { py::print("Building without bounds"); - return builder().Build(epsilon, nullopt, nullopt, l0_sensitivity, + return builder().Build(epsilon, nullopt, nullopt, nullopt, l0_sensitivity, linf_sensitivity); }), py::arg("epsilon"), py::arg("l0_sensitivity") = 1, diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 32139ff7..d6eda33e 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -26,6 +26,7 @@ template class AlgorithmBuilder { public: std::unique_ptr Build(double epsilon, + std::optional delta = std::nullopt, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, std::optional l0_sensitivity = std::nullopt, @@ -34,6 +35,7 @@ class AlgorithmBuilder { builder.SetEpsilon(epsilon); + if (delta.has_value()) builder.SetDelta(delta.value()); if 
(l0_sensitivity.has_value()) builder.SetMaxPartitionsContributed(l0_sensitivity.value()); if (linf_sensitivity.has_value()) From 2bc904f438b52a4dfd3f026ff60fdf6f7076055d Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 9 Aug 2020 19:07:19 +0530 Subject: [PATCH 12/47] added error checking in partial result --- src/bindings/PyDP/algorithms/bounded_functions.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 78cf748a..6798c93c 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -46,6 +46,10 @@ void declareBoundedAlgorithm(py::module& m) { bld.def("privacy_budget_left", [](Algorithm& obj) { return obj.RemainingPrivacyBudget(); }); + bld.def("add_entry", [](Algorithm& obj, T& v) { + obj.AddEntry(v); + }); + bld.def("add_entries", [](Algorithm& obj, std::vector& v) { obj.AddEntries(v.begin(), v.end()); }); @@ -56,30 +60,28 @@ void declareBoundedAlgorithm(py::module& m) { if (!result.ok()) { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); }); bld.def("partial_result", [](Algorithm& obj, double privacy_budget) { + if (privacy_budget > obj.RemainingPrivacyBudget()){ + throw std::runtime_error("Privacy budget requeted exceeds set privacy budget"); + } auto result = obj.PartialResult(privacy_budget); if (!result.ok()) { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); }); - bld.def_property_readonly("epsilon", [](Algorithm& obj) { return obj.GetEpsilon(); }); - bld.def("result", [](Algorithm& obj, std::vector& v) { auto result = obj.Result(v.begin(), v.end()); if (!result.ok()) { throw std::runtime_error(result.status().error_message()); } - - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); } From 
d4aa0a7ad6c2e3712bd4a0b936910e0b7911ce9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sun, 9 Aug 2020 14:53:29 +0100 Subject: [PATCH 13/47] Generalise algorithm declaration --- .../PyDP/algorithms/bounded_functions.cpp | 60 +--------- src/bindings/PyDP/algorithms/count.cpp | 45 +------- .../PyDP/pydp_lib/algorithm_builder.hpp | 105 +++++++++++++++++- 3 files changed, 109 insertions(+), 101 deletions(-) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 67f706d7..874d7f9c 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -22,65 +22,7 @@ namespace dp = differential_privacy; template void declareBoundedAlgorithm(py::module& m) { using builder = typename dp::python::AlgorithmBuilder; - py::class_ bld(m, builder().get_algorithm_name().c_str()); - bld.attr("__module__") = "pydp"; - bld.def(py::init([](double epsilon, T lower_bound, T upper_bound, int l0_sensitivity, - int linf_sensitivity) { - py::print("Building with bounds"); - return builder().Build(epsilon, nullopt, lower_bound, upper_bound, - l0_sensitivity, linf_sensitivity); - }), - py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), - py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); - - bld.def(py::init([](double epsilon, int l0_sensitivity, int linf_sensitivity) { - py::print("Building without bounds"); - return builder().Build(epsilon, nullopt, nullopt, nullopt, l0_sensitivity, - linf_sensitivity); - }), - py::arg("epsilon"), py::arg("l0_sensitivity") = 1, - py::arg("linf_sensitivity") = 1); - - bld.def_property_readonly("epsilon", [](Algorithm& obj) { return obj.GetEpsilon(); }); - - bld.def("privacy_budget_left", - [](Algorithm& obj) { return obj.RemainingPrivacyBudget(); }); - - bld.def("add_entries", [](Algorithm& obj, std::vector& v) { - obj.AddEntries(v.begin(), v.end()); - }); - - 
bld.def("partial_result", [](Algorithm& obj) { - auto result = obj.PartialResult(); - - if (!result.ok()) { - throw std::runtime_error(result.status().error_message()); - } - - return dp::GetValue(result.ValueOrDie()); - }); - - bld.def("partial_result", [](Algorithm& obj, double privacy_budget) { - auto result = obj.PartialResult(privacy_budget); - - if (!result.ok()) { - throw std::runtime_error(result.status().error_message()); - } - - return dp::GetValue(result.ValueOrDie()); - }); - - bld.def_property_readonly("epsilon", [](Algorithm& obj) { return obj.GetEpsilon(); }); - - bld.def("result", [](Algorithm& obj, std::vector& v) { - auto result = obj.Result(v.begin(), v.end()); - - if (!result.ok()) { - throw std::runtime_error(result.status().error_message()); - } - - return dp::GetValue(result.ValueOrDie()); - }); + builder().declare(m); } void init_algorithms_bounded_functions(py::module& m) { diff --git a/src/bindings/PyDP/algorithms/count.cpp b/src/bindings/PyDP/algorithms/count.cpp index 75d81b69..554fc030 100644 --- a/src/bindings/PyDP/algorithms/count.cpp +++ b/src/bindings/PyDP/algorithms/count.cpp @@ -12,46 +12,13 @@ using namespace std; namespace py = pybind11; namespace dp = differential_privacy; -template -void declareCount(py::module& m, string const& suffix) { - using builder = typename dp::python::AlgorithmBuilder>; - - py::class_> count(m, ("Count" + suffix).c_str()); - count.attr("__module__") = "pydp"; - count.def(py::init([](double epsilon) { return builder().Build(epsilon); })) - .def("add_entry", &dp::Count::AddEntry) - .def("add_entries", - [](dp::Count& obj, std::vector& v) { - return obj.AddEntries(v.begin(), v.end()); - }) - // TODO: port ConfidenceInterval and Summary - //.def("noise_confidence_interval", &dp::Count::NoiseConfidenceInterval) - //.def("serialize", &dp::Count::Serialize) - //.def("merge", &dp::Count::Merge) - .def("memory_used", &dp::Count::MemoryUsed) - .def_property_readonly("epsilon", - [](dp::Count& obj) { return 
obj.GetEpsilon(); }) - .def("result", - [](dp::Count& obj, std::vector& v) { - auto result = obj.Result(v.begin(), v.end()); - - if (!result.ok()) { - throw std::runtime_error(result.status().error_message()); - } - - return dp::GetValue(result.ValueOrDie()); - }) - .def("partial_result", - [](dp::Count& obj) { - return dp::GetValue(obj.PartialResult().ValueOrDie()); - }) - - .def("partial_result", [](dp::Count& obj, double privacy_budget) { - return dp::GetValue(obj.PartialResult(privacy_budget).ValueOrDie()); - }); +template +void declareAlgorithm(py::module& m) { + using builder = typename dp::python::AlgorithmBuilder; + builder().declare(m); } void init_algorithms_count(py::module& m) { - declareCount(m, "Int"); - declareCount(m, "Double"); + declareAlgorithm>(m); + declareAlgorithm>(m); } diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index d6eda33e..4b168a23 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -6,10 +6,12 @@ #include "algorithms/bounded-standard-deviation.h" #include "algorithms/bounded-sum.h" #include "algorithms/bounded-variance.h" +#include "algorithms/count.h" #include "algorithms/numerical-mechanisms.h" #include "base/statusor.h" namespace dp = differential_privacy; +namespace py = pybind11; namespace differential_privacy { namespace python { @@ -25,10 +27,10 @@ constexpr bool is_bounded_algorithm() { template class AlgorithmBuilder { public: - std::unique_ptr Build(double epsilon, - std::optional delta = std::nullopt, + std::unique_ptr build(double epsilon, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, + std::optional delta = std::nullopt, std::optional l0_sensitivity = std::nullopt, std::optional linf_sensitivity = std::nullopt) { auto builder = typename Algorithm::Builder(); @@ -60,12 +62,109 @@ class AlgorithmBuilder { {typeid(dp::BoundedMean), "BoundedMean"}, 
{typeid(dp::BoundedSum), "BoundedSum"}, {typeid(dp::BoundedStandardDeviation), "BoundedStandardDeviation"}, - {typeid(dp::BoundedVariance), "BoundedVariance"}}; + {typeid(dp::BoundedVariance), "BoundedVariance"}, + {typeid(dp::Count), "Count"}}; std::string get_algorithm_name() { // Set the suffix string return (algorithm_to_name[typeid(Algorithm)] + type_to_name[typeid(T)]); } + + void declare(py::module& m) { + py::class_ pyself(m, get_algorithm_name().c_str()); + + pyself.attr("__module__") = "pydp"; + + // Constructors + if constexpr (is_bounded_algorithm()) { + pyself.def( + py::init([this](double epsilon, T lower_bound, T upper_bound, double delta, + int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, std::nullopt, lower_bound, upper_bound, + l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), + py::arg("delta") = 0, py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); + } + + pyself.def(py::init([this](double epsilon, double delta, int l0_sensitivity, + int linf_sensitivity) { + return this->build(epsilon, std::nullopt, std::nullopt, std::nullopt, + l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("delta") = 0, py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); + + // Getters + pyself.def_property_readonly("epsilon", &Algorithm::GetEpsilon); + + pyself.def("privacy_budget_left", &Algorithm::RemainingPrivacyBudget); + + pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget); + + pyself.def("memory_used", &Algorithm::MemoryUsed); + + // Input data + pyself.def("add_entries", [](Algorithm& pythis, std::vector& v) { + pythis.AddEntries(v.begin(), v.end()); + }); + + pyself.def("add_entry", &Algorithm::AddEntry); + + // Compute results + pyself.def("result", [](Algorithm& pythis, std::vector& v) { + auto result = pythis.Result(v.begin(), v.end()); + + if (!result.ok()) { + throw 
std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); + }); + + pyself.def("partial_result", [](Algorithm& pythis) { + auto result = pythis.PartialResult(); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); + }); + + pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) { + auto result = pythis.PartialResult(privacy_budget); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); + }); + + pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget, + double noise_interval_level) { + auto result = pythis.PartialResult(privacy_budget, noise_interval_level); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + return dp::GetValue(result.ValueOrDie()); + }); + + // Other methods + pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget); + + pyself.def("reset", &Algorithm::Reset); + + pyself.def("serialize", &Algorithm::Serialize); + + pyself.def("merge", &Algorithm::Merge); + + pyself.def("noise_confidence_interval", &Algorithm::NoiseConfidenceInterval); + } }; } // namespace python From cda79093750d16ec358405df60d01349edb2f131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sun, 9 Aug 2020 22:13:38 +0100 Subject: [PATCH 14/47] Fix delta arg position --- .../PyDP/pydp_lib/algorithm_builder.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index cac20649..97e20e8f 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -28,9 +28,9 @@ template class AlgorithmBuilder { public: std::unique_ptr build(double epsilon, + std::optional delta = 
std::nullopt, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, - std::optional delta = std::nullopt, std::optional l0_sensitivity = std::nullopt, std::optional linf_sensitivity = std::nullopt) { auto builder = typename Algorithm::Builder(); @@ -77,21 +77,24 @@ class AlgorithmBuilder { // Constructors if constexpr (is_bounded_algorithm()) { + // Explicit bounds constructor pyself.def( - py::init([this](double epsilon, T lower_bound, T upper_bound, double delta, + py::init([this](double epsilon, double delta, T lower_bound, T upper_bound, int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, std::nullopt, lower_bound, upper_bound, - l0_sensitivity, linf_sensitivity); + return this->build(epsilon, delta, lower_bound, upper_bound, l0_sensitivity, + linf_sensitivity); }), - py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), - py::arg("delta") = 0, py::arg("l0_sensitivity") = 1, + py::arg("epsilon"), py::arg("delta") = 0, py::arg("lower_bound"), + py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); } + // No bounds constructor pyself.def(py::init([this](double epsilon, double delta, int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, std::nullopt, std::nullopt, std::nullopt, - l0_sensitivity, linf_sensitivity); + return this->build(epsilon, delta, std::nullopt /*lower_bound*/, + std::nullopt /*upper_bound*/, l0_sensitivity, + linf_sensitivity); }), py::arg("epsilon"), py::arg("delta") = 0, py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); From edfddd82b85359f3a4783737d9a96e833d7c96fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Sun, 9 Aug 2020 22:17:23 +0100 Subject: [PATCH 15/47] Fix Count result call --- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp 
b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 97e20e8f..8c27d294 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -123,7 +123,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis) { From 2d8c86acb274690528abd3496df1407d5792bd80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Mon, 10 Aug 2020 18:23:57 +0100 Subject: [PATCH 16/47] Define algorithms submodule --- src/bindings/PyDP/bindings.cpp | 5 +++-- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp index 4d98bfc4..d83ef71c 100644 --- a/src/bindings/PyDP/bindings.cpp +++ b/src/bindings/PyDP/bindings.cpp @@ -37,12 +37,13 @@ PYBIND11_MODULE(pydp, m) { init_base_percentile(m); // Algorithms - init_algorithms_bounded_functions(m); + auto malgorithms = m.def_submodule("algorithms"); + init_algorithms_bounded_functions(malgorithms); init_algorithms_util(m); init_algorithms_distributions(m); init_algorithms_order_statistics(m); init_algorithms_rand(m); - init_algorithms_count(m); + init_algorithms_count(malgorithms); // Proto init_proto(m); diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 8c27d294..bbfec101 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -73,7 +73,7 @@ class AlgorithmBuilder { void declare(py::module& m) { py::class_ pyself(m, get_algorithm_name().c_str()); - pyself.attr("__module__") = "pydp"; + pyself.attr("__module__") = "algorithm"; // Constructors if constexpr (is_bounded_algorithm()) { From c729da623709ffdc1124a1ee22dde85b16904b99 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Mon, 10 Aug 2020 22:36:28 +0100 Subject: [PATCH 17/47] Start defining new Python API --- pydp/__init__.py | 2 +- pydp/algorithms/__init__.py | 16 ++++++++++++++++ src/bindings/PyDP/bindings.cpp | 2 +- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 pydp/algorithms/__init__.py diff --git a/pydp/__init__.py b/pydp/__init__.py index f0491e47..745d6fc8 100644 --- a/pydp/__init__.py +++ b/pydp/__init__.py @@ -1,3 +1,3 @@ -from .pydp import * +from pydp import algorithms __version__ = "0.1.4" diff --git a/pydp/algorithms/__init__.py b/pydp/algorithms/__init__.py new file mode 100644 index 00000000..031b83ac --- /dev/null +++ b/pydp/algorithms/__init__.py @@ -0,0 +1,16 @@ +__all__ = ["Count"] + + +class Count: + def __init__(self, epsilon=1.0, dtype="int"): + from ..pydp import _algorithms + + if dtype == "int": + self.__algorithm = _algorithms.CountInt(epsilon) + elif dtype == "float": + self.__algorithm = _algorithms.CountDouble(epsilon) + else: + raise RuntimeError(f"dtype: {dtype} is not supported") + + def result(self, list): + return self.__algorithm.result(list) diff --git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp index d83ef71c..34a32e9a 100644 --- a/src/bindings/PyDP/bindings.cpp +++ b/src/bindings/PyDP/bindings.cpp @@ -37,7 +37,7 @@ PYBIND11_MODULE(pydp, m) { init_base_percentile(m); // Algorithms - auto malgorithms = m.def_submodule("algorithms"); + auto malgorithms = m.def_submodule("_algorithms"); init_algorithms_bounded_functions(malgorithms); init_algorithms_util(m); init_algorithms_distributions(m); diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index bbfec101..fe9cf0ce 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -73,7 +73,7 @@ class 
AlgorithmBuilder { void declare(py::module& m) { py::class_ pyself(m, get_algorithm_name().c_str()); - pyself.attr("__module__") = "algorithm"; + pyself.attr("__module__") = "_algorithms"; // Constructors if constexpr (is_bounded_algorithm()) { From d0f8ce3cf7dd6d0292de116b013f19582882a7be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Wed, 12 Aug 2020 16:53:07 +0100 Subject: [PATCH 18/47] Rename pydp.so to _pydp.so --- .github/workflows/pypipublish_linux.yml | 6 +++--- .github/workflows/pypipublish_osx.yml | 8 ++++---- .gitignore | 2 +- build_PyDP.sh | 4 ++-- pydp/__init__.py | 2 +- setup.py | 2 +- src/bindings/BUILD | 2 +- src/bindings/PyDP/bindings.cpp | 2 +- src/python/BUILD | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pypipublish_linux.yml b/.github/workflows/pypipublish_linux.yml index 4c258693..f96cafe3 100644 --- a/.github/workflows/pypipublish_linux.yml +++ b/.github/workflows/pypipublish_linux.yml @@ -19,7 +19,7 @@ jobs: cd third_party/differential-privacy git checkout 0b0a5c2315d84a6a7b1ff34591e33ec11680891e cd - - rm -rf third_party/differential-privacy/java + rm -rf third_party/differential-privacy/java rm -rf third_party/differential-privacy/examples/java - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 @@ -55,8 +55,8 @@ jobs: - name: Build pydp lib run: | bazel build src/python:bindings_test --verbose_failures - rm -f pydp.so - cp -f ./bazel-bin/src/bindings/pydp.so ./pydp + rm -f _pydp.so + cp -f ./bazel-bin/src/bindings/_pydp.so ./pydp - name: Build wheel run: | diff --git a/.github/workflows/pypipublish_osx.yml b/.github/workflows/pypipublish_osx.yml index 2534ce64..dc647714 100644 --- a/.github/workflows/pypipublish_osx.yml +++ b/.github/workflows/pypipublish_osx.yml @@ -19,7 +19,7 @@ jobs: cd third_party/differential-privacy git checkout 0b0a5c2315d84a6a7b1ff34591e33ec11680891es cd - - rm -rf third_party/differential-privacy/java + rm 
-rf third_party/differential-privacy/java rm -rf third_party/differential-privacy/examples/java - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 @@ -36,8 +36,8 @@ jobs: - name: Build pydp lib run: | bazel build src/python:bindings_test --verbose_failures - rm -f pydp.so - cp -f ./bazel-bin/src/bindings/pydp.so ./pydp + rm -f _pydp.so + cp -f ./bazel-bin/src/bindings/_pydp.so ./pydp - name: Build wheel @@ -49,4 +49,4 @@ jobs: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.TOKEN }} run: | - twine upload --skip-existing dist/*.whl \ No newline at end of file + twine upload --skip-existing dist/*.whl diff --git a/.gitignore b/.gitignore index 93312cf7..3f031637 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -pydp.so +_pydp.so # bazel files bazel-bin bazel-out diff --git a/build_PyDP.sh b/build_PyDP.sh index fee7b2ca..3227b180 100755 --- a/build_PyDP.sh +++ b/build_PyDP.sh @@ -2,5 +2,5 @@ pipenv install --dev --skip-lock bazel build src/python:bindings_test --verbose_failures -find ./ -name pydp.so -print0 | xargs -0 -I {} rm {} -cp -f ./bazel-bin/src/bindings/pydp.so ./pydp +find ./ -name _pydp.so -print0 | xargs -0 -I {} rm {} +cp -f ./bazel-bin/src/bindings/_pydp.so ./pydp diff --git a/pydp/__init__.py b/pydp/__init__.py index 745d6fc8..c1431116 100644 --- a/pydp/__init__.py +++ b/pydp/__init__.py @@ -1,3 +1,3 @@ -from pydp import algorithms +from ._pydp import * __version__ = "0.1.4" diff --git a/setup.py b/setup.py index 6ecd57b6..89ddb1ca 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def read(fname): include_package_data=True, keywords="pydp", name="python-dp", - package_data={"pydp": ["pydp.so"],}, + package_data={"_pydp": ["_pydp.so"],}, packages=find_packages(exclude=["tests"]), # need to check this setup_requires=setup_requirements, test_suite="tests", diff --git a/src/bindings/BUILD b/src/bindings/BUILD index 3fa8691d..df8a4616 100644 --- a/src/bindings/BUILD +++ b/src/bindings/BUILD @@ -1,7 +1,7 @@ 
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") pybind_extension( - name = "pydp", + name = "_pydp", srcs = glob([ "PyDP/*.cpp", "PyDP/base/*.cpp", diff --git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp index 34a32e9a..c460e7c8 100644 --- a/src/bindings/PyDP/bindings.cpp +++ b/src/bindings/PyDP/bindings.cpp @@ -28,7 +28,7 @@ void init_algorithms_rand(py::module &); // proto void init_proto(py::module &); -PYBIND11_MODULE(pydp, m) { +PYBIND11_MODULE(_pydp, m) { m.doc() = "Google Differential Privacy python extension"; // Base diff --git a/src/python/BUILD b/src/python/BUILD index ad4cae3d..2dfadd94 100644 --- a/src/python/BUILD +++ b/src/python/BUILD @@ -4,5 +4,5 @@ py_binary( name = "bindings_test", srcs = ["__init__.py"], main = "__init__.py", - data = ["//src/bindings:pydp.so"] + data = ["//src/bindings:_pydp.so"] ) From 537032fb02bac00f282de636a7dd1e3260354d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Wed, 12 Aug 2020 17:23:42 +0100 Subject: [PATCH 19/47] Update Python API --- pydp/__init__.py | 2 ++ pydp/algorithms/__init__.py | 17 ++--------------- pydp/algorithms/laplacian/__init__.py | 3 +++ pydp/algorithms/laplacian/count.py | 21 +++++++++++++++++++++ 4 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 pydp/algorithms/laplacian/__init__.py create mode 100644 pydp/algorithms/laplacian/count.py diff --git a/pydp/__init__.py b/pydp/__init__.py index c1431116..8f2ec341 100644 --- a/pydp/__init__.py +++ b/pydp/__init__.py @@ -1,3 +1,5 @@ from ._pydp import * +from pydp import algorithms + __version__ = "0.1.4" diff --git a/pydp/algorithms/__init__.py b/pydp/algorithms/__init__.py index 031b83ac..afc01e21 100644 --- a/pydp/algorithms/__init__.py +++ b/pydp/algorithms/__init__.py @@ -1,16 +1,3 @@ -__all__ = ["Count"] +from . 
import laplacian - -class Count: - def __init__(self, epsilon=1.0, dtype="int"): - from ..pydp import _algorithms - - if dtype == "int": - self.__algorithm = _algorithms.CountInt(epsilon) - elif dtype == "float": - self.__algorithm = _algorithms.CountDouble(epsilon) - else: - raise RuntimeError(f"dtype: {dtype} is not supported") - - def result(self, list): - return self.__algorithm.result(list) +__all__ = ["laplacian"] diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py new file mode 100644 index 00000000..4faab78d --- /dev/null +++ b/pydp/algorithms/laplacian/__init__.py @@ -0,0 +1,3 @@ +from .count import Count + +__all__ = ["Count"] diff --git a/pydp/algorithms/laplacian/count.py b/pydp/algorithms/laplacian/count.py new file mode 100644 index 00000000..38bc31d1 --- /dev/null +++ b/pydp/algorithms/laplacian/count.py @@ -0,0 +1,21 @@ +from ..._pydp import _algorithms + + +def map_type_str(type): + if type == "int": + return "Int" + elif type == "float": + return "Double" + else: + raise RuntimeError(f"dtype: {dtype} is not supported") + + +class Count: + def __init__(self, epsilon=1.0, dtype="int"): + class_ = getattr(_algorithms, f"Count{map_type_str(dtype)}") + + self.dtype = dtype + self.__algorithm = class_(epsilon) + + def result(self, list): + return self.__algorithm.result(list) From fe7a2027d662ed0e87a46cb71c2c4a1cc1f0f325 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Thu, 13 Aug 2020 01:06:13 +0530 Subject: [PATCH 20/47] removed delta as it can't be used for laplacian --- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index fe9cf0ce..7ef52fdd 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -28,7 +28,7 @@ template class AlgorithmBuilder { public: 
std::unique_ptr build(double epsilon, - std::optional delta = std::nullopt, + // std::optional delta = std::nullopt, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, std::optional l0_sensitivity = std::nullopt, @@ -37,7 +37,7 @@ class AlgorithmBuilder { builder.SetEpsilon(epsilon); - if (delta.has_value()) builder.SetDelta(delta.value()); + // if (delta.has_value()) builder.SetDelta(delta.value()); if (l0_sensitivity.has_value()) builder.SetMaxPartitionsContributed(l0_sensitivity.value()); if (linf_sensitivity.has_value()) @@ -79,24 +79,24 @@ class AlgorithmBuilder { if constexpr (is_bounded_algorithm()) { // Explicit bounds constructor pyself.def( - py::init([this](double epsilon, double delta, T lower_bound, T upper_bound, + py::init([this](double epsilon, T lower_bound, T upper_bound, int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, delta, lower_bound, upper_bound, l0_sensitivity, + return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity, linf_sensitivity); }), - py::arg("epsilon"), py::arg("delta") = 0, py::arg("lower_bound"), + py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); } // No bounds constructor - pyself.def(py::init([this](double epsilon, double delta, int l0_sensitivity, + pyself.def(py::init([this](double epsilon, int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, delta, std::nullopt /*lower_bound*/, + return this->build(epsilon, std::nullopt /*lower_bound*/, std::nullopt /*upper_bound*/, l0_sensitivity, linf_sensitivity); }), - py::arg("epsilon"), py::arg("delta") = 0, py::arg("l0_sensitivity") = 1, + py::arg("epsilon"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); // Getters From 51277150758665a4ce33e9b81ff9e9acad7d0f89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Wed, 12 Aug 2020 21:19:28 +0100 Subject: [PATCH 
21/47] Add bounded algorithms --- pydp/algorithms/algorithm.py | 41 +++++++++++++++++++ pydp/algorithms/laplacian/__init__.py | 13 +++++- pydp/algorithms/laplacian/bounded_mean.py | 5 +++ .../laplacian/bounded_standard_deviation.py | 5 +++ pydp/algorithms/laplacian/bounded_sum.py | 5 +++ pydp/algorithms/laplacian/bounded_variance.py | 5 +++ pydp/algorithms/laplacian/count.py | 22 ++-------- .../PyDP/pydp_lib/algorithm_builder.hpp | 2 - 8 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 pydp/algorithms/algorithm.py create mode 100644 pydp/algorithms/laplacian/bounded_mean.py create mode 100644 pydp/algorithms/laplacian/bounded_standard_deviation.py create mode 100644 pydp/algorithms/laplacian/bounded_sum.py create mode 100644 pydp/algorithms/laplacian/bounded_variance.py diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py new file mode 100644 index 00000000..d10e358c --- /dev/null +++ b/pydp/algorithms/algorithm.py @@ -0,0 +1,41 @@ +from functools import wraps + +from .._pydp import _algorithms + + +class Algorithm: + + # Class variables + __methods_to_wrap = [ + "epsilon", + "privacy_budget_left", + "memory_used", + "add_entries", + "add_entry", + "result", + "partial_result", + "consume_privacy_budget", + "reset", + "serialize", + "merge", + "noise_confidence_interval", + ] + + def __init__(self, dtype="int", **kwargs): + cpp_class_name = f"{self.__class__.__name__}{self.__map_type_str(dtype)}" + class_ = getattr(_algorithms, cpp_class_name) + + self.dtype = dtype + self.__algorithm = class_(**kwargs) + + for method in self.__methods_to_wrap: + setattr(self, method, getattr(self.__algorithm, f"{method}")) + + @staticmethod + def __map_type_str(type): + if type == "int": + return "Int" + elif type == "float": + return "Double" + else: + raise RuntimeError(f"dtype: {dtype} is not supported") diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py index 4faab78d..baa58fe8 100644 --- 
a/pydp/algorithms/laplacian/__init__.py +++ b/pydp/algorithms/laplacian/__init__.py @@ -1,3 +1,14 @@ +from .bounded_mean import BoundedMean +from .bounded_standard_deviation import BoundedStandardDeviation +from .bounded_sum import BoundedSum +from .bounded_variance import BoundedVariance from .count import Count -__all__ = ["Count"] + +__all__ = [ + "BoundedMean", + "BoundedStandardDeviation", + "BoundedSum", + "BoundedVariance", + "Count", +] diff --git a/pydp/algorithms/laplacian/bounded_mean.py b/pydp/algorithms/laplacian/bounded_mean.py new file mode 100644 index 00000000..fbeadc89 --- /dev/null +++ b/pydp/algorithms/laplacian/bounded_mean.py @@ -0,0 +1,5 @@ +from ..algorithm import Algorithm + + +class BoundedMean(Algorithm): + pass diff --git a/pydp/algorithms/laplacian/bounded_standard_deviation.py b/pydp/algorithms/laplacian/bounded_standard_deviation.py new file mode 100644 index 00000000..7eb8f163 --- /dev/null +++ b/pydp/algorithms/laplacian/bounded_standard_deviation.py @@ -0,0 +1,5 @@ +from ..algorithm import Algorithm + + +class BoundedStandardDeviation(Algorithm): + pass diff --git a/pydp/algorithms/laplacian/bounded_sum.py b/pydp/algorithms/laplacian/bounded_sum.py new file mode 100644 index 00000000..eb588e48 --- /dev/null +++ b/pydp/algorithms/laplacian/bounded_sum.py @@ -0,0 +1,5 @@ +from ..algorithm import Algorithm + + +class BoundedSum(Algorithm): + pass diff --git a/pydp/algorithms/laplacian/bounded_variance.py b/pydp/algorithms/laplacian/bounded_variance.py new file mode 100644 index 00000000..75412550 --- /dev/null +++ b/pydp/algorithms/laplacian/bounded_variance.py @@ -0,0 +1,5 @@ +from ..algorithm import Algorithm + + +class BoundedVariance(Algorithm): + pass diff --git a/pydp/algorithms/laplacian/count.py b/pydp/algorithms/laplacian/count.py index 38bc31d1..e8b8a722 100644 --- a/pydp/algorithms/laplacian/count.py +++ b/pydp/algorithms/laplacian/count.py @@ -1,21 +1,5 @@ -from ..._pydp import _algorithms +from ..algorithm import 
Algorithm -def map_type_str(type): - if type == "int": - return "Int" - elif type == "float": - return "Double" - else: - raise RuntimeError(f"dtype: {dtype} is not supported") - - -class Count: - def __init__(self, epsilon=1.0, dtype="int"): - class_ = getattr(_algorithms, f"Count{map_type_str(dtype)}") - - self.dtype = dtype - self.__algorithm = class_(epsilon) - - def result(self, list): - return self.__algorithm.result(list) +class Count(Algorithm): + pass diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index fe9cf0ce..7bc72db8 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -104,8 +104,6 @@ class AlgorithmBuilder { pyself.def("privacy_budget_left", &Algorithm::RemainingPrivacyBudget); - pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget); - pyself.def("memory_used", &Algorithm::MemoryUsed); // Input data From 1c641d9c47c5ff8d6c0d0ccb1d5cd2d6529d8f41 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Thu, 13 Aug 2020 03:24:11 +0530 Subject: [PATCH 22/47] added bounded functions in python code --- pydp/algorithms/laplacian/__init__.py | 3 +- .../algorithms/laplacian/boundedalgorithms.py | 29 +++++++++++++++++++ pydp/algorithms/laplacian/count.py | 11 +------ pydp/algorithms/laplacian/helper.py | 7 +++++ .../PyDP/pydp_lib/algorithm_builder.hpp | 2 +- 5 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 pydp/algorithms/laplacian/boundedalgorithms.py create mode 100644 pydp/algorithms/laplacian/helper.py diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py index 4faab78d..3526ae4c 100644 --- a/pydp/algorithms/laplacian/__init__.py +++ b/pydp/algorithms/laplacian/__init__.py @@ -1,3 +1,4 @@ from .count import Count +from .boundedalgorithms import BoundedMean, BoundedSum, BoundedStandardDeviation, BoundedVariance -__all__ = ["Count"] +__all__ = ["Count", 
"BoundedMean", "BoundedSum", "BoundedStandardDeviation", "BoundedVariance"] diff --git a/pydp/algorithms/laplacian/boundedalgorithms.py b/pydp/algorithms/laplacian/boundedalgorithms.py new file mode 100644 index 00000000..f766c22e --- /dev/null +++ b/pydp/algorithms/laplacian/boundedalgorithms.py @@ -0,0 +1,29 @@ +from ..._pydp import _algorithms +from .helper import map_type_str + +class Bounded: + def __init__(self, algorithm, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): + class_ = getattr(_algorithms, f"{algorithm}{map_type_str(dtype)}") + + self.dtype = dtype + self.__algorithm = class_(epsilon, lower_bound, upper_bound) + + def result(self, list): + return self.__algorithm.result(list) + +class BoundedMean(Bounded): + def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): + super().__init__("BoundedMean", epsilon, lower_bound, upper_bound, dtype) + + +class BoundedSum(Bounded): + def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): + super().__init__("BoundedSum", epsilon, lower_bound, upper_bound, dtype) + +class BoundedStandardDeviation(Bounded): + def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): + super().__init__("BoundedStandardDeviation", epsilon, lower_bound, upper_bound, dtype) + +class BoundedVariance(Bounded): + def __init__(self, epsilon=1.0, lower_bound = None, upper_bound = None, dtype="int"): + super().__init__("BoundedVariance", epsilon, lower_bound, upper_bound, dtype) \ No newline at end of file diff --git a/pydp/algorithms/laplacian/count.py b/pydp/algorithms/laplacian/count.py index 38bc31d1..17ca85ab 100644 --- a/pydp/algorithms/laplacian/count.py +++ b/pydp/algorithms/laplacian/count.py @@ -1,14 +1,5 @@ from ..._pydp import _algorithms - - -def map_type_str(type): - if type == "int": - return "Int" - elif type == "float": - return "Double" - else: - raise RuntimeError(f"dtype: {dtype} is not supported") - +from .helper 
import map_type_str class Count: def __init__(self, epsilon=1.0, dtype="int"): diff --git a/pydp/algorithms/laplacian/helper.py b/pydp/algorithms/laplacian/helper.py new file mode 100644 index 00000000..ac244adf --- /dev/null +++ b/pydp/algorithms/laplacian/helper.py @@ -0,0 +1,7 @@ +def map_type_str(type): + if type == "int": + return "Int" + elif type == "float": + return "Double" + else: + raise RuntimeError(f"dtype: {dtype} is not supported") \ No newline at end of file diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 7ef52fdd..ffc19fd3 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -123,7 +123,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis) { From 727c041c3f55057172574868d68a6e1e21880bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 12:05:10 +0100 Subject: [PATCH 23/47] Restructure algorithm classes --- pydp/algorithms/algorithm.py | 22 ++++++++++++++++--- pydp/algorithms/laplacian/bounded_mean.py | 4 ++-- .../laplacian/bounded_standard_deviation.py | 4 ++-- pydp/algorithms/laplacian/bounded_sum.py | 4 ++-- pydp/algorithms/laplacian/bounded_variance.py | 4 ++-- .../PyDP/pydp_lib/algorithm_builder.hpp | 6 ++--- 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index d10e358c..23a89fc0 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -3,7 +3,7 @@ from .._pydp import _algorithms -class Algorithm: +class WrapAlgorithm: # Class variables __methods_to_wrap = [ @@ -22,8 +22,8 @@ class Algorithm: ] def __init__(self, dtype="int", **kwargs): - cpp_class_name = 
f"{self.__class__.__name__}{self.__map_type_str(dtype)}" - class_ = getattr(_algorithms, cpp_class_name) + binded_class = f"{self.__class__.__name__}{self.__map_type_str(dtype)}" + class_ = getattr(_algorithms, binded_class) self.dtype = dtype self.__algorithm = class_(**kwargs) @@ -39,3 +39,19 @@ def __map_type_str(type): return "Double" else: raise RuntimeError(f"dtype: {dtype} is not supported") + + +class Algorithm(WrapAlgorithm): + def __init__(self, epsilon, dtype="int"): + WrapAlgorithm.__init__(self, dtype=dtype, epsilon=epsilon) + + +class BoundedAlgorithm(WrapAlgorithm): + def __init__(self, epsilon, lower_bound, upper_bound, dtype="int"): + WrapAlgorithm.__init__( + self, + dtype=dtype, + epsilon=epsilon, + lower_bound=lower_bound, + upper_bound=upper_bound, + ) diff --git a/pydp/algorithms/laplacian/bounded_mean.py b/pydp/algorithms/laplacian/bounded_mean.py index fbeadc89..96dc8fba 100644 --- a/pydp/algorithms/laplacian/bounded_mean.py +++ b/pydp/algorithms/laplacian/bounded_mean.py @@ -1,5 +1,5 @@ -from ..algorithm import Algorithm +from ..algorithm import BoundedAlgorithm -class BoundedMean(Algorithm): +class BoundedMean(BoundedAlgorithm): pass diff --git a/pydp/algorithms/laplacian/bounded_standard_deviation.py b/pydp/algorithms/laplacian/bounded_standard_deviation.py index 7eb8f163..0402cfa9 100644 --- a/pydp/algorithms/laplacian/bounded_standard_deviation.py +++ b/pydp/algorithms/laplacian/bounded_standard_deviation.py @@ -1,5 +1,5 @@ -from ..algorithm import Algorithm +from ..algorithm import BoundedAlgorithm -class BoundedStandardDeviation(Algorithm): +class BoundedStandardDeviation(BoundedAlgorithm): pass diff --git a/pydp/algorithms/laplacian/bounded_sum.py b/pydp/algorithms/laplacian/bounded_sum.py index eb588e48..85a20d38 100644 --- a/pydp/algorithms/laplacian/bounded_sum.py +++ b/pydp/algorithms/laplacian/bounded_sum.py @@ -1,5 +1,5 @@ -from ..algorithm import Algorithm +from ..algorithm import BoundedAlgorithm -class BoundedSum(Algorithm): 
+class BoundedSum(BoundedAlgorithm): pass diff --git a/pydp/algorithms/laplacian/bounded_variance.py b/pydp/algorithms/laplacian/bounded_variance.py index 75412550..fbc0ab2e 100644 --- a/pydp/algorithms/laplacian/bounded_variance.py +++ b/pydp/algorithms/laplacian/bounded_variance.py @@ -1,5 +1,5 @@ -from ..algorithm import Algorithm +from ..algorithm import BoundedAlgorithm -class BoundedVariance(Algorithm): +class BoundedVariance(BoundedAlgorithm): pass diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 7bc72db8..da1cb9a0 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -131,7 +131,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) { @@ -145,7 +145,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget, @@ -160,7 +160,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); // Other methods From 8d5db91dee517cb5a210f87633c5ff805972f686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 12:25:43 +0100 Subject: [PATCH 24/47] Fix typo --- pydp/algorithms/algorithm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index 769e6426..a7d5a2a7 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -22,7 +22,7 @@ class WrapAlgorithm: ] def __init__(self, 
dtype="int", **kwargs): - binded_class = f"{self.__class__.__name__}{self.__map_type_str(dtype)}" + binded_class = f"{self.__class__.__name__}{self.__map_dtype_str(dtype)}" class_ = getattr(_algorithms, binded_class) self.dtype = dtype @@ -32,10 +32,10 @@ def __init__(self, dtype="int", **kwargs): setattr(self, method, getattr(self.__algorithm, f"{method}")) @staticmethod - def __map_type_str(type): - if type == "int": + def __map_dtype_str(dtype): + if dtype == "int": return "Int" - elif type == "float": + elif dtype == "float": return "Double" else: raise RuntimeError(f"dtype: {dtype} is not supported") From 58b4d50a1538f1290531ecfc2dfe68d361914e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 12:49:37 +0100 Subject: [PATCH 25/47] Delete consume_privacy_budget method from public interface --- pydp/algorithms/algorithm.py | 1 - .../PyDP/pydp_lib/algorithm_builder.hpp | 36 +++++++++---------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index a7d5a2a7..08bf354c 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -14,7 +14,6 @@ class WrapAlgorithm: "add_entry", "result", "partial_result", - "consume_privacy_budget", "reset", "serialize", "merge", diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 52c61a37..a25b449c 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -28,7 +28,7 @@ template class AlgorithmBuilder { public: std::unique_ptr build(double epsilon, - // std::optional delta = std::nullopt, + // std::optional delta = std::nullopt, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, std::optional l0_sensitivity = std::nullopt, @@ -78,26 +78,24 @@ class AlgorithmBuilder { // Constructors if constexpr (is_bounded_algorithm()) { // 
Explicit bounds constructor - pyself.def( - py::init([this](double epsilon, T lower_bound, T upper_bound, - int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity, - linf_sensitivity); - }), - py::arg("epsilon"), py::arg("lower_bound"), - py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, - py::arg("linf_sensitivity") = 1); + pyself.def(py::init([this](double epsilon, T lower_bound, T upper_bound, + int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity, + linf_sensitivity); + }), + py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), + py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); } // No bounds constructor - pyself.def(py::init([this](double epsilon, int l0_sensitivity, - int linf_sensitivity) { - return this->build(epsilon, std::nullopt /*lower_bound*/, - std::nullopt /*upper_bound*/, l0_sensitivity, - linf_sensitivity); - }), - py::arg("epsilon"), py::arg("l0_sensitivity") = 1, - py::arg("linf_sensitivity") = 1); + pyself.def( + py::init([this](double epsilon, int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, std::nullopt /*lower_bound*/, + std::nullopt /*upper_bound*/, l0_sensitivity, + linf_sensitivity); + }), + py::arg("epsilon"), py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); // Getters pyself.def_property_readonly("epsilon", &Algorithm::GetEpsilon); @@ -164,8 +162,6 @@ class AlgorithmBuilder { }); // Other methods - pyself.def("consume_privacy_budget", &Algorithm::ConsumePrivacyBudget); - pyself.def("reset", &Algorithm::Reset); pyself.def("serialize", &Algorithm::Serialize); From b03ee554c5361b50be76e6ffed0f57663daede97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 16:54:11 +0100 Subject: [PATCH 26/47] Make method wrapping explicit --- pydp/algorithms/algorithm.py | 133 +++++++++++++++++++++++++++-------- 
1 file changed, 104 insertions(+), 29 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index 08bf354c..2c613fe8 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -1,34 +1,22 @@ -from functools import wraps - from .._pydp import _algorithms -class WrapAlgorithm: - - # Class variables - __methods_to_wrap = [ - "epsilon", - "privacy_budget_left", - "memory_used", - "add_entries", - "add_entry", - "result", - "partial_result", - "reset", - "serialize", - "merge", - "noise_confidence_interval", - ] - - def __init__(self, dtype="int", **kwargs): +class MetaAlgorithm: + def __init__(self, **kwargs): + dtype = kwargs.pop("dtype") + + # Delete bound params if the are not set to avoid conflicts with builder + if kwargs["lower_bound"] is None: + kwargs.pop("lower_bound") + if kwargs["upper_bound"] is None: + kwargs.pop("upper_bound") + binded_class = f"{self.__class__.__name__}{self.__map_dtype_str(dtype)}" class_ = getattr(_algorithms, binded_class) self.dtype = dtype self.__algorithm = class_(**kwargs) - - for method in self.__methods_to_wrap: - setattr(self, method, getattr(self.__algorithm, f"{method}")) + self.epsilon = self.__algorithm.epsilon @staticmethod def __map_dtype_str(dtype): @@ -39,18 +27,105 @@ def __map_dtype_str(dtype): else: raise RuntimeError(f"dtype: {dtype} is not supported") + def privacy_budget_left(self): + """ + Returns the remaining privacy budget. + """ + return self.__algorithm.privacy_budget_left() + + def memory_used(self): + """ + Returns the memory currently used by the algorithm in bytes. + """ + return self.__algorithm.memory_used() + + def add_entries(self, list): + """ + Adds multiple inputs to the algorithm. + """ + return self.__algorithm.add_entries(list) + + def add_entry(self, value): + """ + Adds one input to the algorithm. 
+ """ + return self.__algorithm.add_entry(value) + + def result(self, list): + """ + Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output. + + Consumes 100% of the privacy budget. + """ + return self.__algorithm.result(list) + + def partial_result(self): + """ + Gets the algorithm result, consuming the remaining privacy budget. + """ + return self.__algorithm.partial_result() + + def partial_result(self, privacy_budget): + """ + Same as above, but consumes only the `privacy_budget` amount of budget. + + Privacy budget, defined on [0,1], represents the fraction of the total budget to consume. + """ + return self.__algorithm.partial_result(privacy_budget) + + def partial_result(self, privacy_budget, noise_interval_level): + """ + Same as above, but provides the confidence level of the noise confidence interval, which may be included in the algorithm output. + """ + return self.__algorithm.partial_result(privacy_budget, noise_interval_level) -class Algorithm(WrapAlgorithm): + def reset(self): + """ + Resets the algorithm to a state in which it has received no input. After Reset is called, the algorithm should only consider input added after the last Reset call when providing output. + """ + return self.__algorithm.reset() + + def serialize(self): + """ + Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged. + + Returns empty summary for algorithms for which serialize is unimplemented. + """ + return self.__algorithm.serialize() + + def merge(self, summary): + """ + Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty. 
+ """ + return self.__algorithm.merge(summary) + + def noise_confidence_interval(self, confidence_level, privacy_budget): + """ + Returns the confidence_level confidence interval of noise added within the algorithm with specified privacy budget, using epsilon and other relevant, algorithm-specific parameters (e.g. bounds) provided by the constructor. + + This metric may be used to gauge the error rate introduced by the noise. + + If the returned value is , then the noise added has a confidence_level chance of being in the domain [x,y]. + + By default, NoiseConfidenceInterval() returns an error. Algorithms for which a confidence interval can feasibly be calculated override this and output the relevant value. + + Conservatively, we do not release the error rate for algorithms whose confidence intervals rely on input size. + """ + return self.__algorithm.noise_confidence_interval( + confidence_level, privacy_budget + ) + + +class Algorithm(MetaAlgorithm): def __init__(self, epsilon=1.0, dtype="int"): - WrapAlgorithm.__init__(self, dtype=dtype, epsilon=epsilon) + super().__init__(epsilon=epsilon, dtype=dtype) -class BoundedAlgorithm(WrapAlgorithm): +class BoundedAlgorithm(MetaAlgorithm): def __init__(self, epsilon=1.0, lower_bound=None, upper_bound=None, dtype="int"): - WrapAlgorithm.__init__( - self, - dtype=dtype, + super().__init__( epsilon=epsilon, lower_bound=lower_bound, upper_bound=upper_bound, + dtype=dtype, ) From bf4147a44e28adfdf075b54e1cb0278efb5c0c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 18:10:23 +0100 Subject: [PATCH 27/47] Fix init and type errors --- pydp/algorithms/algorithm.py | 4 ++-- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index 2c613fe8..cf32d243 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -6,9 +6,9 @@ def 
__init__(self, **kwargs): dtype = kwargs.pop("dtype") # Delete bound params if the are not set to avoid conflicts with builder - if kwargs["lower_bound"] is None: + if "lower_bound" in kwargs and kwargs["lower_bound"] is None: kwargs.pop("lower_bound") - if kwargs["upper_bound"] is None: + if "upper_bound" in kwargs and kwargs["upper_bound"] is None: kwargs.pop("upper_bound") binded_class = f"{self.__class__.__name__}{self.__map_dtype_str(dtype)}" diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index a25b449c..45d5cc99 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -119,7 +119,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis) { From 557412e16f0e6e43dee8bc70f69156a46b3e9969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 18:56:59 +0100 Subject: [PATCH 28/47] Fix packaging error --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 89ddb1ca..ebbd6b7c 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def read(fname): include_package_data=True, keywords="pydp", name="python-dp", - package_data={"_pydp": ["_pydp.so"],}, + package_data={"pydp": ["_pydp.so"],}, packages=find_packages(exclude=["tests"]), # need to check this setup_requires=setup_requirements, test_suite="tests", From 4d9eb2395d2228e578821afe77ae3bef2e4de28b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 19:11:04 +0100 Subject: [PATCH 29/47] Update tests and fix partial_result call --- pydp/algorithms/algorithm.py | 25 ++++++++++++------------- tests/algorithms/test_count.py | 16 ++++++++-------- 2 files changed, 20 insertions(+), 21 
deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index cf32d243..cfbc2bb1 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -59,24 +59,23 @@ def result(self, list): """ return self.__algorithm.result(list) - def partial_result(self): + def partial_result(self, privacy_budget=None, noise_interval_level=None): """ - Gets the algorithm result, consuming the remaining privacy budget. - """ - return self.__algorithm.partial_result() + Gets the algorithm result. - def partial_result(self, privacy_budget): - """ - Same as above, but consumes only the `privacy_budget` amount of budget. + The default call consumes the remaining privacy budget. - Privacy budget, defined on [0,1], represents the fraction of the total budget to consume. - """ - return self.__algorithm.partial_result(privacy_budget) + When `privacy_budget` (defined on [0,1]) is set, it consumes only the `privacy_budget` amount of budget. - def partial_result(self, privacy_budget, noise_interval_level): - """ - Same as above, but provides the confidence level of the noise confidence interval, which may be included in the algorithm output. + `noise_interval_level` provides the confidence level of the noise confidence interval, which may be included in the algorithm output. 
""" + + if privacy_budget is None: + return self.__algorithm.partial_result() + + if noise_interval_level is None: + return self.__algorithm.partial_result(privacy_budget) + return self.__algorithm.partial_result(privacy_budget, noise_interval_level) def reset(self): diff --git a/tests/algorithms/test_count.py b/tests/algorithms/test_count.py index 99a396a5..a289a721 100644 --- a/tests/algorithms/test_count.py +++ b/tests/algorithms/test_count.py @@ -1,17 +1,17 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import Count -@pytest.mark.parametrize("input_class", [dp.CountInt, dp.CountDouble]) +@pytest.mark.parametrize("dtype_in", ["int", "float"]) class TestPercentile: - def test_basic(self, input_class): + def test_basic(self, dtype_in): c = [1, 2, 3, 4, 2, 3] - count = input_class(1.7) + count = Count(epsilon=1.7, dtype=dtype_in) count.result(c) - def test_repeated_result(self, input_class): + def test_repeated_result(self, dtype_in): c = [1, 2, 3, 4, 2, 3] - count = input_class(1.7) + count = Count(epsilon=1.7, dtype=dtype_in) count.add_entries(c) count.partial_result(0.5) @@ -48,8 +48,8 @@ def test_memory(self, input_class): class TestCountDataTypes: def test_count_datatypes(self): - count = dp.CountInt(2.0) - assert isinstance(count, dp.CountInt) + count = Count(2.0, dtype="int") + assert isinstance(count, Count) countae = count.add_entry(2) assert isinstance(countae, type(None)) From 7d53895ebfe74af4457e1038781274375facff77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Thu, 13 Aug 2020 19:22:32 +0100 Subject: [PATCH 30/47] Update bounded algorithm tests --- tests/algorithms/test_bounded_mean.py | 16 +++++++++------- .../test_bounded_standard_deviation.py | 6 +++--- tests/algorithms/test_bounded_sum.py | 6 +++--- tests/algorithms/test_bounded_variance.py | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/algorithms/test_bounded_mean.py b/tests/algorithms/test_bounded_mean.py 
index c42f2bd1..7c5e40a8 100644 --- a/tests/algorithms/test_bounded_mean.py +++ b/tests/algorithms/test_bounded_mean.py @@ -1,21 +1,23 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import BoundedMean -def test_c_api(): +def test_python_api(): a = [2, 4, 6, 8] - mean_algorithm = dp.BoundedMean(1.0, 1, 9) + mean_algorithm = BoundedMean( + epsilon=1.0, lower_bound=1, upper_bound=9, dtype="float" + ) assert 1.0 <= mean_algorithm.result(a) <= 9.0 def test_bounded_mean(): - bm1 = dp.BoundedMean(3.4, 1, 2) - assert isinstance(bm1, dp.BoundedMean) + bm1 = BoundedMean(epsilon=3.4, lower_bound=1, upper_bound=2, dtype="float") + assert isinstance(bm1, BoundedMean) assert isinstance(bm1.result([1.5, 2, 2.5]), float) - bm2 = dp.BoundedMean(3.4) - assert isinstance(bm2, dp.BoundedMean) + bm2 = BoundedMean(epsilon=3.4, dtype="int") + assert isinstance(bm2, BoundedMean) # assert isinstance(bm2.result([1.5, 2, 2.5]), float) diff --git a/tests/algorithms/test_bounded_standard_deviation.py b/tests/algorithms/test_bounded_standard_deviation.py index addb9825..bb550572 100644 --- a/tests/algorithms/test_bounded_standard_deviation.py +++ b/tests/algorithms/test_bounded_standard_deviation.py @@ -1,12 +1,12 @@ -import pydp as dp +from pydp.algorithms.laplacian import BoundedStandardDeviation class TestBoundedStandardDeviation: - def test_c_api(self): + def test_python_api(self): example_data = [1, 5, 7, 9, 13] epsilon = 1.0 lower_bound, upper_bound = 0, 15 - bsd = dp.BoundedStandardDeviation(epsilon, lower_bound, upper_bound) + bsd = BoundedStandardDeviation(epsilon, lower_bound, upper_bound, dtype="float") result = bsd.result(example_data) assert type(result) is float and result >= 0 assert result <= (upper_bound - lower_bound) / 2 diff --git a/tests/algorithms/test_bounded_sum.py b/tests/algorithms/test_bounded_sum.py index 73bc1e6a..c69c5e2c 100644 --- a/tests/algorithms/test_bounded_sum.py +++ b/tests/algorithms/test_bounded_sum.py @@ -1,12 +1,12 @@ import pytest 
-import pydp as dp +from pydp.algorithms.laplacian import BoundedSum class TestBoundedSum: - def test_c_api_sanity_check(self): + def test_pythons_api_sanity_check(self): a = [1, 2, 3, 4] - sum_algorithm = dp.BoundedSum(1.0, 0, 10) + sum_algorithm = BoundedSum(1.0, 0, 10, dtype="float") result = sum_algorithm.result(a) # The result value is -16.0 # Google library tests make use of ZeroNoiseMechanism diff --git a/tests/algorithms/test_bounded_variance.py b/tests/algorithms/test_bounded_variance.py index c0688767..5bd074ac 100644 --- a/tests/algorithms/test_bounded_variance.py +++ b/tests/algorithms/test_bounded_variance.py @@ -1,4 +1,4 @@ -import pydp as dp +from pydp.algorithms.laplacian import BoundedVariance class TestBoundedVariance: @@ -6,7 +6,7 @@ def test_basic(self): example_data = [1, 5, 7, 9, 13] epsilon = 1.0 lower_bound, upper_bound = 0, 16 - bv = dp.BoundedVariance(epsilon, lower_bound, upper_bound) + bv = BoundedVariance(epsilon, lower_bound, upper_bound, dtype="float") result = bv.result(example_data) assert type(result) is float and result >= 0 # Popoviciu's inequality on variances: From 1646460de78357e483ea6e48286ff55de9f3b7db Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Fri, 14 Aug 2020 01:55:49 +0530 Subject: [PATCH 31/47] change return datatype to double --- src/bindings/PyDP/pydp_lib/algorithm_builder.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 45d5cc99..52197b26 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -119,7 +119,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis) { @@ -129,7 +129,7 @@ class AlgorithmBuilder { throw 
std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) { @@ -143,7 +143,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget, @@ -158,7 +158,7 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + return dp::GetValue(result.ValueOrDie()); }); // Other methods From a19b0a5bdd65639dbf00301aa0a9f7806da37e76 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Fri, 14 Aug 2020 01:56:22 +0530 Subject: [PATCH 32/47] added l0&linf sensitivity --- pydp/algorithms/algorithm.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index cfbc2bb1..c534f4d4 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -17,6 +17,8 @@ def __init__(self, **kwargs): self.dtype = dtype self.__algorithm = class_(**kwargs) self.epsilon = self.__algorithm.epsilon + self.l0_sensitivity = kwargs["l0_sensitivity"] + self.linf_sensitivity = kwargs["linf_sensitivity"] @staticmethod def __map_dtype_str(dtype): @@ -51,7 +53,7 @@ def add_entry(self, value): """ return self.__algorithm.add_entry(value) - def result(self, list): + def quick_result(self, list): """ Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output. @@ -59,7 +61,7 @@ def result(self, list): """ return self.__algorithm.result(list) - def partial_result(self, privacy_budget=None, noise_interval_level=None): + def result(self, privacy_budget=None, noise_interval_level=None): """ Gets the algorithm result. 
@@ -72,11 +74,10 @@ def partial_result(self, privacy_budget=None, noise_interval_level=None): if privacy_budget is None: return self.__algorithm.partial_result() - - if noise_interval_level is None: + elif noise_interval_level is None: return self.__algorithm.partial_result(privacy_budget) - - return self.__algorithm.partial_result(privacy_budget, noise_interval_level) + else: + return self.__algorithm.partial_result(privacy_budget, noise_interval_level) def reset(self): """ @@ -121,10 +122,20 @@ def __init__(self, epsilon=1.0, dtype="int"): class BoundedAlgorithm(MetaAlgorithm): - def __init__(self, epsilon=1.0, lower_bound=None, upper_bound=None, dtype="int"): + def __init__( + self, + epsilon=1.0, + lower_bound=None, + upper_bound=None, + l0_sensitivity=1, + linf_sensitivity=1, + dtype="int", + ): super().__init__( epsilon=epsilon, lower_bound=lower_bound, upper_bound=upper_bound, + l0_sensitivity=l0_sensitivity, + linf_sensitivity=linf_sensitivity, dtype=dtype, ) From b9820b1a4013ef90d215ec436150c0e1dd548754 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Fri, 14 Aug 2020 10:47:41 +0100 Subject: [PATCH 33/47] Fix sensitivity values init and make getters readonly arguments --- pydp/algorithms/algorithm.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index c534f4d4..62bba770 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -16,9 +16,8 @@ def __init__(self, **kwargs): self.dtype = dtype self.__algorithm = class_(**kwargs) - self.epsilon = self.__algorithm.epsilon - self.l0_sensitivity = kwargs["l0_sensitivity"] - self.linf_sensitivity = kwargs["linf_sensitivity"] + self._l0_sensitivity = kwargs.get("l0_sensitivity", "Not set") + self._linf_sensitivity = kwargs.get("linf_sensitivity", "Not set") @staticmethod def __map_dtype_str(dtype): @@ -29,6 +28,27 @@ def __map_dtype_str(dtype): else: 
raise RuntimeError(f"dtype: {dtype} is not supported") + @property + def epsilon(self): + """ + Returns the epsilon set at initialization. + """ + return self.__algorithm.epsilon + + @property + def l0_sensitivity(self): + """ + Returns the l0_sensitivity set at initialization. + """ + return self._l0_sensitivity + + @property + def linf_sensitivity(self): + """ + Returns the linf_sensitivity set at initialization. + """ + return self._linf_sensitivity + def privacy_budget_left(self): """ Returns the remaining privacy budget. From 0ff3ca868738ac664466b38ec4d6877c4b83aa16 Mon Sep 17 00:00:00 2001 From: benjamin de charmoy Date: Fri, 14 Aug 2020 15:07:03 +0200 Subject: [PATCH 34/47] Order statistics refactor (#244) * added order stats to the builder * added the python interface for Max, Min, Median and Percentile. * added OrderStatictics using the algorithm builder. * updated the tests for the new order statictics python interface. * changed parameter name from list to data as list is a python builtin * percentile is now a property at the binding level * removed the old capi as we are no-longer using it. * removed the on dp percentile binding. If we want it we can add it back but need to avoid name clashes. * Updated the builder to build the order statistcs and fixed the return types. * Moved Percentile to its own file. * Reinstated tests and updated to use new python interface. * updated upstream ref to 1b1dc6639173c0a13613189ec21851604a4c7335 * Fixed python formatting. 
--- pydp/algorithms/algorithm.py | 28 +- pydp/algorithms/laplacian/__init__.py | 9 +- .../laplacian/bounded_algorithms.py | 12 + pydp/algorithms/laplacian/percentile.py | 5 + src/bindings/BUILD | 4 +- .../PyDP/algorithms/order_statistics.cpp | 129 +------ src/bindings/PyDP/base/percentile.cpp | 28 -- src/bindings/PyDP/bindings.cpp | 6 +- .../PyDP/pydp_lib/algorithm_builder.hpp | 108 +++++- src/bindings/c/c_api.cc | 343 ------------------ src/bindings/c/c_api.h | 65 ---- tests/algorithms/conftest.py | 59 +++ tests/algorithms/test_bounded_mean.py | 6 +- .../test_bounded_standard_deviation.py | 2 +- tests/algorithms/test_bounded_sum.py | 2 +- tests/algorithms/test_bounded_variance.py | 2 +- tests/algorithms/test_count.py | 9 +- tests/algorithms/test_order_statistics.py | 168 ++++----- tests/base/test_percentile.py | 12 +- third_party/differential-privacy | 2 +- 20 files changed, 310 insertions(+), 689 deletions(-) create mode 100644 pydp/algorithms/laplacian/percentile.py delete mode 100644 src/bindings/PyDP/base/percentile.cpp delete mode 100644 src/bindings/c/c_api.cc delete mode 100644 src/bindings/c/c_api.h create mode 100644 tests/algorithms/conftest.py diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index 62bba770..5e6d31bd 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -73,13 +73,13 @@ def add_entry(self, value): """ return self.__algorithm.add_entry(value) - def quick_result(self, list): + def quick_result(self, data): """ Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output. Consumes 100% of the privacy budget. 
""" - return self.__algorithm.result(list) + return self.__algorithm.result(data) def result(self, privacy_budget=None, noise_interval_level=None): """ @@ -159,3 +159,27 @@ def __init__( linf_sensitivity=linf_sensitivity, dtype=dtype, ) + + +class PercentileBase(MetaAlgorithm): + def __init__( + self, + epsilon=1.0, + percentile=0.0, + lower_bound=None, + upper_bound=None, + dtype="int", + ): + super().__init__( + epsilon=epsilon, + percentile=percentile, + lower_bound=lower_bound, + upper_bound=upper_bound, + dtype=dtype, + ) + + @property + def percentile(self): + """percentile Gets the value that was set in the constructor. + """ + return self._MetaAlgorithm__algorithm.percentile diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py index ded0980e..9ac422df 100644 --- a/pydp/algorithms/laplacian/__init__.py +++ b/pydp/algorithms/laplacian/__init__.py @@ -3,9 +3,12 @@ BoundedSum, BoundedStandardDeviation, BoundedVariance, + Max, + Min, + Median, ) from .count import Count - +from .percentile import Percentile __all__ = [ "BoundedMean", @@ -13,4 +16,8 @@ "BoundedSum", "BoundedVariance", "Count", + "Max", + "Min", + "Median", + "Percentile", ] diff --git a/pydp/algorithms/laplacian/bounded_algorithms.py b/pydp/algorithms/laplacian/bounded_algorithms.py index dbcc03bb..71de419d 100644 --- a/pydp/algorithms/laplacian/bounded_algorithms.py +++ b/pydp/algorithms/laplacian/bounded_algorithms.py @@ -15,3 +15,15 @@ class BoundedStandardDeviation(BoundedAlgorithm): class BoundedVariance(BoundedAlgorithm): pass + + +class Max(BoundedAlgorithm): + pass + + +class Min(BoundedAlgorithm): + pass + + +class Median(BoundedAlgorithm): + pass diff --git a/pydp/algorithms/laplacian/percentile.py b/pydp/algorithms/laplacian/percentile.py new file mode 100644 index 00000000..b1c9e791 --- /dev/null +++ b/pydp/algorithms/laplacian/percentile.py @@ -0,0 +1,5 @@ +from ..algorithm import BoundedAlgorithm, PercentileBase + + +class 
Percentile(PercentileBase): + pass diff --git a/src/bindings/BUILD b/src/bindings/BUILD index df8a4616..ed3d819c 100644 --- a/src/bindings/BUILD +++ b/src/bindings/BUILD @@ -8,9 +8,7 @@ pybind_extension( "PyDP/algorithms/*.cpp", "PyDP/algorithms/*.cpp", "PyDP/pydp_lib/*.hpp", - "PyDP/proto/*.cpp", - "c/*.cc", - "c/*.h" + "PyDP/proto/*.cpp" ]), visibility = ["//src/python:__pkg__"], diff --git a/src/bindings/PyDP/algorithms/order_statistics.cpp b/src/bindings/PyDP/algorithms/order_statistics.cpp index 97f9753f..80442f6d 100644 --- a/src/bindings/PyDP/algorithms/order_statistics.cpp +++ b/src/bindings/PyDP/algorithms/order_statistics.cpp @@ -1,10 +1,6 @@ -// Provides bindings for Bounded Functions - -#include "../../c/c_api.h" - -#include "../pydp_lib/casting.hpp" // our caster helper library -#include "../pydp_lib/helper_class.hpp" // Dummy helder class - +// Provides bindings for Order Statistics +#include "../pydp_lib/algorithm_builder.hpp" +#include "algorithms/order-statistics.h" #include "pybind11/complex.h" #include "pybind11/functional.h" #include "pybind11/pybind11.h" @@ -13,115 +9,24 @@ using namespace std; namespace py = pybind11; +namespace dp = differential_privacy; -class MaxDummy : public Dummy { - public: - using Dummy::Dummy; - - double Result(py::list l, double privacy_budget) override { - return Result_Max(obj, l, privacy_budget); - } -}; - -class MinDummy : public Dummy { - public: - using Dummy::Dummy; - - double Result(py::list l, double privacy_budget) override { - return Result_Min(obj, l, privacy_budget); - } -}; - -class MedianDummy : public Dummy { - public: - using Dummy::Dummy; - - double Result(py::list l, double privacy_budget) override { - return Result_Median(obj, l, privacy_budget); - } -}; - -class PercentileDummy : public Dummy { - public: - using Dummy::Dummy; - - void setPercentile(double percentile) { - _percentile = percentile; - } - - double getPercentile() { - return _percentile; - } - - double Result(py::list l, double 
privacy_budget) override { - return Result_Percentile(obj, l, privacy_budget, _percentile); - } - - private: - double _percentile = 0.45; -}; - -void declareMax(py::module& m) { - py::class_ bld(m, "Max"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &MaxDummy::Result); - bld.def_property("l0_sensitivity", &MaxDummy::get_l0_sensitivity, - &MaxDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &MaxDummy::get_linf_sensitivity, - &MaxDummy::set_linf_sensitivity); +template +void declareOrderStat(py::module& m) { + using builder = typename dp::python::AlgorithmBuilder; + builder().declare(m); } -void declareMin(py::module& m) { - py::class_ bld(m, "Min"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &MinDummy::Result); - bld.def_property("l0_sensitivity", &MinDummy::get_l0_sensitivity, - &MinDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &MinDummy::get_linf_sensitivity, - &MinDummy::set_linf_sensitivity); -} +void init_algorithms_order_statistics(py::module& m) { + declareOrderStat>(m); + declareOrderStat>(m); -void declareMedian(py::module& m) { - py::class_ bld(m, "Median"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &MedianDummy::Result); - bld.def_property("l0_sensitivity", &MedianDummy::get_l0_sensitivity, - &MedianDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &MedianDummy::get_linf_sensitivity, - &MedianDummy::set_linf_sensitivity); -} + declareOrderStat>(m); + declareOrderStat>(m); -void 
declarePercentile(py::module& m) { - py::class_ bld(m, "Percentile"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &PercentileDummy::Result); - bld.def_property("percentile", &PercentileDummy::getPercentile, - &PercentileDummy::setPercentile); - bld.def_property("l0_sensitivity", &PercentileDummy::get_l0_sensitivity, - &PercentileDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &PercentileDummy::get_linf_sensitivity, - &PercentileDummy::set_linf_sensitivity); -} + declareOrderStat>(m); + declareOrderStat>(m); -void init_algorithms_order_statistics(py::module& m) { - declareMax(m); - declareMin(m); - declareMedian(m); - declarePercentile(m); + declareOrderStat>(m); + declareOrderStat>(m); } diff --git a/src/bindings/PyDP/base/percentile.cpp b/src/bindings/PyDP/base/percentile.cpp deleted file mode 100644 index b51bd7df..00000000 --- a/src/bindings/PyDP/base/percentile.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Provides bindings for percentile class - -#include "pybind11/pybind11.h" - -#include "base/percentile.h" - -using namespace std; -namespace py = pybind11; -namespace dpb = differential_privacy::base; - -template -void declarePercentile(py::module& m, string const& suffix) { - py::class_> percentile(m, ("Percentile" + suffix).c_str()); - percentile.attr("__module__") = "pydp"; - percentile.def(py::init()) - .def("add", &dpb::Percentile::Add) - .def("reset", &dpb::Percentile::Reset) - .def("serialize_to_proto", &dpb::Percentile::SerializeToProto) - .def("merge_from_proton", &dpb::Percentile::MergeFromProto) - .def("memory", &dpb::Percentile::Memory) - .def("num_values", &dpb::Percentile::num_values) - .def("get_relative_rank", &dpb::Percentile::GetRelativeRank); -} - -void init_base_percentile(py::module& m) { - declarePercentile(m, "Int"); - declarePercentile(m, "Double"); -} diff 
--git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp index c460e7c8..c8a46865 100644 --- a/src/bindings/PyDP/bindings.cpp +++ b/src/bindings/PyDP/bindings.cpp @@ -7,7 +7,7 @@ namespace py = pybind11; // base void init_base_status(py::module &); void init_base_logging(py::module &); -void init_base_percentile(py::module &); +// void init_base_percentile(py::module &); // bounded functions void init_algorithms_bounded_functions(py::module &); @@ -34,14 +34,14 @@ PYBIND11_MODULE(_pydp, m) { // Base init_base_status(m); init_base_logging(m); - init_base_percentile(m); + // init_base_percentile(m); // Algorithms auto malgorithms = m.def_submodule("_algorithms"); init_algorithms_bounded_functions(malgorithms); init_algorithms_util(m); init_algorithms_distributions(m); - init_algorithms_order_statistics(m); + init_algorithms_order_statistics(malgorithms); init_algorithms_rand(m); init_algorithms_count(malgorithms); diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp index 52197b26..659e4331 100644 --- a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -8,6 +8,7 @@ #include "algorithms/bounded-variance.h" #include "algorithms/count.h" #include "algorithms/numerical-mechanisms.h" +#include "algorithms/order-statistics.h" #include "base/statusor.h" namespace dp = differential_privacy; @@ -21,23 +22,55 @@ constexpr bool is_bounded_algorithm() { return std::is_same>::value || std::is_same>::value || std::is_same>::value || - std::is_same>::value; + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + +template +constexpr bool is_percentile() { + return std::is_same>::value; +} + +template +constexpr bool should_return_T() { + return std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + +template +constexpr bool 
should_return_double() { + return std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + +template +constexpr bool should_return_int() { + return std::is_same>::value; } template class AlgorithmBuilder { public: std::unique_ptr build(double epsilon, - // std::optional delta = std::nullopt, + std::optional percentile = std::nullopt, std::optional lower_bound = std::nullopt, std::optional upper_bound = std::nullopt, std::optional l0_sensitivity = std::nullopt, std::optional linf_sensitivity = std::nullopt) { auto builder = typename Algorithm::Builder(); + if constexpr (is_percentile()) { + if (percentile.has_value()) builder.SetPercentile(percentile.value()); + } builder.SetEpsilon(epsilon); - // if (delta.has_value()) builder.SetDelta(delta.value()); if (l0_sensitivity.has_value()) builder.SetMaxPartitionsContributed(l0_sensitivity.value()); if (linf_sensitivity.has_value()) @@ -56,14 +89,20 @@ class AlgorithmBuilder { return std::move(obj.ValueOrDie()); } - std::map type_to_name = {{typeid(double), "Double"}, - {typeid(int), "Int"}}; + std::map type_to_name = { + {typeid(double), "Double"}, {typeid(int), "Int"}, {typeid(int64_t), "Int"}}; std::map algorithm_to_name = { {typeid(dp::BoundedMean), "BoundedMean"}, {typeid(dp::BoundedSum), "BoundedSum"}, {typeid(dp::BoundedStandardDeviation), "BoundedStandardDeviation"}, {typeid(dp::BoundedVariance), "BoundedVariance"}, - {typeid(dp::Count), "Count"}}; + {typeid(dp::Count), "Count"}, + {typeid(dp::continuous::Min), "Min"}, + {typeid(dp::continuous::Max), "Max"}, + {typeid(dp::continuous::Median), "Median"}, + {typeid(dp::continuous::Percentile), "Percentile"} + + }; std::string get_algorithm_name() { // Set the suffix string @@ -77,11 +116,23 @@ class AlgorithmBuilder { // Constructors if constexpr (is_bounded_algorithm()) { + if constexpr (is_percentile()) { + // Explicit percentile constructor + pyself.def( + py::init([this](double epsilon, double percentile, T 
lower_bound, + T upper_bound, int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, percentile, lower_bound, upper_bound, + l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("percentile"), py::arg("lower_bound"), + py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); + } // Explicit bounds constructor pyself.def(py::init([this](double epsilon, T lower_bound, T upper_bound, int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, lower_bound, upper_bound, l0_sensitivity, - linf_sensitivity); + return this->build(epsilon, std::nullopt /*percentile*/, lower_bound, + upper_bound, l0_sensitivity, linf_sensitivity); }), py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); @@ -90,9 +141,9 @@ class AlgorithmBuilder { // No bounds constructor pyself.def( py::init([this](double epsilon, int l0_sensitivity, int linf_sensitivity) { - return this->build(epsilon, std::nullopt /*lower_bound*/, - std::nullopt /*upper_bound*/, l0_sensitivity, - linf_sensitivity); + return this->build(epsilon, std::nullopt /*percentile*/, + std::nullopt /*lower_bound*/, std::nullopt /*upper_bound*/, + l0_sensitivity, linf_sensitivity); }), py::arg("epsilon"), py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); @@ -118,8 +169,12 @@ class AlgorithmBuilder { if (!result.ok()) { throw std::runtime_error(result.status().error_message()); } - - return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis) { @@ -129,7 +184,12 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return 
dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) { @@ -143,7 +203,12 @@ class AlgorithmBuilder { throw std::runtime_error(result.status().error_message()); } - return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); }); pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget, @@ -157,8 +222,12 @@ class AlgorithmBuilder { if (!result.ok()) { throw std::runtime_error(result.status().error_message()); } - - return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); }); // Other methods @@ -169,6 +238,11 @@ class AlgorithmBuilder { pyself.def("merge", &Algorithm::Merge); pyself.def("noise_confidence_interval", &Algorithm::NoiseConfidenceInterval); + + // Percentile special case. 
+ if constexpr (is_percentile()) { + pyself.def_property_readonly("percentile", &Algorithm::GetPercentile); + } } }; diff --git a/src/bindings/c/c_api.cc b/src/bindings/c/c_api.cc deleted file mode 100644 index 6d2dc3b5..00000000 --- a/src/bindings/c/c_api.cc +++ /dev/null @@ -1,343 +0,0 @@ -#include "c_api.h" - -#include "algorithms/algorithm.h" - -#include "algorithms/bounded-mean.h" -#include "algorithms/bounded-standard-deviation.h" -#include "algorithms/bounded-sum.h" -#include "algorithms/bounded-variance.h" - -#include "absl/random/distributions.h" -#include "algorithms/order-statistics.h" - -#include "pybind11/pybind11.h" - -extern "C" { -namespace differential_privacy { - -// Helper variable in-case bounds are not set -bool has_bounds; -int l0_sensitivity = 1; -int linf_sensitivity = 1; - -// Bounded Mean -double Result_BoundedMean(BoundedFunctionHelperObject* config, pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> mean_obj; - if (has_bounds) { - mean_obj = BoundedMean::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - // TODO: a better solution to this is needed similar to ASSIGN_OR_RETURN but with a - // raised exeception - mean_obj = BoundedMean::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!mean_obj.ok()) { - // TODO: custtom error? 
- throw std::runtime_error(mean_obj.status().error_message()); - } - - base::StatusOr resultf = mean_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } -} - -// Bounded Sum -double Result_BoundedSum(BoundedFunctionHelperObject* config, pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> sum_obj; - if (has_bounds) { - sum_obj = BoundedSum::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - // TODO: a better solution to this is needed similar to ASSIGN_OR_RETURN but with a - // raised exeception - sum_obj = BoundedSum::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!sum_obj.ok()) { - throw std::runtime_error(sum_obj.status().error_message()); - } - - base::StatusOr resultf = sum_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } -} - -double Result_BoundedStandardDeviation(BoundedFunctionHelperObject* config, - pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> sd_obj; - if (has_bounds) { - sd_obj = BoundedStandardDeviation::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - // TODO: a better solution to this is needed similar to ASSIGN_OR_RETURN but with a - // raised exeception - sd_obj = 
BoundedStandardDeviation::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!sd_obj.ok()) { - throw std::runtime_error(sd_obj.status().error_message()); - } - - base::StatusOr resultf = sd_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } -} - -double Result_BoundedVariance(BoundedFunctionHelperObject* config, pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> variance_obj; - if (has_bounds) { - variance_obj = BoundedVariance::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - variance_obj = BoundedVariance::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!variance_obj.ok()) { - throw std::runtime_error(variance_obj.status().error_message()); - } else { - base::StatusOr resultf = - variance_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Order Statistics - -// Max - -int64_t Result_Max(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget) { - base::StatusOr>> max; - if (has_bounds) { - max = continuous::Max::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - max = 
continuous::Max::Builder().SetEpsilon(config->epsilon).Build(); - } - if (!max.ok()) { - throw std::runtime_error(max.status().error_message()); - } else { - for (auto i : l) { - max.ValueOrDie()->AddEntry(i.cast()); - } - base::StatusOr resultf = max.ValueOrDie()->PartialResult(privacy_budget); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Min - -int64_t Result_Min(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget) { - base::StatusOr>> min; - if (has_bounds) { - min = continuous::Min::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - min = continuous::Min::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - if (!min.ok()) { - throw std::runtime_error(min.status().error_message()); - } else { - for (auto i : l) { - min.ValueOrDie()->AddEntry(i.cast()); - } - base::StatusOr resultf = min.ValueOrDie()->PartialResult(privacy_budget); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Max - -int64_t Result_Median(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget) { - base::StatusOr>> median; - if (has_bounds) { - median = continuous::Median::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - median = continuous::Median::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - 
.SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - if (!median.ok()) { - throw std::runtime_error(median.status().error_message()); - } else { - for (auto i : l) { - median.ValueOrDie()->AddEntry(i.cast()); - } - base::StatusOr resultf = median.ValueOrDie()->PartialResult(privacy_budget); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Percentile -// can't add error handling to this for some reasons -// TODO -int64_t Result_Percentile(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget, double percentile) { - std::unique_ptr> search; - if (has_bounds) { - search = continuous::Percentile::Builder() - .SetPercentile(percentile) - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build() - .ValueOrDie(); - } else { - search = continuous::Percentile::Builder() - .SetPercentile(percentile) - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build() - .ValueOrDie(); - } - for (auto i : l) { - search->AddEntry(i.cast()); - } - - return GetValue(search->PartialResult(privacy_budget).ValueOrDie()); -} - -// Common functions - -BoundedFunctionHelperObject* NewBoundedFunctionObject(double epsilon, int lower_bound, - int upper_bound) { - has_bounds = true; - return new BoundedFunctionHelperObject{epsilon, lower_bound, upper_bound}; -} - -BoundedFunctionHelperObject* NewBoundedFunctionObject1(double epsilon) { - has_bounds = false; - return new BoundedFunctionHelperObject{epsilon}; -} - -void DeleteBoundedFunctionObject(BoundedFunctionHelperObject* config) { - delete config; -}; - -void set_l0_sensitivity_(int _l0_sensitivity) { - l0_sensitivity = _l0_sensitivity; -} - -int get_l0_sensitivity_() { - 
return l0_sensitivity; -} - -void set_linf_sensitivity_(int _linf_sensitivity) { - linf_sensitivity = _linf_sensitivity; -} - -int get_linf_sensitivity_() { - return linf_sensitivity; -} - -} // end namespace differential_privacy -} // end extern "C" diff --git a/src/bindings/c/c_api.h b/src/bindings/c/c_api.h deleted file mode 100644 index aee8fa46..00000000 --- a/src/bindings/c/c_api.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef DIFFERENTIAL_PRIVACY_C_C_API_H_ -#define DIFFERENTIAL_PRIVACY_C_C_API_H_ - -#include -#include - -#include "pybind11/pybind11.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Common Functions for all Bounded Functions - -typedef struct BoundedFunctionHelperObject { - double epsilon; - int lower_bound; - int upper_bound; -} BoundedFunctionHelperObject; - -extern BoundedFunctionHelperObject* NewBoundedFunctionObject(double epsilon, - int lower_bound, - int upper_bound); - -extern BoundedFunctionHelperObject* NewBoundedFunctionObject1(double epsilon); - -extern void DeleteBoundedFunctionObject(BoundedFunctionHelperObject* config); - -// Bounded Mean -extern double Result_BoundedMean(BoundedFunctionHelperObject* config, pybind11::list a); - -extern double Result_BoundedSum(BoundedFunctionHelperObject* config, pybind11::list a); - -extern double Result_BoundedStandardDeviation(BoundedFunctionHelperObject* config, - pybind11::list a); - -extern double Result_BoundedVariance(BoundedFunctionHelperObject* config, - pybind11::list a); - -// Order statistics -extern int64_t Result_Max(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget); - -extern int64_t Result_Min(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget); - -extern int64_t Result_Median(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget); - -extern int64_t Result_Percentile(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget, double percentile); - -extern void 
set_l0_sensitivity_(int _l0_sensitivity); - -extern int get_l0_sensitivity_(); - -extern void set_linf_sensitivity_(int _linf_sensitivity); - -extern int get_linf_sensitivity_(); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -#endif // DIFFERENTIAL_PRIVACY_C_C_API_H_ diff --git a/tests/algorithms/conftest.py b/tests/algorithms/conftest.py new file mode 100644 index 00000000..cc5b308b --- /dev/null +++ b/tests/algorithms/conftest.py @@ -0,0 +1,59 @@ +from typing import List +from itertools import accumulate +import math + + +def skew(samples: List[float], mu: float, sigma: float): + """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc + and we don't want to pull the test files in. I'm assuming it'll be moved to + third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. + Until then this should suffice. #FIXME: when possible we can fix this. + """ + skew = list( + accumulate(samples, lambda lhs, rhs: lhs + (rhs - mu) * (rhs - mu) * (rhs - mu)) + )[-1] + return skew / (len(samples) * sigma * sigma * sigma) + + +def kurtosis(samples: List[float], mu: float, var: float): + """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc + and we don't want to pull the test files in. I'm assuming it'll be moved to + third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. + Until then this should suffice. #FIXME: when possible we can fix this. + """ + kurt = list( + accumulate(samples, lambda lhs, rhs: lhs + ((rhs - mu) * (rhs - mu)) ** 2) + )[-1] + n = len(samples) + kurt = (n + 1) * kurt / (n * var * var) + kurt -= 3 * (n - 1) + kurt *= (n - 1) / (n - 2) / (n - 3) + return kurt + + +def percentile(N, percent, key=lambda x: x): + """ + Find the percentile of a list of values. + @parameter N - is a list of values. Note N MUST BE already sorted. 
+ @parameter percent - a float value from 0.0 to 1.0. + @parameter key - optional key function to compute value from each element of N. + @return - the percentile of the values + """ + if not N: + return None + k = (len(N) - 1) * percent + f = math.floor(k) + c = math.ceil(k) + if f == c: + return key(N[int(k)]) + d0 = key(N[int(f)]) * (c - k) + d1 = key(N[int(c)]) * (k - f) + return d0 + d1 + + +# From what I understand @openmined/dp-research are going to look at validating correctness +# Until then we can use this to assert on floating point numbers. +# FIXME: When possible we should add 'correctness' tests. +expect_near = lambda expected, actual, tol: ( + expected + tol >= actual and expected - tol <= actual +) diff --git a/tests/algorithms/test_bounded_mean.py b/tests/algorithms/test_bounded_mean.py index 7c5e40a8..1b909318 100644 --- a/tests/algorithms/test_bounded_mean.py +++ b/tests/algorithms/test_bounded_mean.py @@ -8,17 +8,17 @@ def test_python_api(): mean_algorithm = BoundedMean( epsilon=1.0, lower_bound=1, upper_bound=9, dtype="float" ) - assert 1.0 <= mean_algorithm.result(a) <= 9.0 + assert 1.0 <= mean_algorithm.quick_result(a) <= 9.0 def test_bounded_mean(): bm1 = BoundedMean(epsilon=3.4, lower_bound=1, upper_bound=2, dtype="float") assert isinstance(bm1, BoundedMean) - assert isinstance(bm1.result([1.5, 2, 2.5]), float) + assert isinstance(bm1.quick_result([1.5, 2, 2.5]), float) bm2 = BoundedMean(epsilon=3.4, dtype="int") assert isinstance(bm2, BoundedMean) - # assert isinstance(bm2.result([1.5, 2, 2.5]), float) + # assert isinstance(bm2.quick_result([1.5, 2, 2.5]), float) # TODO: port this test diff --git a/tests/algorithms/test_bounded_standard_deviation.py b/tests/algorithms/test_bounded_standard_deviation.py index bb550572..8c80faa5 100644 --- a/tests/algorithms/test_bounded_standard_deviation.py +++ b/tests/algorithms/test_bounded_standard_deviation.py @@ -7,6 +7,6 @@ def test_python_api(self): epsilon = 1.0 lower_bound, upper_bound = 0, 15 bsd 
= BoundedStandardDeviation(epsilon, lower_bound, upper_bound, dtype="float") - result = bsd.result(example_data) + result = bsd.quick_result(example_data) assert type(result) is float and result >= 0 assert result <= (upper_bound - lower_bound) / 2 diff --git a/tests/algorithms/test_bounded_sum.py b/tests/algorithms/test_bounded_sum.py index c69c5e2c..56252326 100644 --- a/tests/algorithms/test_bounded_sum.py +++ b/tests/algorithms/test_bounded_sum.py @@ -7,7 +7,7 @@ def test_pythons_api_sanity_check(self): a = [1, 2, 3, 4] sum_algorithm = BoundedSum(1.0, 0, 10, dtype="float") - result = sum_algorithm.result(a) + result = sum_algorithm.quick_result(a) # The result value is -16.0 # Google library tests make use of ZeroNoiseMechanism # for more reasonable expected values, but we don't support diff --git a/tests/algorithms/test_bounded_variance.py b/tests/algorithms/test_bounded_variance.py index 5bd074ac..95dcf7c0 100644 --- a/tests/algorithms/test_bounded_variance.py +++ b/tests/algorithms/test_bounded_variance.py @@ -7,7 +7,7 @@ def test_basic(self): epsilon = 1.0 lower_bound, upper_bound = 0, 16 bv = BoundedVariance(epsilon, lower_bound, upper_bound, dtype="float") - result = bv.result(example_data) + result = bv.quick_result(example_data) assert type(result) is float and result >= 0 # Popoviciu's inequality on variances: assert result <= (upper_bound - lower_bound) ** 2 / 4 diff --git a/tests/algorithms/test_count.py b/tests/algorithms/test_count.py index a289a721..f304b4f9 100644 --- a/tests/algorithms/test_count.py +++ b/tests/algorithms/test_count.py @@ -7,14 +7,14 @@ class TestPercentile: def test_basic(self, dtype_in): c = [1, 2, 3, 4, 2, 3] count = Count(epsilon=1.7, dtype=dtype_in) - count.result(c) + count.quick_result(c) def test_repeated_result(self, dtype_in): c = [1, 2, 3, 4, 2, 3] count = Count(epsilon=1.7, dtype=dtype_in) count.add_entries(c) - count.partial_result(0.5) + count.result(0.5) """ # TODO: port ConfidenceInterval @@ -57,12 +57,13 @@ def 
test_count_datatypes(self): assert isinstance(countaes, type(None)) mem = count.memory_used() assert isinstance(mem, int) - par = count.partial_result() + par = count.result() assert isinstance(par, int) # TODO # par2 = count.partial_result(1.0) # assert isinstance(par2, int) - res = count.result([2]) + + res = count.quick_result([2]) assert isinstance(res, int) diff --git a/tests/algorithms/test_order_statistics.py b/tests/algorithms/test_order_statistics.py index cda58a02..2b62e314 100644 --- a/tests/algorithms/test_order_statistics.py +++ b/tests/algorithms/test_order_statistics.py @@ -5,129 +5,99 @@ import math import pydp as dp - - -@pytest.fixture -def data(): - kDataSize = 10000 - a = [] - for i in range(kDataSize): - a.append(int(200 * i / kDataSize)) - return a - - -def test_max(data): - maxx = dp.Max(1.0, 0, 2048) - assert 190 < maxx.result(data, 1.0) < 210 - - assert max(data) - 10 < maxx.result(data, 1.0) < max(data) + 10 - - -def test_min(data): - maxx = dp.Min(1.0, 0, 2048) - - assert min(data) - 10 < maxx.result(data, 1.0) < min(data) + 10 - - assert -10 < maxx.result(data, 1.0) < 10 - - -def test_median(data): - maxx = dp.Median(1.0, 0, 2048) - - assert ( - statistics.median(data) - 20 - < int(maxx.result(data, 1.0)) - < statistics.median(data) + 20 +from conftest import expect_near, percentile + +kDataSize = 10000 +data_floats = [200.0 * i / kDataSize for i in range(kDataSize)] +data_ints = [int(200 * i / kDataSize) for i in range(kDataSize)] +type_data_algorithm = [ + ("int", data_ints, dp.algorithms.laplacian.Max), + ("float", data_floats, dp.algorithms.laplacian.Max), + ("int", data_ints, dp.algorithms.laplacian.Min), + ("float", data_floats, dp.algorithms.laplacian.Min), + ("int", data_ints, dp.algorithms.laplacian.Median), + ("float", data_floats, dp.algorithms.laplacian.Median), +] + +# NOTE: One needs to use https://pypi.org/project/pytest-lazy-fixture/ to have fixtues in parameters. 
+# Using none fixtured data instead of adding a pytest-lazy-fixture. +@pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) +def test_min(data, dtype): + + minn = dp.algorithms.laplacian.Min( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 ) - - assert 90 <= int(maxx.result(data, 1.0)) <= 100 + assert expect_near(min(data), minn.quick_result(data), 10) + assert expect_near(0, minn.quick_result(data), 10) -def test_median1(data): - maxx = dp.Median(1.0) - assert ( - statistics.median(data) - 20 - < int(maxx.result(data, 1.0)) - < statistics.median(data) + 20 +@pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) +def test_max(data, dtype): + maxx = dp.algorithms.laplacian.Max( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 ) - assert 80 <= int(maxx.result(data, 1.0)) <= 100 - + assert 190 < maxx.quick_result(data) < 210 -def percentile(N, percent, key=lambda x: x): - """ - Find the percentile of a list of values. + assert expect_near(max(data), maxx.quick_result(data), 10) - @parameter N - is a list of values. Note N MUST BE already sorted. - @parameter percent - a float value from 0.0 to 1.0. - @parameter key - optional key function to compute value from each element of N. 
- @return - the percentile of the values - """ - if not N: - return None - k = (len(N) - 1) * percent - f = math.floor(k) - c = math.ceil(k) - if f == c: - return key(N[int(k)]) - d0 = key(N[int(f)]) * (c - k) - d1 = key(N[int(c)]) * (k - f) - return d0 + d1 +@pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) +def test_median(data, dtype): + median = dp.algorithms.laplacian.Median( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 + ) -def test_percentile(data): - maxx = dp.Percentile(1.0, 0, 2048) - maxx.percentile = 0.45 + assert expect_near(statistics.median(data), median.quick_result(data), 20) - actual_percentile = int(percentile(data, 0.45)) - assert maxx.percentile == 0.45 - assert actual_percentile - 10 < maxx.result(data, 1.0) < actual_percentile + 10 - assert 80 < maxx.result(data, 1.0) < 100 +@pytest.mark.parametrize("dtype", ["int", "float"]) +def test_percentile_getter(dtype): + expected_percentile = 0.45 -def test_max_datatypes(data): - ma1 = dp.Max(1.0) - ma2 = dp.Max(1.0, 0, 2048) - assert isinstance(ma1, dp.Max) - assert isinstance(ma2, dp.Max) + dp_percentile = dp.algorithms.laplacian.Percentile( + dtype=dtype, + epsilon=1.0, + percentile=expected_percentile, + lower_bound=0, + upper_bound=2048, + ) + assert dp_percentile.percentile == expected_percentile - res = ma2.result(data, 1.0) - assert isinstance(res, float) +@pytest.mark.parametrize("dtype, data, order_statistic", type_data_algorithm) +def test_order_statistic_datatypes(data, dtype, order_statistic): -def test_min_datatypes(data): - mi1 = dp.Min(1.0) - mi2 = dp.Min(1.0, 0, 2048) - assert isinstance(mi1, dp.Min) - assert isinstance(mi2, dp.Min) + order_statistic_1 = order_statistic(dtype=dtype, epsilon=1.0) + order_statistic_2 = order_statistic( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 + ) + res = order_statistic_2.quick_result(data) - res = mi2.result(data, 1.0) - assert isinstance(res, float) + assert 
isinstance(order_statistic_1, order_statistic) + assert isinstance(order_statistic_2, order_statistic) + assert isinstance(res, type(data[0])) -def test_median_datatypes(data): - me1 = dp.Median(1.0) - me2 = dp.Median(1.0, 0, 2048) - assert isinstance(me1, dp.Median) - assert isinstance(me2, dp.Median) - res = me2.result(data, 1.0) - assert isinstance(res, float) +@pytest.mark.parametrize( + "dtype, data, dp_percentile", + [ + ("int", data_ints, dp.algorithms.laplacian.Percentile), + ("float", data_floats, dp.algorithms.laplacian.Percentile), + ], +) +def test_percentile_datatypes(data, dtype, dp_percentile): + dp_percentile_2 = dp_percentile( + dtype=dtype, epsilon=1.0, percentile=0.45, lower_bound=0, upper_bound=2048 + ) + res = dp_percentile_2.quick_result(data) -def test_percentile_datatypes(data): - pe1 = dp.Percentile(1.0) - pe2 = dp.Percentile(1.0, 0, 2048) - assert isinstance(pe1, dp.Percentile) - assert isinstance(pe2, dp.Percentile) + assert isinstance(dp_percentile_2, dp_percentile) - res = pe2.result(data, 1.0) assert isinstance(res, float) - # resg = pe2.getPercentile() - # assert isinstance(resg, float) - # ress = pe2.setPercentile(0.5) - # assert isinstance(ress, None) # TODO Yet some more tests diff --git a/tests/base/test_percentile.py b/tests/base/test_percentile.py index 9a45ed1f..f79d8c36 100644 --- a/tests/base/test_percentile.py +++ b/tests/base/test_percentile.py @@ -1,8 +1,10 @@ import pytest import pydp as dp +pytestmark = pytest.mark.skip(reason="this percentile is not the DP implementation") -@pytest.mark.parametrize("input_class", [dp.PercentileInt, dp.PercentileDouble]) + +@pytest.mark.parametrize("input_class", [dp.algorithms.laplacian.Percentile]) class TestPercentile: def test_empty_input_set(self, input_class): percentile = input_class() @@ -64,7 +66,7 @@ def test_memory(self, input_class): assert small_memory < large_memory -# TODO: port the following tests -# -# TYPED_TEST(PercentileTest, SerializeMerge) -# +# # TODO: port the 
following tests +# # +# # TYPED_TEST(PercentileTest, SerializeMerge) +# # diff --git a/third_party/differential-privacy b/third_party/differential-privacy index b7f4c39d..1b1dc663 160000 --- a/third_party/differential-privacy +++ b/third_party/differential-privacy @@ -1 +1 @@ -Subproject commit b7f4c39d9f73d67b34cdbd1b8483e5f72072fc73 +Subproject commit 1b1dc6639173c0a13613189ec21851604a4c7335 From d71a94f3d9682b0a7317b1f75938c17e781a2512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Fri, 14 Aug 2020 14:52:45 +0100 Subject: [PATCH 35/47] Fix compilation with new DP hash --- prereqs_linux.sh | 3 ++- src/bindings/PyDP/algorithms/order_statistics.cpp | 3 ++- tests/algorithms/test_distributions.py | 2 -- tests/base/test_percentile.py | 5 ++++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/prereqs_linux.sh b/prereqs_linux.sh index 449527a8..2496da7f 100755 --- a/prereqs_linux.sh +++ b/prereqs_linux.sh @@ -57,8 +57,9 @@ fi # Downloading the Google DP library git submodule update --init --recursive + # checkout out to particular commit -cd third_party/differential-privacy && git checkout 0b0a5c2315d84a6a7b1ff34591e33ec11680891e && \ +cd third_party/differential-privacy && git checkout 1b1dc6639173c0a13613189ec21851604a4c7335 && \ cd - # renaming workspace.bazel to workspace mv third_party/differential-privacy/cc/WORKSPACE.bazel third_party/differential-privacy/cc/WORKSPACE diff --git a/src/bindings/PyDP/algorithms/order_statistics.cpp b/src/bindings/PyDP/algorithms/order_statistics.cpp index 80442f6d..d39063cb 100644 --- a/src/bindings/PyDP/algorithms/order_statistics.cpp +++ b/src/bindings/PyDP/algorithms/order_statistics.cpp @@ -1,11 +1,12 @@ // Provides bindings for Order Statistics -#include "../pydp_lib/algorithm_builder.hpp" #include "algorithms/order-statistics.h" #include "pybind11/complex.h" #include "pybind11/functional.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" +#include 
"../pydp_lib/algorithm_builder.hpp" + using namespace std; namespace py = pybind11; diff --git a/tests/algorithms/test_distributions.py b/tests/algorithms/test_distributions.py index daf593ba..e9be26eb 100644 --- a/tests/algorithms/test_distributions.py +++ b/tests/algorithms/test_distributions.py @@ -5,8 +5,6 @@ from itertools import accumulate import math -pytestmark = pytest.mark.skip(reason="distributions fail to compile") - k_num_samples = 10000000 k_num_geometric_samples = 1000000 k_gaussian_samples = 1000000 diff --git a/tests/base/test_percentile.py b/tests/base/test_percentile.py index f79d8c36..8e4252d0 100644 --- a/tests/base/test_percentile.py +++ b/tests/base/test_percentile.py @@ -1,7 +1,10 @@ import pytest import pydp as dp -pytestmark = pytest.mark.skip(reason="this percentile is not the DP implementation") +# TODO: check whether to delete this test suit or update it +pytestmark = pytest.mark.skip( + reason="these tests were for the older percentile wrapping." +) @pytest.mark.parametrize("input_class", [dp.algorithms.laplacian.Percentile]) From db3314ed50b191ef805795630deef42e65cfdf0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Fri, 14 Aug 2020 15:55:25 +0100 Subject: [PATCH 36/47] Avoid using string formating --- pydp/algorithms/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index 5e6d31bd..98783996 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -11,7 +11,7 @@ def __init__(self, **kwargs): if "upper_bound" in kwargs and kwargs["upper_bound"] is None: kwargs.pop("upper_bound") - binded_class = f"{self.__class__.__name__}{self.__map_dtype_str(dtype)}" + binded_class = self.__class__.__name__ + self.__map_dtype_str(dtype) class_ = getattr(_algorithms, binded_class) self.dtype = dtype From fc83c4c7d37aebb943b36603b78881e96b3ede28 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Fri, 14 Aug 2020 16:05:11 +0100 Subject: [PATCH 37/47] Avoid using formatted string --- pydp/algorithms/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/algorithm.py index 98783996..11edf264 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/algorithm.py @@ -26,7 +26,7 @@ def __map_dtype_str(dtype): elif dtype == "float": return "Double" else: - raise RuntimeError(f"dtype: {dtype} is not supported") + raise RuntimeError("dtype: {} is not supported".format(dtype)) @property def epsilon(self): From e461e531b16b423b22404133ab2091d5150c7efc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Fri, 14 Aug 2020 20:49:35 +0100 Subject: [PATCH 38/47] Tidy up public interface (#246) * Tidy up public interface * Leave serialize and merge methods commented out --- pydp/__init__.py | 3 +- .../{algorithm.py => _algorithm.py} | 28 ++++++----- pydp/algorithms/laplacian/__init__.py | 6 +-- ...d_algorithms.py => _bounded_algorithms.py} | 2 +- .../laplacian/{count.py => _count.py} | 2 +- pydp/algorithms/laplacian/_percentile.py | 5 ++ pydp/algorithms/laplacian/percentile.py | 5 -- pydp/distributions/__init__.py | 1 + pydp/util/__init__.py | 1 + src/bindings/PyDP/algorithms/util.cpp | 37 +++++++------- src/bindings/PyDP/bindings.cpp | 15 +++--- tests/algorithms/test_algorithms.py | 0 tests/algorithms/test_distributions.py | 49 ++++++++++++------- tests/algorithms/test_rand.py | 6 +-- tests/base/test_logging.py | 3 ++ tests/base/test_percentile.py | 2 +- tests/base/test_status.py | 3 ++ tests/base/test_statusor.py | 0 18 files changed, 95 insertions(+), 73 deletions(-) rename pydp/algorithms/{algorithm.py => _algorithm.py} (87%) rename pydp/algorithms/laplacian/{bounded_algorithms.py => _bounded_algorithms.py} (88%) rename pydp/algorithms/laplacian/{count.py => _count.py} (50%) create mode 100644 
pydp/algorithms/laplacian/_percentile.py delete mode 100644 pydp/algorithms/laplacian/percentile.py create mode 100644 pydp/distributions/__init__.py create mode 100644 pydp/util/__init__.py delete mode 100644 tests/algorithms/test_algorithms.py delete mode 100644 tests/base/test_statusor.py diff --git a/pydp/__init__.py b/pydp/__init__.py index 8f2ec341..dc868ffb 100644 --- a/pydp/__init__.py +++ b/pydp/__init__.py @@ -1,5 +1,4 @@ -from ._pydp import * -from pydp import algorithms +from pydp import algorithms, distributions, util __version__ = "0.1.4" diff --git a/pydp/algorithms/algorithm.py b/pydp/algorithms/_algorithm.py similarity index 87% rename from pydp/algorithms/algorithm.py rename to pydp/algorithms/_algorithm.py index 11edf264..3cf81f7d 100644 --- a/pydp/algorithms/algorithm.py +++ b/pydp/algorithms/_algorithm.py @@ -105,19 +105,21 @@ def reset(self): """ return self.__algorithm.reset() - def serialize(self): - """ - Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged. - - Returns empty summary for algorithms for which serialize is unimplemented. - """ - return self.__algorithm.serialize() - - def merge(self, summary): - """ - Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty. - """ - return self.__algorithm.merge(summary) + # TODO: Wrap Summary class before exposing serialize and merge methods. + # + # def serialize(self): + # """ + # Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged. + # + # Returns empty summary for algorithms for which serialize is unimplemented. 
+ # """ + # return self.__algorithm.serialize() + # + # def merge(self, summary): + # """ + # Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty. + # """ + # return self.__algorithm.merge(summary) def noise_confidence_interval(self, confidence_level, privacy_budget): """ diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py index 9ac422df..576ae8bb 100644 --- a/pydp/algorithms/laplacian/__init__.py +++ b/pydp/algorithms/laplacian/__init__.py @@ -1,4 +1,4 @@ -from .bounded_algorithms import ( +from ._bounded_algorithms import ( BoundedMean, BoundedSum, BoundedStandardDeviation, @@ -7,8 +7,8 @@ Min, Median, ) -from .count import Count -from .percentile import Percentile +from ._count import Count +from ._percentile import Percentile __all__ = [ "BoundedMean", diff --git a/pydp/algorithms/laplacian/bounded_algorithms.py b/pydp/algorithms/laplacian/_bounded_algorithms.py similarity index 88% rename from pydp/algorithms/laplacian/bounded_algorithms.py rename to pydp/algorithms/laplacian/_bounded_algorithms.py index 71de419d..e1c70afb 100644 --- a/pydp/algorithms/laplacian/bounded_algorithms.py +++ b/pydp/algorithms/laplacian/_bounded_algorithms.py @@ -1,4 +1,4 @@ -from ..algorithm import BoundedAlgorithm +from .._algorithm import BoundedAlgorithm class BoundedMean(BoundedAlgorithm): diff --git a/pydp/algorithms/laplacian/count.py b/pydp/algorithms/laplacian/_count.py similarity index 50% rename from pydp/algorithms/laplacian/count.py rename to pydp/algorithms/laplacian/_count.py index e8b8a722..df1e217c 100644 --- a/pydp/algorithms/laplacian/count.py +++ b/pydp/algorithms/laplacian/_count.py @@ -1,4 +1,4 @@ -from ..algorithm import Algorithm +from .._algorithm import Algorithm class Count(Algorithm): diff --git 
a/pydp/algorithms/laplacian/_percentile.py b/pydp/algorithms/laplacian/_percentile.py new file mode 100644 index 00000000..50da1d84 --- /dev/null +++ b/pydp/algorithms/laplacian/_percentile.py @@ -0,0 +1,5 @@ +from .._algorithm import BoundedAlgorithm, PercentileBase + + +class Percentile(PercentileBase): + pass diff --git a/pydp/algorithms/laplacian/percentile.py b/pydp/algorithms/laplacian/percentile.py deleted file mode 100644 index b1c9e791..00000000 --- a/pydp/algorithms/laplacian/percentile.py +++ /dev/null @@ -1,5 +0,0 @@ -from ..algorithm import BoundedAlgorithm, PercentileBase - - -class Percentile(PercentileBase): - pass diff --git a/pydp/distributions/__init__.py b/pydp/distributions/__init__.py new file mode 100644 index 00000000..5463f84e --- /dev/null +++ b/pydp/distributions/__init__.py @@ -0,0 +1 @@ +from .._pydp._distributions import * diff --git a/pydp/util/__init__.py b/pydp/util/__init__.py new file mode 100644 index 00000000..dbea346d --- /dev/null +++ b/pydp/util/__init__.py @@ -0,0 +1 @@ +from .._pydp._util import * diff --git a/src/bindings/PyDP/algorithms/util.cpp b/src/bindings/PyDP/algorithms/util.cpp index e88ef840..d8bcdad6 100644 --- a/src/bindings/PyDP/algorithms/util.cpp +++ b/src/bindings/PyDP/algorithms/util.cpp @@ -9,37 +9,36 @@ namespace py = pybind11; namespace dp = differential_privacy; void init_algorithms_util(py::module& m) { - py::module util = m.def_submodule("util", "Some Utility Functions"); - util.attr("__module__") = "pydp"; - util.def("xor_strings", &dp::XorStrings); - util.def("default_epsilon", &dp::DefaultEpsilon); - util.def("get_next_power_of_two", &dp::GetNextPowerOfTwo); - util.def("qnorm", &dp::Qnorm); - util.def("mean", &dp::Mean); - util.def("mean", &dp::Mean); - util.def("variance", &dp::Variance); - util.def("standard_deviation", &dp::StandardDev); - util.def("order_statistics", &dp::OrderStatistic); - util.def("correlation", &dp::Correlation); - util.def("vector_filter", &dp::VectorFilter); - 
util.def("vector_to_string", &dp::VectorToString); - util.def("round_to_nearest_multiple", &dp::RoundToNearestMultiple); - util.def("safe_add", [](int64_t i, int64_t j) { + m.attr("__module__") = "pydp"; + m.def("xor_strings", &dp::XorStrings); + m.def("default_epsilon", &dp::DefaultEpsilon); + m.def("get_next_power_of_two", &dp::GetNextPowerOfTwo); + m.def("qnorm", &dp::Qnorm); + m.def("mean", &dp::Mean); + m.def("mean", &dp::Mean); + m.def("variance", &dp::Variance); + m.def("standard_deviation", &dp::StandardDev); + m.def("order_statistics", &dp::OrderStatistic); + m.def("correlation", &dp::Correlation); + m.def("vector_filter", &dp::VectorFilter); + m.def("vector_to_string", &dp::VectorToString); + m.def("round_to_nearest_multiple", &dp::RoundToNearestMultiple); + m.def("safe_add", [](int64_t i, int64_t j) { int64_t k; bool result = dp::SafeAdd(i, j, &k); if (result) return k; throw std::runtime_error("Result of addition will overflow."); }); - util.def("safe_subtract", [](int64_t i, int64_t j) { + m.def("safe_subtract", [](int64_t i, int64_t j) { int64_t k; bool result = dp::SafeSubtract(i, j, &k); if (result) return k; throw std::runtime_error("Result of subtraction will overflow."); }); - util.def("safe_square", [](int64_t i) { + m.def("safe_square", [](int64_t i) { int64_t k; bool result = dp::SafeSquare(i, &k); if (result) return k; throw std::runtime_error("Result of squaring will overflow."); }); -} \ No newline at end of file +} diff --git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp index c8a46865..24399826 100644 --- a/src/bindings/PyDP/bindings.cpp +++ b/src/bindings/PyDP/bindings.cpp @@ -7,7 +7,6 @@ namespace py = pybind11; // base void init_base_status(py::module &); void init_base_logging(py::module &); -// void init_base_percentile(py::module &); // bounded functions void init_algorithms_bounded_functions(py::module &); @@ -34,17 +33,21 @@ PYBIND11_MODULE(_pydp, m) { // Base init_base_status(m); init_base_logging(m); - // 
init_base_percentile(m); // Algorithms auto malgorithms = m.def_submodule("_algorithms"); init_algorithms_bounded_functions(malgorithms); - init_algorithms_util(m); - init_algorithms_distributions(m); - init_algorithms_order_statistics(malgorithms); - init_algorithms_rand(m); init_algorithms_count(malgorithms); + init_algorithms_order_statistics(malgorithms); + + auto mdistributions = m.def_submodule("_distributions"); + init_algorithms_distributions(mdistributions); + + auto mutil = m.def_submodule("_util", "Some Utility Functions"); + init_algorithms_rand(mutil); + init_algorithms_util(mutil); // Proto + // TODO: Delete if it is not necessary (we no longer return StatusOr to the user) init_proto(m); } diff --git a/tests/algorithms/test_algorithms.py b/tests/algorithms/test_algorithms.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/algorithms/test_distributions.py b/tests/algorithms/test_distributions.py index e9be26eb..ff3d8650 100644 --- a/tests/algorithms/test_distributions.py +++ b/tests/algorithms/test_distributions.py @@ -1,10 +1,16 @@ import pytest +from pydp.distributions import ( + LaplaceDistribution, + GaussianDistribution, + # GeometricDistribution, +) import pydp as dp import math from typing import List from itertools import accumulate import math + k_num_samples = 10000000 k_num_geometric_samples = 1000000 k_gaussian_samples = 1000000 @@ -50,12 +56,12 @@ def kurtosis(samples: List[float], mu: float, var: float): class TestLaplaceDistribution: def test_diversity_getter(self): sensitivity, epsilon = 1.0, 22.0 - dist = dp.LaplaceDistribution(epsilon=epsilon, sensitivity=sensitivity) + dist = LaplaceDistribution(epsilon=epsilon, sensitivity=sensitivity) assert dist.get_diversity() == sensitivity / epsilon def test_check_statistics_for_geo_unit_values(self): - ld = dp.LaplaceDistribution(epsilon=1.0, sensitivity=1.0) + ld = LaplaceDistribution(epsilon=1.0, sensitivity=1.0) samples = [ld.sample(scale=1.0) for _ in 
range(k_num_geometric_samples)] mean = dp.util.mean(samples) var = dp.util.variance(samples) @@ -69,14 +75,14 @@ def test_check_statistics_for_geo_unit_values(self): class TestGaussianDistribution: def test_standard_deviation_getter(self): stddev = k_one_over_log2 - dist = dp.GaussianDistribution(stddev) + dist = GaussianDistribution(stddev) assert dist.stddev == stddev class TestLaplaceDistributionDatatypes: def test_LaplaceDistributionTypes(self): - ld = dp.LaplaceDistribution(epsilon=1.0, sensitivity=1.0) - assert isinstance(ld, dp.LaplaceDistribution) + ld = LaplaceDistribution(epsilon=1.0, sensitivity=1.0) + assert isinstance(ld, LaplaceDistribution) sud = ld.get_uniform_double() assert isinstance(sud, float) @@ -93,8 +99,8 @@ def test_LaplaceDistributionTypes(self): class TestGaussianDistributionDataTypes: def test_GaussianDistributionTypes(self): - gd = dp.GaussianDistribution(3) - assert isinstance(gd, dp.GaussianDistribution) + gd = GaussianDistribution(3) + assert isinstance(gd, GaussianDistribution) gds = gd.sample() gds1 = gd.sample(1.0) @@ -104,18 +110,23 @@ def test_GaussianDistributionTypes(self): assert isinstance(gdstd, float) -# class TestGeometricDistribution: -# def test_ratios(self): -# from collections import Counter -# p=1e-2 -# dist = dp.GeometricDistribution(lambda_=-1.0*math.log(1-p)) -# samples = [dist.sample() for _ in range(k_num_geometric_samples)] -# counts = list(Counter([s for s in samples if s < 51]).values()) -# ratios = [c_i/c_j for c_i, c_j in zip(counts[:-1], counts[1:])] -# This test fails. It's a replica of -# https://github.com/google/differential-privacy/blob/9923ad4ee1b84a7002085e50345fcc05f2b21bcb/cc/algorithms/distributions_test.cc#L208 -# and should pass. -# assert expect_near(p, dp.util.mean(ratios), p / 1e-2) +class TestGeometricDistribution: + @pytest.mark.skip(reason="This test should pass, see comments") + def test_ratios(self): + """ + This test fails. 
It's a replica of + https://github.com/google/differential-privacy/blob/9923ad4ee1b84a7002085e50345fcc05f2b21bcb/cc/algorithms/distributions_test.cc#L208 and should pass. + """ + from collections import Counter + + p = 1e-2 + dist = GeometricDistribution(lambda_=-1.0 * math.log(1 - p)) + samples = [dist.sample() for _ in range(k_num_geometric_samples)] + counts = list(Counter([s for s in samples if s < 51]).values()) + ratios = [c_i / c_j for c_i, c_j in zip(counts[:-1], counts[1:])] + + assert expect_near(p, dp.util.mean(ratios), p / 1e-2) + # TODO: port the following tests # diff --git a/tests/algorithms/test_rand.py b/tests/algorithms/test_rand.py index d5d7ef84..21c67770 100644 --- a/tests/algorithms/test_rand.py +++ b/tests/algorithms/test_rand.py @@ -1,14 +1,14 @@ import pytest -import pydp as dp +from pydp.util import Geometric, UniformDouble def test_rand_UniformDouble(): - ud = dp.UniformDouble() + ud = UniformDouble() assert isinstance(ud, float) assert 0 <= ud < 1 def test_rand_Geometric(): - gn = dp.Geometric() + gn = Geometric() assert isinstance(gn, int) assert 0 <= gn <= 1025 diff --git a/tests/base/test_logging.py b/tests/base/test_logging.py index 23687c9d..a3bfca5f 100644 --- a/tests/base/test_logging.py +++ b/tests/base/test_logging.py @@ -1,6 +1,9 @@ import pytest import pydp as dp +# TODO: Check whether we should delete logging public binding or allow it +pytestmark = pytest.mark.skip(reason="we do not return allow user to set up logging.") + class TestLogging: def test_logging(self): diff --git a/tests/base/test_percentile.py b/tests/base/test_percentile.py index 8e4252d0..4823efc5 100644 --- a/tests/base/test_percentile.py +++ b/tests/base/test_percentile.py @@ -3,7 +3,7 @@ # TODO: check whether to delete this test suit or update it pytestmark = pytest.mark.skip( - reason="these tests were for the older percentile wrapping." + reason="these tests were for the older percentile wrapping. 
See TODO" ) diff --git a/tests/base/test_status.py b/tests/base/test_status.py index 703f1afd..3f6ec77f 100644 --- a/tests/base/test_status.py +++ b/tests/base/test_status.py @@ -2,6 +2,9 @@ import pydp as dp +# TODO: Check whether we should delete status public bindings or return status to the user +pytestmark = pytest.mark.skip(reason="we do not return status to the user.") + class TestStatus: def test_hello(self): diff --git a/tests/base/test_statusor.py b/tests/base/test_statusor.py deleted file mode 100644 index e69de29b..00000000 From 17e834ed01738f4ef6383659e384ea6e6a97bbed Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 00:12:51 +0530 Subject: [PATCH 39/47] added comments for each module --- .../laplacian/_bounded_algorithms.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pydp/algorithms/laplacian/_bounded_algorithms.py b/pydp/algorithms/laplacian/_bounded_algorithms.py index e1c70afb..7ef1bfe0 100644 --- a/pydp/algorithms/laplacian/_bounded_algorithms.py +++ b/pydp/algorithms/laplacian/_bounded_algorithms.py @@ -2,28 +2,56 @@ class BoundedMean(BoundedAlgorithm): + ''' + Bouned Mean Explaination + TODO + ''' pass class BoundedSum(BoundedAlgorithm): + ''' + Bouned Sum Explaination + TODO + ''' pass class BoundedStandardDeviation(BoundedAlgorithm): + ''' + Bouned Standard Deviation Explaination + TODO + ''' pass class BoundedVariance(BoundedAlgorithm): + ''' + Bouned Variance Explaination + TODO + ''' pass class Max(BoundedAlgorithm): + ''' + Max Explaination + TODO + ''' pass class Min(BoundedAlgorithm): + ''' + Min Explaination + TODO + ''' pass class Median(BoundedAlgorithm): + ''' + Median Explaination + TODO + ''' pass From d4c897d70b35b8ee59ed851d2e953e2a5b47d83f Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 00:14:59 +0530 Subject: [PATCH 40/47] added mypy bindings --- pydp/algorithms/_algorithm.py | 37 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) 
diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py index 3cf81f7d..dd49c369 100644 --- a/pydp/algorithms/_algorithm.py +++ b/pydp/algorithms/_algorithm.py @@ -1,5 +1,6 @@ from .._pydp import _algorithms +from typing import Union, List class MetaAlgorithm: def __init__(self, **kwargs): @@ -20,7 +21,7 @@ def __init__(self, **kwargs): self._linf_sensitivity = kwargs.get("linf_sensitivity", "Not set") @staticmethod - def __map_dtype_str(dtype): + def __map_dtype_str(dtype: str): if dtype == "int": return "Int" elif dtype == "float": @@ -29,51 +30,51 @@ def __map_dtype_str(dtype): raise RuntimeError("dtype: {} is not supported".format(dtype)) @property - def epsilon(self): + def epsilon(self) -> float: """ Returns the epsilon set at initialization. """ return self.__algorithm.epsilon @property - def l0_sensitivity(self): + def l0_sensitivity(self)-> float: """ Returns the l0_sensitivity set at initialization. """ return self._l0_sensitivity @property - def linf_sensitivity(self): + def linf_sensitivity(self) -> float: """ Returns the linf_sensitivity set at initialization. """ return self._linf_sensitivity - def privacy_budget_left(self): + def privacy_budget_left(self) -> float: """ Returns the remaining privacy budget. """ return self.__algorithm.privacy_budget_left() - def memory_used(self): + def memory_used(self) -> float: """ Returns the memory currently used by the algorithm in bytes. """ return self.__algorithm.memory_used() - def add_entries(self, list): + def add_entries(self, data: List[Union[int, float]]) -> None: """ Adds multiple inputs to the algorithm. """ - return self.__algorithm.add_entries(list) + return self.__algorithm.add_entries(data) - def add_entry(self, value): + def add_entry(self, value: Union[int, float]) -> None: """ Adds one input to the algorithm. 
""" return self.__algorithm.add_entry(value) - def quick_result(self, data): + def quick_result(self, data: List[Union[int, float]]) -> Union[int, float]: """ Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output. @@ -81,7 +82,7 @@ def quick_result(self, data): """ return self.__algorithm.result(data) - def result(self, privacy_budget=None, noise_interval_level=None): + def result(self, privacy_budget: float=None, noise_interval_level=None) -> Union[int, float]: """ Gets the algorithm result. @@ -99,7 +100,7 @@ def result(self, privacy_budget=None, noise_interval_level=None): else: return self.__algorithm.partial_result(privacy_budget, noise_interval_level) - def reset(self): + def reset(self)-> None: """ Resets the algorithm to a state in which it has received no input. After Reset is called, the algorithm should only consider input added after the last Reset call when providing output. """ @@ -146,12 +147,12 @@ def __init__(self, epsilon=1.0, dtype="int"): class BoundedAlgorithm(MetaAlgorithm): def __init__( self, - epsilon=1.0, - lower_bound=None, - upper_bound=None, - l0_sensitivity=1, - linf_sensitivity=1, - dtype="int", + epsilon:float=1.0, + lower_bound:Union[int, float, None]=None, + upper_bound:Union[int, float, None]=None, + l0_sensitivity: int=1, + linf_sensitivity:int =1, + dtype:str="int", ): super().__init__( epsilon=epsilon, From 9a147e0b6913efb1330f9eca26b0e55615764db6 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 00:15:53 +0530 Subject: [PATCH 41/47] removing redundant classes --- pydp/algorithms/_algorithm.py | 30 ------------------------ pydp/algorithms/laplacian/_count.py | 11 ++++++--- pydp/algorithms/laplacian/_percentile.py | 28 +++++++++++++++++++--- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py index dd49c369..e6a799ba 100644 --- a/pydp/algorithms/_algorithm.py +++ 
b/pydp/algorithms/_algorithm.py @@ -138,12 +138,6 @@ def noise_confidence_interval(self, confidence_level, privacy_budget): confidence_level, privacy_budget ) - -class Algorithm(MetaAlgorithm): - def __init__(self, epsilon=1.0, dtype="int"): - super().__init__(epsilon=epsilon, dtype=dtype) - - class BoundedAlgorithm(MetaAlgorithm): def __init__( self, @@ -162,27 +156,3 @@ def __init__( linf_sensitivity=linf_sensitivity, dtype=dtype, ) - - -class PercentileBase(MetaAlgorithm): - def __init__( - self, - epsilon=1.0, - percentile=0.0, - lower_bound=None, - upper_bound=None, - dtype="int", - ): - super().__init__( - epsilon=epsilon, - percentile=percentile, - lower_bound=lower_bound, - upper_bound=upper_bound, - dtype=dtype, - ) - - @property - def percentile(self): - """percentile Gets the value that was set in the constructor. - """ - return self._MetaAlgorithm__algorithm.percentile diff --git a/pydp/algorithms/laplacian/_count.py b/pydp/algorithms/laplacian/_count.py index df1e217c..d4b3db58 100644 --- a/pydp/algorithms/laplacian/_count.py +++ b/pydp/algorithms/laplacian/_count.py @@ -1,5 +1,10 @@ -from .._algorithm import Algorithm +from .._algorithm import MetaAlgorithm -class Count(Algorithm): - pass +class Count(MetaAlgorithm): + ''' + Count Explaination + TODO + ''' + def __init__(self, epsilon: float=1.0, dtype: str="int"): + super().__init__(epsilon=epsilon, dtype=dtype) diff --git a/pydp/algorithms/laplacian/_percentile.py b/pydp/algorithms/laplacian/_percentile.py index 50da1d84..2c0ac364 100644 --- a/pydp/algorithms/laplacian/_percentile.py +++ b/pydp/algorithms/laplacian/_percentile.py @@ -1,5 +1,27 @@ -from .._algorithm import BoundedAlgorithm, PercentileBase +from .._algorithm import MetaAlgorithm +from typing import Union -class Percentile(PercentileBase): - pass +class Percentile(MetaAlgorithm): + def __init__( + self, + epsilon: float=1.0, + percentile: float=0.0, + lower_bound: Union[int, float, None]=None, + upper_bound: Union[int, float, 
None]=None, + dtype:str="int", + ): + super().__init__( + epsilon=epsilon, + percentile=percentile, + lower_bound=lower_bound, + upper_bound=upper_bound, + dtype=dtype, + ) + + @property + def percentile(self)-> float: + """ + percentile Gets the value that was set in the constructor. + """ + return self._MetaAlgorithm__algorithm.percentile From 45436cd0db6ffe4029886d5951ef18446ef80956 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 00:16:19 +0530 Subject: [PATCH 42/47] changed upperbound to a more reaslistic values --- tests/algorithms/test_order_statistics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/algorithms/test_order_statistics.py b/tests/algorithms/test_order_statistics.py index 2b62e314..737216f0 100644 --- a/tests/algorithms/test_order_statistics.py +++ b/tests/algorithms/test_order_statistics.py @@ -25,7 +25,7 @@ def test_min(data, dtype): minn = dp.algorithms.laplacian.Min( - dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 ) assert expect_near(min(data), minn.quick_result(data), 10) assert expect_near(0, minn.quick_result(data), 10) @@ -34,7 +34,7 @@ def test_min(data, dtype): @pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) def test_max(data, dtype): maxx = dp.algorithms.laplacian.Max( - dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 ) assert 190 < maxx.quick_result(data) < 210 @@ -45,7 +45,7 @@ def test_max(data, dtype): def test_median(data, dtype): median = dp.algorithms.laplacian.Median( - dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 ) assert expect_near(statistics.median(data), median.quick_result(data), 20) @@ -61,7 +61,7 @@ def test_percentile_getter(dtype): epsilon=1.0, percentile=expected_percentile, lower_bound=0, - upper_bound=2048, + 
upper_bound=200, ) assert dp_percentile.percentile == expected_percentile @@ -71,7 +71,7 @@ def test_order_statistic_datatypes(data, dtype, order_statistic): order_statistic_1 = order_statistic(dtype=dtype, epsilon=1.0) order_statistic_2 = order_statistic( - dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=2048 + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 ) res = order_statistic_2.quick_result(data) @@ -91,7 +91,7 @@ def test_order_statistic_datatypes(data, dtype, order_statistic): def test_percentile_datatypes(data, dtype, dp_percentile): dp_percentile_2 = dp_percentile( - dtype=dtype, epsilon=1.0, percentile=0.45, lower_bound=0, upper_bound=2048 + dtype=dtype, epsilon=1.0, percentile=0.45, lower_bound=0, upper_bound=200 ) res = dp_percentile_2.quick_result(data) From 5bd1ca33cac46ab7895990b8e5a57abff7f1c389 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 00:17:26 +0530 Subject: [PATCH 43/47] styling python files --- pydp/algorithms/_algorithm.py | 22 +++++++----- .../laplacian/_bounded_algorithms.py | 35 +++++++++++-------- pydp/algorithms/laplacian/_count.py | 7 ++-- pydp/algorithms/laplacian/_percentile.py | 12 +++---- 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py index e6a799ba..7a2ac1f9 100644 --- a/pydp/algorithms/_algorithm.py +++ b/pydp/algorithms/_algorithm.py @@ -2,6 +2,7 @@ from typing import Union, List + class MetaAlgorithm: def __init__(self, **kwargs): dtype = kwargs.pop("dtype") @@ -37,7 +38,7 @@ def epsilon(self) -> float: return self.__algorithm.epsilon @property - def l0_sensitivity(self)-> float: + def l0_sensitivity(self) -> float: """ Returns the l0_sensitivity set at initialization. 
""" @@ -82,7 +83,9 @@ def quick_result(self, data: List[Union[int, float]]) -> Union[int, float]: """ return self.__algorithm.result(data) - def result(self, privacy_budget: float=None, noise_interval_level=None) -> Union[int, float]: + def result( + self, privacy_budget: float = None, noise_interval_level=None + ) -> Union[int, float]: """ Gets the algorithm result. @@ -100,7 +103,7 @@ def result(self, privacy_budget: float=None, noise_interval_level=None) -> Union else: return self.__algorithm.partial_result(privacy_budget, noise_interval_level) - def reset(self)-> None: + def reset(self) -> None: """ Resets the algorithm to a state in which it has received no input. After Reset is called, the algorithm should only consider input added after the last Reset call when providing output. """ @@ -138,15 +141,16 @@ def noise_confidence_interval(self, confidence_level, privacy_budget): confidence_level, privacy_budget ) + class BoundedAlgorithm(MetaAlgorithm): def __init__( self, - epsilon:float=1.0, - lower_bound:Union[int, float, None]=None, - upper_bound:Union[int, float, None]=None, - l0_sensitivity: int=1, - linf_sensitivity:int =1, - dtype:str="int", + epsilon: float = 1.0, + lower_bound: Union[int, float, None] = None, + upper_bound: Union[int, float, None] = None, + l0_sensitivity: int = 1, + linf_sensitivity: int = 1, + dtype: str = "int", ): super().__init__( epsilon=epsilon, diff --git a/pydp/algorithms/laplacian/_bounded_algorithms.py b/pydp/algorithms/laplacian/_bounded_algorithms.py index 7ef1bfe0..83636b15 100644 --- a/pydp/algorithms/laplacian/_bounded_algorithms.py +++ b/pydp/algorithms/laplacian/_bounded_algorithms.py @@ -2,56 +2,63 @@ class BoundedMean(BoundedAlgorithm): - ''' + """ Bouned Mean Explaination TODO - ''' + """ + pass class BoundedSum(BoundedAlgorithm): - ''' + """ Bouned Sum Explaination TODO - ''' + """ + pass class BoundedStandardDeviation(BoundedAlgorithm): - ''' + """ Bouned Standard Deviation Explaination TODO - ''' + """ + pass 
class BoundedVariance(BoundedAlgorithm): - ''' + """ Bouned Variance Explaination TODO - ''' + """ + pass class Max(BoundedAlgorithm): - ''' + """ Max Explaination TODO - ''' + """ + pass class Min(BoundedAlgorithm): - ''' + """ Min Explaination TODO - ''' + """ + pass class Median(BoundedAlgorithm): - ''' + """ Median Explaination TODO - ''' + """ + pass diff --git a/pydp/algorithms/laplacian/_count.py b/pydp/algorithms/laplacian/_count.py index d4b3db58..99e9733e 100644 --- a/pydp/algorithms/laplacian/_count.py +++ b/pydp/algorithms/laplacian/_count.py @@ -2,9 +2,10 @@ class Count(MetaAlgorithm): - ''' + """ Count Explaination TODO - ''' - def __init__(self, epsilon: float=1.0, dtype: str="int"): + """ + + def __init__(self, epsilon: float = 1.0, dtype: str = "int"): super().__init__(epsilon=epsilon, dtype=dtype) diff --git a/pydp/algorithms/laplacian/_percentile.py b/pydp/algorithms/laplacian/_percentile.py index 2c0ac364..08917326 100644 --- a/pydp/algorithms/laplacian/_percentile.py +++ b/pydp/algorithms/laplacian/_percentile.py @@ -5,11 +5,11 @@ class Percentile(MetaAlgorithm): def __init__( self, - epsilon: float=1.0, - percentile: float=0.0, - lower_bound: Union[int, float, None]=None, - upper_bound: Union[int, float, None]=None, - dtype:str="int", + epsilon: float = 1.0, + percentile: float = 0.0, + lower_bound: Union[int, float, None] = None, + upper_bound: Union[int, float, None] = None, + dtype: str = "int", ): super().__init__( epsilon=epsilon, @@ -20,7 +20,7 @@ def __init__( ) @property - def percentile(self)-> float: + def percentile(self) -> float: """ percentile Gets the value that was set in the constructor. 
""" From 91a8dd7b05ee69da948e134961a972cd209ef4e2 Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 16:27:08 +0530 Subject: [PATCH 44/47] added mypy for noise interval --- pydp/algorithms/_algorithm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py index 7a2ac1f9..880d9b2b 100644 --- a/pydp/algorithms/_algorithm.py +++ b/pydp/algorithms/_algorithm.py @@ -84,7 +84,7 @@ def quick_result(self, data: List[Union[int, float]]) -> Union[int, float]: return self.__algorithm.result(data) def result( - self, privacy_budget: float = None, noise_interval_level=None + self, privacy_budget: Union[float, None] = None, noise_interval_level: Union[float, None]=None ) -> Union[int, float]: """ Gets the algorithm result. @@ -125,7 +125,7 @@ def reset(self) -> None: # """ # return self.__algorithm.merge(summary) - def noise_confidence_interval(self, confidence_level, privacy_budget): + def noise_confidence_interval(self, confidence_level: float, privacy_budget: float) -> float: """ Returns the confidence_level confidence interval of noise added within the algorithm with specified privacy budget, using epsilon and other relevant, algorithm-specific parameters (e.g. bounds) provided by the constructor. 
From 04831f141cafc073eb6bbf369f2d7b805a4ff60c Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 17:10:37 +0530 Subject: [PATCH 45/47] fixed typos in comments --- pydp/algorithms/_algorithm.py | 2 +- pydp/algorithms/laplacian/_bounded_algorithms.py | 14 +++++++------- tests/base/test_percentile.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py index 880d9b2b..13f1f2b1 100644 --- a/pydp/algorithms/_algorithm.py +++ b/pydp/algorithms/_algorithm.py @@ -7,7 +7,7 @@ class MetaAlgorithm: def __init__(self, **kwargs): dtype = kwargs.pop("dtype") - # Delete bound params if the are not set to avoid conflicts with builder + # Delete bound params if they are not set to avoid conflicts with builder if "lower_bound" in kwargs and kwargs["lower_bound"] is None: kwargs.pop("lower_bound") if "upper_bound" in kwargs and kwargs["upper_bound"] is None: diff --git a/pydp/algorithms/laplacian/_bounded_algorithms.py b/pydp/algorithms/laplacian/_bounded_algorithms.py index 83636b15..edc8bf9e 100644 --- a/pydp/algorithms/laplacian/_bounded_algorithms.py +++ b/pydp/algorithms/laplacian/_bounded_algorithms.py @@ -3,7 +3,7 @@ class BoundedMean(BoundedAlgorithm): """ - Bouned Mean Explaination + Bouned Mean Explanation TODO """ @@ -12,7 +12,7 @@ class BoundedMean(BoundedAlgorithm): class BoundedSum(BoundedAlgorithm): """ - Bouned Sum Explaination + Bouned Sum Explanation TODO """ @@ -21,7 +21,7 @@ class BoundedSum(BoundedAlgorithm): class BoundedStandardDeviation(BoundedAlgorithm): """ - Bouned Standard Deviation Explaination + Bouned Standard Deviation Explanation TODO """ @@ -30,7 +30,7 @@ class BoundedStandardDeviation(BoundedAlgorithm): class BoundedVariance(BoundedAlgorithm): """ - Bouned Variance Explaination + Bouned Variance Explanation TODO """ @@ -39,7 +39,7 @@ class BoundedVariance(BoundedAlgorithm): class Max(BoundedAlgorithm): """ - Max Explaination + Max Explanation TODO 
""" @@ -48,7 +48,7 @@ class Max(BoundedAlgorithm): class Min(BoundedAlgorithm): """ - Min Explaination + Min Explanation TODO """ @@ -57,7 +57,7 @@ class Min(BoundedAlgorithm): class Median(BoundedAlgorithm): """ - Median Explaination + Median Explanation TODO """ diff --git a/tests/base/test_percentile.py b/tests/base/test_percentile.py index 4823efc5..c30da660 100644 --- a/tests/base/test_percentile.py +++ b/tests/base/test_percentile.py @@ -69,7 +69,7 @@ def test_memory(self, input_class): assert small_memory < large_memory -# # TODO: port the following tests -# # -# # TYPED_TEST(PercentileTest, SerializeMerge) -# # +# TODO: port the following tests +# +# TYPED_TEST(PercentileTest, SerializeMerge) +# From f459f0aa3137f5da49169bfb0155528c47abd6fe Mon Sep 17 00:00:00 2001 From: Chinmay Shah Date: Sun, 16 Aug 2020 17:12:02 +0530 Subject: [PATCH 46/47] style fix --- pydp/algorithms/_algorithm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py index 13f1f2b1..3091462c 100644 --- a/pydp/algorithms/_algorithm.py +++ b/pydp/algorithms/_algorithm.py @@ -84,7 +84,9 @@ def quick_result(self, data: List[Union[int, float]]) -> Union[int, float]: return self.__algorithm.result(data) def result( - self, privacy_budget: Union[float, None] = None, noise_interval_level: Union[float, None]=None + self, + privacy_budget: Union[float, None] = None, + noise_interval_level: Union[float, None] = None, ) -> Union[int, float]: """ Gets the algorithm result. 
@@ -125,7 +127,9 @@ def reset(self) -> None: # """ # return self.__algorithm.merge(summary) - def noise_confidence_interval(self, confidence_level: float, privacy_budget: float) -> float: + def noise_confidence_interval( + self, confidence_level: float, privacy_budget: float + ) -> float: """ Returns the confidence_level confidence interval of noise added within the algorithm with specified privacy budget, using epsilon and other relevant, algorithm-specific parameters (e.g. bounds) provided by the constructor. From 72c9270ae602af81e62b739b52ad3e8bceec2d6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20S=C3=A1nchez=20Medina?= Date: Mon, 17 Aug 2020 12:58:16 +0100 Subject: [PATCH 47/47] Fix typos --- pydp/algorithms/laplacian/_bounded_algorithms.py | 8 ++++---- pydp/algorithms/laplacian/_count.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pydp/algorithms/laplacian/_bounded_algorithms.py b/pydp/algorithms/laplacian/_bounded_algorithms.py index edc8bf9e..dbe4f5bc 100644 --- a/pydp/algorithms/laplacian/_bounded_algorithms.py +++ b/pydp/algorithms/laplacian/_bounded_algorithms.py @@ -3,7 +3,7 @@ class BoundedMean(BoundedAlgorithm): """ - Bouned Mean Explanation + Bounded Mean Explanation TODO """ @@ -12,7 +12,7 @@ class BoundedMean(BoundedAlgorithm): class BoundedSum(BoundedAlgorithm): """ - Bouned Sum Explanation + Bounded Sum Explanation TODO """ @@ -21,7 +21,7 @@ class BoundedSum(BoundedAlgorithm): class BoundedStandardDeviation(BoundedAlgorithm): """ - Bouned Standard Deviation Explanation + Bounded Standard Deviation Explanation TODO """ @@ -30,7 +30,7 @@ class BoundedStandardDeviation(BoundedAlgorithm): class BoundedVariance(BoundedAlgorithm): """ - Bouned Variance Explanation + Bounded Variance Explanation TODO """ diff --git a/pydp/algorithms/laplacian/_count.py b/pydp/algorithms/laplacian/_count.py index 99e9733e..c66ddcac 100644 --- a/pydp/algorithms/laplacian/_count.py +++ b/pydp/algorithms/laplacian/_count.py @@ -3,7 +3,7 @@ 
class Count(MetaAlgorithm): """ - Count Explaination + Count Explanation TODO """