diff --git a/.github/workflows/pypipublish_linux.yml b/.github/workflows/pypipublish_linux.yml index 4c258693..f96cafe3 100644 --- a/.github/workflows/pypipublish_linux.yml +++ b/.github/workflows/pypipublish_linux.yml @@ -19,7 +19,7 @@ jobs: cd third_party/differential-privacy git checkout 0b0a5c2315d84a6a7b1ff34591e33ec11680891e cd - - rm -rf third_party/differential-privacy/java + rm -rf third_party/differential-privacy/java rm -rf third_party/differential-privacy/examples/java - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 @@ -55,8 +55,8 @@ jobs: - name: Build pydp lib run: | bazel build src/python:bindings_test --verbose_failures - rm -f pydp.so - cp -f ./bazel-bin/src/bindings/pydp.so ./pydp + rm -f _pydp.so + cp -f ./bazel-bin/src/bindings/_pydp.so ./pydp - name: Build wheel run: | diff --git a/.github/workflows/pypipublish_osx.yml b/.github/workflows/pypipublish_osx.yml index 2534ce64..dc647714 100644 --- a/.github/workflows/pypipublish_osx.yml +++ b/.github/workflows/pypipublish_osx.yml @@ -19,7 +19,7 @@ jobs: cd third_party/differential-privacy git checkout 0b0a5c2315d84a6a7b1ff34591e33ec11680891es cd - - rm -rf third_party/differential-privacy/java + rm -rf third_party/differential-privacy/java rm -rf third_party/differential-privacy/examples/java - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 @@ -36,8 +36,8 @@ jobs: - name: Build pydp lib run: | bazel build src/python:bindings_test --verbose_failures - rm -f pydp.so - cp -f ./bazel-bin/src/bindings/pydp.so ./pydp + rm -f _pydp.so + cp -f ./bazel-bin/src/bindings/_pydp.so ./pydp - name: Build wheel @@ -49,4 +49,4 @@ jobs: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.TOKEN }} run: | - twine upload --skip-existing dist/*.whl \ No newline at end of file + twine upload --skip-existing dist/*.whl diff --git a/.gitignore b/.gitignore index 93312cf7..3f031637 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -pydp.so +_pydp.so # 
bazel files bazel-bin bazel-out diff --git a/build_PyDP.sh b/build_PyDP.sh index fee7b2ca..3227b180 100755 --- a/build_PyDP.sh +++ b/build_PyDP.sh @@ -2,5 +2,5 @@ pipenv install --dev --skip-lock bazel build src/python:bindings_test --verbose_failures -find ./ -name pydp.so -print0 | xargs -0 -I {} rm {} -cp -f ./bazel-bin/src/bindings/pydp.so ./pydp +find ./ -name _pydp.so -print0 | xargs -0 -I {} rm {} +cp -f ./bazel-bin/src/bindings/_pydp.so ./pydp diff --git a/prereqs_linux.sh b/prereqs_linux.sh index 449527a8..2496da7f 100755 --- a/prereqs_linux.sh +++ b/prereqs_linux.sh @@ -57,8 +57,9 @@ fi # Downloading the Google DP library git submodule update --init --recursive + # checkout out to particular commit -cd third_party/differential-privacy && git checkout 0b0a5c2315d84a6a7b1ff34591e33ec11680891e && \ +cd third_party/differential-privacy && git checkout 1b1dc6639173c0a13613189ec21851604a4c7335 && \ cd - # renaming workspace.bazel to workspace mv third_party/differential-privacy/cc/WORKSPACE.bazel third_party/differential-privacy/cc/WORKSPACE diff --git a/pydp/__init__.py b/pydp/__init__.py index f0491e47..dc868ffb 100644 --- a/pydp/__init__.py +++ b/pydp/__init__.py @@ -1,3 +1,4 @@ -from .pydp import * +from pydp import algorithms, distributions, util + __version__ = "0.1.4" diff --git a/pydp/algorithms/__init__.py b/pydp/algorithms/__init__.py new file mode 100644 index 00000000..afc01e21 --- /dev/null +++ b/pydp/algorithms/__init__.py @@ -0,0 +1,3 @@ +from . 
import laplacian + +__all__ = ["laplacian"] diff --git a/pydp/algorithms/_algorithm.py b/pydp/algorithms/_algorithm.py new file mode 100644 index 00000000..3091462c --- /dev/null +++ b/pydp/algorithms/_algorithm.py @@ -0,0 +1,166 @@ +from .._pydp import _algorithms + +from typing import Union, List + + +class MetaAlgorithm: + def __init__(self, **kwargs): + dtype = kwargs.pop("dtype") + + # Delete bound params if they are not set to avoid conflicts with builder + if "lower_bound" in kwargs and kwargs["lower_bound"] is None: + kwargs.pop("lower_bound") + if "upper_bound" in kwargs and kwargs["upper_bound"] is None: + kwargs.pop("upper_bound") + + binded_class = self.__class__.__name__ + self.__map_dtype_str(dtype) + class_ = getattr(_algorithms, binded_class) + + self.dtype = dtype + self.__algorithm = class_(**kwargs) + self._l0_sensitivity = kwargs.get("l0_sensitivity", "Not set") + self._linf_sensitivity = kwargs.get("linf_sensitivity", "Not set") + + @staticmethod + def __map_dtype_str(dtype: str): + if dtype == "int": + return "Int" + elif dtype == "float": + return "Double" + else: + raise RuntimeError("dtype: {} is not supported".format(dtype)) + + @property + def epsilon(self) -> float: + """ + Returns the epsilon set at initialization. + """ + return self.__algorithm.epsilon + + @property + def l0_sensitivity(self) -> float: + """ + Returns the l0_sensitivity set at initialization. + """ + return self._l0_sensitivity + + @property + def linf_sensitivity(self) -> float: + """ + Returns the linf_sensitivity set at initialization. + """ + return self._linf_sensitivity + + def privacy_budget_left(self) -> float: + """ + Returns the remaining privacy budget. + """ + return self.__algorithm.privacy_budget_left() + + def memory_used(self) -> float: + """ + Returns the memory currently used by the algorithm in bytes. 
+ """ + return self.__algorithm.memory_used() + + def add_entries(self, data: List[Union[int, float]]) -> None: + """ + Adds multiple inputs to the algorithm. + """ + return self.__algorithm.add_entries(data) + + def add_entry(self, value: Union[int, float]) -> None: + """ + Adds one input to the algorithm. + """ + return self.__algorithm.add_entry(value) + + def quick_result(self, data: List[Union[int, float]]) -> Union[int, float]: + """ + Runs the algorithm on the input using the epsilon parameter provided in the constructor and returns output. + + Consumes 100% of the privacy budget. + """ + return self.__algorithm.result(data) + + def result( + self, + privacy_budget: Union[float, None] = None, + noise_interval_level: Union[float, None] = None, + ) -> Union[int, float]: + """ + Gets the algorithm result. + + The default call consumes the remaining privacy budget. + + When `privacy_budget` (defined on [0,1]) is set, it consumes only the `privacy_budget` amount of budget. + + `noise_interval_level` provides the confidence level of the noise confidence interval, which may be included in the algorithm output. + """ + + if privacy_budget is None: + return self.__algorithm.partial_result() + elif noise_interval_level is None: + return self.__algorithm.partial_result(privacy_budget) + else: + return self.__algorithm.partial_result(privacy_budget, noise_interval_level) + + def reset(self) -> None: + """ + Resets the algorithm to a state in which it has received no input. After Reset is called, the algorithm should only consider input added after the last Reset call when providing output. + """ + return self.__algorithm.reset() + + # TODO: Wrap Summary class before exposing serialize and merge methods. + # + # def serialize(self): + # """ + # Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged. + # + # Returns empty summary for algorithms for which serialize is unimplemented. 
+ # """ + # return self.__algorithm.serialize() + # + # def merge(self, summary): + # """ + # Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty. + # """ + # return self.__algorithm.merge(summary) + + def noise_confidence_interval( + self, confidence_level: float, privacy_budget: float + ) -> float: + """ + Returns the confidence_level confidence interval of noise added within the algorithm with specified privacy budget, using epsilon and other relevant, algorithm-specific parameters (e.g. bounds) provided by the constructor. + + This metric may be used to gauge the error rate introduced by the noise. + + If the returned value is <x,y>, then the noise added has a confidence_level chance of being in the domain [x,y]. + + By default, NoiseConfidenceInterval() returns an error. Algorithms for which a confidence interval can feasibly be calculated override this and output the relevant value. + + Conservatively, we do not release the error rate for algorithms whose confidence intervals rely on input size. 
+ """ + return self.__algorithm.noise_confidence_interval( + confidence_level, privacy_budget + ) + + +class BoundedAlgorithm(MetaAlgorithm): + def __init__( + self, + epsilon: float = 1.0, + lower_bound: Union[int, float, None] = None, + upper_bound: Union[int, float, None] = None, + l0_sensitivity: int = 1, + linf_sensitivity: int = 1, + dtype: str = "int", + ): + super().__init__( + epsilon=epsilon, + lower_bound=lower_bound, + upper_bound=upper_bound, + l0_sensitivity=l0_sensitivity, + linf_sensitivity=linf_sensitivity, + dtype=dtype, + ) diff --git a/pydp/algorithms/laplacian/__init__.py b/pydp/algorithms/laplacian/__init__.py new file mode 100644 index 00000000..576ae8bb --- /dev/null +++ b/pydp/algorithms/laplacian/__init__.py @@ -0,0 +1,23 @@ +from ._bounded_algorithms import ( + BoundedMean, + BoundedSum, + BoundedStandardDeviation, + BoundedVariance, + Max, + Min, + Median, +) +from ._count import Count +from ._percentile import Percentile + +__all__ = [ + "BoundedMean", + "BoundedStandardDeviation", + "BoundedSum", + "BoundedVariance", + "Count", + "Max", + "Min", + "Median", + "Percentile", +] diff --git a/pydp/algorithms/laplacian/_bounded_algorithms.py b/pydp/algorithms/laplacian/_bounded_algorithms.py new file mode 100644 index 00000000..dbe4f5bc --- /dev/null +++ b/pydp/algorithms/laplacian/_bounded_algorithms.py @@ -0,0 +1,64 @@ +from .._algorithm import BoundedAlgorithm + + +class BoundedMean(BoundedAlgorithm): + """ + Bounded Mean Explanation + TODO + """ + + pass + + +class BoundedSum(BoundedAlgorithm): + """ + Bounded Sum Explanation + TODO + """ + + pass + + +class BoundedStandardDeviation(BoundedAlgorithm): + """ + Bounded Standard Deviation Explanation + TODO + """ + + pass + + +class BoundedVariance(BoundedAlgorithm): + """ + Bounded Variance Explanation + TODO + """ + + pass + + +class Max(BoundedAlgorithm): + """ + Max Explanation + TODO + """ + + pass + + +class Min(BoundedAlgorithm): + """ + Min Explanation + TODO + """ + + pass + + 
+class Median(BoundedAlgorithm): + """ + Median Explanation + TODO + """ + + pass diff --git a/pydp/algorithms/laplacian/_count.py b/pydp/algorithms/laplacian/_count.py new file mode 100644 index 00000000..c66ddcac --- /dev/null +++ b/pydp/algorithms/laplacian/_count.py @@ -0,0 +1,11 @@ +from .._algorithm import MetaAlgorithm + + +class Count(MetaAlgorithm): + """ + Count Explanation + TODO + """ + + def __init__(self, epsilon: float = 1.0, dtype: str = "int"): + super().__init__(epsilon=epsilon, dtype=dtype) diff --git a/pydp/algorithms/laplacian/_percentile.py b/pydp/algorithms/laplacian/_percentile.py new file mode 100644 index 00000000..08917326 --- /dev/null +++ b/pydp/algorithms/laplacian/_percentile.py @@ -0,0 +1,27 @@ +from .._algorithm import MetaAlgorithm +from typing import Union + + +class Percentile(MetaAlgorithm): + def __init__( + self, + epsilon: float = 1.0, + percentile: float = 0.0, + lower_bound: Union[int, float, None] = None, + upper_bound: Union[int, float, None] = None, + dtype: str = "int", + ): + super().__init__( + epsilon=epsilon, + percentile=percentile, + lower_bound=lower_bound, + upper_bound=upper_bound, + dtype=dtype, + ) + + @property + def percentile(self) -> float: + """ + Gets the percentile value that was set in the constructor. 
+ """ + return self._MetaAlgorithm__algorithm.percentile diff --git a/pydp/distributions/__init__.py b/pydp/distributions/__init__.py new file mode 100644 index 00000000..5463f84e --- /dev/null +++ b/pydp/distributions/__init__.py @@ -0,0 +1 @@ +from .._pydp._distributions import * diff --git a/pydp/util/__init__.py b/pydp/util/__init__.py new file mode 100644 index 00000000..dbea346d --- /dev/null +++ b/pydp/util/__init__.py @@ -0,0 +1 @@ +from .._pydp._util import * diff --git a/setup.py b/setup.py index 6ecd57b6..ebbd6b7c 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def read(fname): include_package_data=True, keywords="pydp", name="python-dp", - package_data={"pydp": ["pydp.so"],}, + package_data={"pydp": ["_pydp.so"],}, packages=find_packages(exclude=["tests"]), # need to check this setup_requires=setup_requirements, test_suite="tests", diff --git a/src/bindings/BUILD b/src/bindings/BUILD index 76bbd6e6..ed3d819c 100644 --- a/src/bindings/BUILD +++ b/src/bindings/BUILD @@ -1,16 +1,14 @@ load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") pybind_extension( - name = "pydp", + name = "_pydp", srcs = glob([ "PyDP/*.cpp", "PyDP/base/*.cpp", "PyDP/algorithms/*.cpp", "PyDP/algorithms/*.cpp", "PyDP/pydp_lib/*.hpp", - "PyDP/proto/*.cpp", - "c/*.cc", - "c/*.h" + "PyDP/proto/*.cpp" ]), visibility = ["//src/python:__pkg__"], @@ -29,4 +27,4 @@ pybind_extension( "@google_dp//algorithms:order-statistics", "@google_dp//proto:util-lib" ], -) \ No newline at end of file +) diff --git a/src/bindings/PyDP/algorithms/bounded_functions.cpp b/src/bindings/PyDP/algorithms/bounded_functions.cpp index 8e878336..874d7f9c 100644 --- a/src/bindings/PyDP/algorithms/bounded_functions.cpp +++ b/src/bindings/PyDP/algorithms/bounded_functions.cpp @@ -1,111 +1,40 @@ // Provides bindings for Bounded Functions -#include "../../c/c_api.h" - -#include "../pydp_lib/casting.hpp" // our caster helper library -#include "../pydp_lib/helper_class.hpp" // Dummy helper class - #include 
"pybind11/complex.h" #include "pybind11/functional.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -using namespace std; +#include "algorithms/algorithm.h" +#include "algorithms/bounded-mean.h" +#include "algorithms/bounded-standard-deviation.h" +#include "algorithms/bounded-sum.h" +#include "algorithms/bounded-variance.h" -namespace py = pybind11; +#include "../pydp_lib/algorithm_builder.hpp" +#include "../pydp_lib/casting.hpp" // our caster helper library -class BoundedMeanDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedMean(obj, l); - } -}; - -class BoundedSumDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedSum(obj, l); - } -}; - -class BoundedStandardDeviationDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedStandardDeviation(obj, l); - } -}; +using namespace std; -class BoundedVarianceDummy : public Dummy { - public: - using Dummy::Dummy; - double Result(py::list l) override { - return Result_BoundedVariance(obj, l); - } -}; +namespace py = pybind11; +namespace dp = differential_privacy; -void declareBoundedMean(py::module& m) { - py::class_ bld(m, "BoundedMean"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &BoundedMeanDummy::Result); - bld.def_property("l0_sensitivity", &BoundedMeanDummy::get_l0_sensitivity, - &BoundedMeanDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &BoundedMeanDummy::get_linf_sensitivity, - &BoundedMeanDummy::set_linf_sensitivity); +template +void declareBoundedAlgorithm(py::module& m) { + using builder = typename dp::python::AlgorithmBuilder; + builder().declare(m); } -void declareBoundedSum(py::module& m) { - py::class_ cls(m, 
"BoundedSum"); - cls.attr("__module__") = "pydp"; - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def("result", &BoundedSumDummy::Result); - cls.def_property("l0_sensitivity", &BoundedSumDummy::get_l0_sensitivity, - &BoundedSumDummy::set_l0_sensitivity); - cls.def_property("linf_sensitivity", &BoundedSumDummy::get_linf_sensitivity, - &BoundedSumDummy::set_linf_sensitivity); -} +void init_algorithms_bounded_functions(py::module& m) { + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); -void declareBoundedStandardDeviation(py::module& m) { - py::class_ cls(m, "BoundedStandardDeviation"); - cls.attr("__module__") = "pydp"; - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def("result", &BoundedStandardDeviationDummy::Result); - cls.def_property("l0_sensitivity", &BoundedStandardDeviationDummy::get_l0_sensitivity, - &BoundedStandardDeviationDummy::set_l0_sensitivity); - cls.def_property("linf_sensitivity", - &BoundedStandardDeviationDummy::get_linf_sensitivity, - &BoundedSumDummy::set_linf_sensitivity); -} + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); -void declareBoundedVariance(py::module& m) { - py::class_ cls(m, "BoundedVariance"); - cls.attr("__module__") = "pydp"; - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - cls.def("result", &BoundedVarianceDummy::Result); - cls.def_property("l0_sensitivity", &BoundedVarianceDummy::get_l0_sensitivity, - &BoundedVarianceDummy::set_l0_sensitivity); - cls.def_property("linf_sensitivity", &BoundedVarianceDummy::get_linf_sensitivity, - &BoundedVarianceDummy::set_linf_sensitivity); -} + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); -void 
init_algorithms_bounded_functions(py::module& m) { - declareBoundedMean(m); - declareBoundedSum(m); - declareBoundedStandardDeviation(m); - declareBoundedVariance(m); + declareBoundedAlgorithm>(m); + declareBoundedAlgorithm>(m); } diff --git a/src/bindings/PyDP/algorithms/count.cpp b/src/bindings/PyDP/algorithms/count.cpp index d4a30ff6..554fc030 100644 --- a/src/bindings/PyDP/algorithms/count.cpp +++ b/src/bindings/PyDP/algorithms/count.cpp @@ -5,46 +5,20 @@ #include "algorithms/count.h" +#include "../pydp_lib/algorithm_builder.hpp" + using namespace std; namespace py = pybind11; namespace dp = differential_privacy; -template -void declareCount(py::module& m, string const& suffix) { - using count_builder = typename dp::Count::Builder; - - py::class_> count(m, ("Count" + suffix).c_str()); - count.attr("__module__") = "pydp"; - count.def(py::init([]() { return count_builder().Build().ValueOrDie(); })) - .def(py::init([](double epsilon) { - return count_builder().SetEpsilon(epsilon).Build().ValueOrDie(); - })) - .def("add_entry", &dp::Count::AddEntry) - .def("add_entries", - [](dp::Count& obj, std::vector& v) { - return obj.AddEntries(v.begin(), v.end()); - }) - // TODO: port ConfidenceInterval and Summary - //.def("noise_confidence_interval", &dp::Count::NoiseConfidenceInterval) - //.def("serialize", &dp::Count::Serialize) - //.def("merge", &dp::Count::Merge) - .def("memory_used", &dp::Count::MemoryUsed) - .def("result", - [](dp::Count& obj, std::vector& v) { - return dp::GetValue(obj.Result(v.begin(), v.end()).ValueOrDie()); - }) - .def("partial_result", - [](dp::Count& obj) { - return dp::GetValue(obj.PartialResult().ValueOrDie()); - }) - - .def("partial_result", [](dp::Count& obj, double privacy_budget) { - return dp::GetValue(obj.PartialResult(privacy_budget).ValueOrDie()); - }); +template +void declareAlgorithm(py::module& m) { + using builder = typename dp::python::AlgorithmBuilder; + builder().declare(m); } void init_algorithms_count(py::module& m) { - 
declareCount(m, "Int"); - declareCount(m, "Double"); + declareAlgorithm>(m); + declareAlgorithm>(m); } diff --git a/src/bindings/PyDP/algorithms/distributions.cpp b/src/bindings/PyDP/algorithms/distributions.cpp index 5f1b7891..359df583 100644 --- a/src/bindings/PyDP/algorithms/distributions.cpp +++ b/src/bindings/PyDP/algorithms/distributions.cpp @@ -1,5 +1,4 @@ // Provides bindings for distributions - #include "pybind11/pybind11.h" #include "algorithms/distributions.h" @@ -19,7 +18,7 @@ void declareLaplaceDistribution(py::module &m) { py::arg("scale") = 1.0, R"pbdoc( Samples the Laplacian distribution Laplace(u, scale*b). - + Parameters ---------- scale @@ -28,7 +27,7 @@ void declareLaplaceDistribution(py::module &m) { laplace_dist.def("get_diversity", &dpi::LaplaceDistribution::GetDiversity, R"pbdoc( Returns the parameter defining this distribution, often labeled b. - + )pbdoc"); laplace_dist.attr("__doc__") = "Draws samples from the Laplacian distribution."; } @@ -43,12 +42,13 @@ void declareGaussianDistribution(py::module &m) { Parameters ---------- scale - A factor to scale stddev. + A factor to scale stddev. )pbdoc") .def_property_readonly("stddev", &dpi::GaussianDistribution::Stddev, R"pbdoc(Returns stddev)pbdoc"); } +/* void declareGeometricDistribution(py::module &m) { py::class_ geometric_dist(m, "GeometricDistribution"); geometric_dist.attr("__module__") = "pydp"; @@ -64,15 +64,15 @@ void declareGeometricDistribution(py::module &m) { R"pbdoc(Returns lambda. Where p = 1 - e^-lambda)pbdoc"); geometric_dist.attr("__doc__") = R"pbdoc(Draws samples from the geometric distribution of probability - \math{p = 1 - e^{-\lambda}}, i.e. the number of bernoulli trial failures before the - first success where the success probability is as defined above. lambda must + \math{p = 1 - e^{-\lambda}}, i.e. the number of bernoulli trial failures +before the first success where the success probability is as defined above. lambda must be positive. 
If the result would be higher than the maximum int64_t, returns the maximum int64_t, which means that users should be careful around the edges of their distribution)pbdoc"; } - +*/ void init_algorithms_distributions(py::module &m) { declareLaplaceDistribution(m); declareGaussianDistribution(m); - declareGeometricDistribution(m); + // declareGeometricDistribution(m); } diff --git a/src/bindings/PyDP/algorithms/order_statistics.cpp b/src/bindings/PyDP/algorithms/order_statistics.cpp index 97f9753f..d39063cb 100644 --- a/src/bindings/PyDP/algorithms/order_statistics.cpp +++ b/src/bindings/PyDP/algorithms/order_statistics.cpp @@ -1,127 +1,33 @@ -// Provides bindings for Bounded Functions - -#include "../../c/c_api.h" - -#include "../pydp_lib/casting.hpp" // our caster helper library -#include "../pydp_lib/helper_class.hpp" // Dummy helder class - +// Provides bindings for Order Statistics +#include "algorithms/order-statistics.h" #include "pybind11/complex.h" #include "pybind11/functional.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" +#include "../pydp_lib/algorithm_builder.hpp" + using namespace std; namespace py = pybind11; +namespace dp = differential_privacy; -class MaxDummy : public Dummy { - public: - using Dummy::Dummy; - - double Result(py::list l, double privacy_budget) override { - return Result_Max(obj, l, privacy_budget); - } -}; - -class MinDummy : public Dummy { - public: - using Dummy::Dummy; - - double Result(py::list l, double privacy_budget) override { - return Result_Min(obj, l, privacy_budget); - } -}; - -class MedianDummy : public Dummy { - public: - using Dummy::Dummy; - - double Result(py::list l, double privacy_budget) override { - return Result_Median(obj, l, privacy_budget); - } -}; - -class PercentileDummy : public Dummy { - public: - using Dummy::Dummy; - - void setPercentile(double percentile) { - _percentile = percentile; - } - - double getPercentile() { - return _percentile; - } - - double Result(py::list l, double 
privacy_budget) override { - return Result_Percentile(obj, l, privacy_budget, _percentile); - } - - private: - double _percentile = 0.45; -}; - -void declareMax(py::module& m) { - py::class_ bld(m, "Max"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &MaxDummy::Result); - bld.def_property("l0_sensitivity", &MaxDummy::get_l0_sensitivity, - &MaxDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &MaxDummy::get_linf_sensitivity, - &MaxDummy::set_linf_sensitivity); +template +void declareOrderStat(py::module& m) { + using builder = typename dp::python::AlgorithmBuilder; + builder().declare(m); } -void declareMin(py::module& m) { - py::class_ bld(m, "Min"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &MinDummy::Result); - bld.def_property("l0_sensitivity", &MinDummy::get_l0_sensitivity, - &MinDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &MinDummy::get_linf_sensitivity, - &MinDummy::set_linf_sensitivity); -} +void init_algorithms_order_statistics(py::module& m) { + declareOrderStat>(m); + declareOrderStat>(m); -void declareMedian(py::module& m) { - py::class_ bld(m, "Median"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &MedianDummy::Result); - bld.def_property("l0_sensitivity", &MedianDummy::get_l0_sensitivity, - &MedianDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &MedianDummy::get_linf_sensitivity, - &MedianDummy::set_linf_sensitivity); -} + declareOrderStat>(m); + declareOrderStat>(m); -void 
declarePercentile(py::module& m) { - py::class_ bld(m, "Percentile"); - bld.attr("__module__") = "pydp"; - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def(py::init(), py::return_value_policy::reference, - py::call_guard()); - bld.def("result", &PercentileDummy::Result); - bld.def_property("percentile", &PercentileDummy::getPercentile, - &PercentileDummy::setPercentile); - bld.def_property("l0_sensitivity", &PercentileDummy::get_l0_sensitivity, - &PercentileDummy::set_l0_sensitivity); - bld.def_property("linf_sensitivity", &PercentileDummy::get_linf_sensitivity, - &PercentileDummy::set_linf_sensitivity); -} + declareOrderStat>(m); + declareOrderStat>(m); -void init_algorithms_order_statistics(py::module& m) { - declareMax(m); - declareMin(m); - declareMedian(m); - declarePercentile(m); + declareOrderStat>(m); + declareOrderStat>(m); } diff --git a/src/bindings/PyDP/algorithms/util.cpp b/src/bindings/PyDP/algorithms/util.cpp index e88ef840..d8bcdad6 100644 --- a/src/bindings/PyDP/algorithms/util.cpp +++ b/src/bindings/PyDP/algorithms/util.cpp @@ -9,37 +9,36 @@ namespace py = pybind11; namespace dp = differential_privacy; void init_algorithms_util(py::module& m) { - py::module util = m.def_submodule("util", "Some Utility Functions"); - util.attr("__module__") = "pydp"; - util.def("xor_strings", &dp::XorStrings); - util.def("default_epsilon", &dp::DefaultEpsilon); - util.def("get_next_power_of_two", &dp::GetNextPowerOfTwo); - util.def("qnorm", &dp::Qnorm); - util.def("mean", &dp::Mean); - util.def("mean", &dp::Mean); - util.def("variance", &dp::Variance); - util.def("standard_deviation", &dp::StandardDev); - util.def("order_statistics", &dp::OrderStatistic); - util.def("correlation", &dp::Correlation); - util.def("vector_filter", &dp::VectorFilter); - util.def("vector_to_string", &dp::VectorToString); - util.def("round_to_nearest_multiple", &dp::RoundToNearestMultiple); - util.def("safe_add", [](int64_t i, int64_t j) { + 
m.attr("__module__") = "pydp"; + m.def("xor_strings", &dp::XorStrings); + m.def("default_epsilon", &dp::DefaultEpsilon); + m.def("get_next_power_of_two", &dp::GetNextPowerOfTwo); + m.def("qnorm", &dp::Qnorm); + m.def("mean", &dp::Mean); + m.def("mean", &dp::Mean); + m.def("variance", &dp::Variance); + m.def("standard_deviation", &dp::StandardDev); + m.def("order_statistics", &dp::OrderStatistic); + m.def("correlation", &dp::Correlation); + m.def("vector_filter", &dp::VectorFilter); + m.def("vector_to_string", &dp::VectorToString); + m.def("round_to_nearest_multiple", &dp::RoundToNearestMultiple); + m.def("safe_add", [](int64_t i, int64_t j) { int64_t k; bool result = dp::SafeAdd(i, j, &k); if (result) return k; throw std::runtime_error("Result of addition will overflow."); }); - util.def("safe_subtract", [](int64_t i, int64_t j) { + m.def("safe_subtract", [](int64_t i, int64_t j) { int64_t k; bool result = dp::SafeSubtract(i, j, &k); if (result) return k; throw std::runtime_error("Result of subtraction will overflow."); }); - util.def("safe_square", [](int64_t i) { + m.def("safe_square", [](int64_t i) { int64_t k; bool result = dp::SafeSquare(i, &k); if (result) return k; throw std::runtime_error("Result of squaring will overflow."); }); -} \ No newline at end of file +} diff --git a/src/bindings/PyDP/base/percentile.cpp b/src/bindings/PyDP/base/percentile.cpp deleted file mode 100644 index b51bd7df..00000000 --- a/src/bindings/PyDP/base/percentile.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Provides bindings for percentile class - -#include "pybind11/pybind11.h" - -#include "base/percentile.h" - -using namespace std; -namespace py = pybind11; -namespace dpb = differential_privacy::base; - -template -void declarePercentile(py::module& m, string const& suffix) { - py::class_> percentile(m, ("Percentile" + suffix).c_str()); - percentile.attr("__module__") = "pydp"; - percentile.def(py::init()) - .def("add", &dpb::Percentile::Add) - .def("reset", &dpb::Percentile::Reset) - 
.def("serialize_to_proto", &dpb::Percentile::SerializeToProto) - .def("merge_from_proton", &dpb::Percentile::MergeFromProto) - .def("memory", &dpb::Percentile::Memory) - .def("num_values", &dpb::Percentile::num_values) - .def("get_relative_rank", &dpb::Percentile::GetRelativeRank); -} - -void init_base_percentile(py::module& m) { - declarePercentile(m, "Int"); - declarePercentile(m, "Double"); -} diff --git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp index 4d98bfc4..24399826 100644 --- a/src/bindings/PyDP/bindings.cpp +++ b/src/bindings/PyDP/bindings.cpp @@ -7,7 +7,6 @@ namespace py = pybind11; // base void init_base_status(py::module &); void init_base_logging(py::module &); -void init_base_percentile(py::module &); // bounded functions void init_algorithms_bounded_functions(py::module &); @@ -28,22 +27,27 @@ void init_algorithms_rand(py::module &); // proto void init_proto(py::module &); -PYBIND11_MODULE(pydp, m) { +PYBIND11_MODULE(_pydp, m) { m.doc() = "Google Differential Privacy python extension"; // Base init_base_status(m); init_base_logging(m); - init_base_percentile(m); // Algorithms - init_algorithms_bounded_functions(m); - init_algorithms_util(m); - init_algorithms_distributions(m); - init_algorithms_order_statistics(m); - init_algorithms_rand(m); - init_algorithms_count(m); + auto malgorithms = m.def_submodule("_algorithms"); + init_algorithms_bounded_functions(malgorithms); + init_algorithms_count(malgorithms); + init_algorithms_order_statistics(malgorithms); + + auto mdistributions = m.def_submodule("_distributions"); + init_algorithms_distributions(mdistributions); + + auto mutil = m.def_submodule("_util", "Some Utility Functions"); + init_algorithms_rand(mutil); + init_algorithms_util(mutil); // Proto + // TODO: Delete if it is not necessary (we no longer return StatusOr to the user) init_proto(m); } diff --git a/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp new file mode 
100644 index 00000000..659e4331 --- /dev/null +++ b/src/bindings/PyDP/pydp_lib/algorithm_builder.hpp @@ -0,0 +1,252 @@ +#ifndef PYDP_LIB_ALGORITHM_H_ +#define PYDP_LIB_ALGORITHM_H_ + +#include "algorithms/algorithm.h" +#include "algorithms/bounded-mean.h" +#include "algorithms/bounded-standard-deviation.h" +#include "algorithms/bounded-sum.h" +#include "algorithms/bounded-variance.h" +#include "algorithms/count.h" +#include "algorithms/numerical-mechanisms.h" +#include "algorithms/order-statistics.h" +#include "base/statusor.h" + +namespace dp = differential_privacy; +namespace py = pybind11; + +namespace differential_privacy { +namespace python { + +template +constexpr bool is_bounded_algorithm() { + return std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + +template +constexpr bool is_percentile() { + return std::is_same>::value; +} + +template +constexpr bool should_return_T() { + return std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + +template +constexpr bool should_return_double() { + return std::is_same>::value || + std::is_same>::value || + std::is_same>::value || + std::is_same>::value; +} + +template +constexpr bool should_return_int() { + return std::is_same>::value; +} + +template +class AlgorithmBuilder { + public: + std::unique_ptr build(double epsilon, + std::optional percentile = std::nullopt, + std::optional lower_bound = std::nullopt, + std::optional upper_bound = std::nullopt, + std::optional l0_sensitivity = std::nullopt, + std::optional linf_sensitivity = std::nullopt) { + auto builder = typename Algorithm::Builder(); + + if constexpr (is_percentile()) { + if (percentile.has_value()) builder.SetPercentile(percentile.value()); + } + builder.SetEpsilon(epsilon); + + if (l0_sensitivity.has_value()) + 
builder.SetMaxPartitionsContributed(l0_sensitivity.value()); + if (linf_sensitivity.has_value()) + builder.SetMaxContributionsPerPartition(linf_sensitivity.value()); + + if constexpr (is_bounded_algorithm()) { + if (lower_bound.has_value()) builder.SetLower(lower_bound.value()); + if (upper_bound.has_value()) builder.SetUpper(upper_bound.value()); + } + + base::StatusOr> obj = builder.Build(); + if (!obj.ok()) { + throw std::runtime_error(obj.status().error_message()); + } + + return std::move(obj.ValueOrDie()); + } + + std::map type_to_name = { + {typeid(double), "Double"}, {typeid(int), "Int"}, {typeid(int64_t), "Int"}}; + std::map algorithm_to_name = { + {typeid(dp::BoundedMean), "BoundedMean"}, + {typeid(dp::BoundedSum), "BoundedSum"}, + {typeid(dp::BoundedStandardDeviation), "BoundedStandardDeviation"}, + {typeid(dp::BoundedVariance), "BoundedVariance"}, + {typeid(dp::Count), "Count"}, + {typeid(dp::continuous::Min), "Min"}, + {typeid(dp::continuous::Max), "Max"}, + {typeid(dp::continuous::Median), "Median"}, + {typeid(dp::continuous::Percentile), "Percentile"} + + }; + + std::string get_algorithm_name() { + // Set the suffix string + return (algorithm_to_name[typeid(Algorithm)] + type_to_name[typeid(T)]); + } + + void declare(py::module& m) { + py::class_ pyself(m, get_algorithm_name().c_str()); + + pyself.attr("__module__") = "_algorithms"; + + // Constructors + if constexpr (is_bounded_algorithm()) { + if constexpr (is_percentile()) { + // Explicit percentile constructor + pyself.def( + py::init([this](double epsilon, double percentile, T lower_bound, + T upper_bound, int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, percentile, lower_bound, upper_bound, + l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("percentile"), py::arg("lower_bound"), + py::arg("upper_bound"), py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); + } + // Explicit bounds constructor + pyself.def(py::init([this](double 
epsilon, T lower_bound, T upper_bound, + int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, std::nullopt /*percentile*/, lower_bound, + upper_bound, l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("lower_bound"), py::arg("upper_bound"), + py::arg("l0_sensitivity") = 1, py::arg("linf_sensitivity") = 1); + } + + // No bounds constructor + pyself.def( + py::init([this](double epsilon, int l0_sensitivity, int linf_sensitivity) { + return this->build(epsilon, std::nullopt /*percentile*/, + std::nullopt /*lower_bound*/, std::nullopt /*upper_bound*/, + l0_sensitivity, linf_sensitivity); + }), + py::arg("epsilon"), py::arg("l0_sensitivity") = 1, + py::arg("linf_sensitivity") = 1); + + // Getters + pyself.def_property_readonly("epsilon", &Algorithm::GetEpsilon); + + pyself.def("privacy_budget_left", &Algorithm::RemainingPrivacyBudget); + + pyself.def("memory_used", &Algorithm::MemoryUsed); + + // Input data + pyself.def("add_entries", [](Algorithm& pythis, std::vector& v) { + pythis.AddEntries(v.begin(), v.end()); + }); + + pyself.def("add_entry", &Algorithm::AddEntry); + + // Compute results + pyself.def("result", [](Algorithm& pythis, std::vector& v) { + auto result = pythis.Result(v.begin(), v.end()); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); + }); + + pyself.def("partial_result", [](Algorithm& pythis) { + auto result = pythis.PartialResult(); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) 
+ return dp::GetValue(result.ValueOrDie()); + }); + + pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget) { + if (privacy_budget > pythis.RemainingPrivacyBudget()) { + throw std::runtime_error("Privacy budget requeted exceeds set privacy budget"); + } + + auto result = pythis.PartialResult(privacy_budget); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); + }); + + pyself.def("partial_result", [](Algorithm& pythis, double privacy_budget, + double noise_interval_level) { + if (privacy_budget > pythis.RemainingPrivacyBudget()) { + throw std::runtime_error("Privacy budget requeted exceeds set privacy budget"); + } + + auto result = pythis.PartialResult(privacy_budget, noise_interval_level); + + if (!result.ok()) { + throw std::runtime_error(result.status().error_message()); + } + if constexpr ((should_return_T())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_double())) + return dp::GetValue(result.ValueOrDie()); + if constexpr ((should_return_int())) + return dp::GetValue(result.ValueOrDie()); + }); + + // Other methods + pyself.def("reset", &Algorithm::Reset); + + pyself.def("serialize", &Algorithm::Serialize); + + pyself.def("merge", &Algorithm::Merge); + + pyself.def("noise_confidence_interval", &Algorithm::NoiseConfidenceInterval); + + // Percentile special case. 
+ if constexpr (is_percentile()) { + pyself.def_property_readonly("percentile", &Algorithm::GetPercentile); + } + } +}; + +} // namespace python +} // namespace differential_privacy + +#endif // PYDP_LIB_ALGORITHM_H_ diff --git a/src/bindings/c/c_api.cc b/src/bindings/c/c_api.cc deleted file mode 100644 index 6d2dc3b5..00000000 --- a/src/bindings/c/c_api.cc +++ /dev/null @@ -1,343 +0,0 @@ -#include "c_api.h" - -#include "algorithms/algorithm.h" - -#include "algorithms/bounded-mean.h" -#include "algorithms/bounded-standard-deviation.h" -#include "algorithms/bounded-sum.h" -#include "algorithms/bounded-variance.h" - -#include "absl/random/distributions.h" -#include "algorithms/order-statistics.h" - -#include "pybind11/pybind11.h" - -extern "C" { -namespace differential_privacy { - -// Helper variable in-case bounds are not set -bool has_bounds; -int l0_sensitivity = 1; -int linf_sensitivity = 1; - -// Bounded Mean -double Result_BoundedMean(BoundedFunctionHelperObject* config, pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> mean_obj; - if (has_bounds) { - mean_obj = BoundedMean::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - // TODO: a better solution to this is needed similar to ASSIGN_OR_RETURN but with a - // raised exeception - mean_obj = BoundedMean::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!mean_obj.ok()) { - // TODO: custtom error? 
- throw std::runtime_error(mean_obj.status().error_message()); - } - - base::StatusOr resultf = mean_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } -} - -// Bounded Sum -double Result_BoundedSum(BoundedFunctionHelperObject* config, pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> sum_obj; - if (has_bounds) { - sum_obj = BoundedSum::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - // TODO: a better solution to this is needed similar to ASSIGN_OR_RETURN but with a - // raised exeception - sum_obj = BoundedSum::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!sum_obj.ok()) { - throw std::runtime_error(sum_obj.status().error_message()); - } - - base::StatusOr resultf = sum_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } -} - -double Result_BoundedStandardDeviation(BoundedFunctionHelperObject* config, - pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> sd_obj; - if (has_bounds) { - sd_obj = BoundedStandardDeviation::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - // TODO: a better solution to this is needed similar to ASSIGN_OR_RETURN but with a - // raised exeception - sd_obj = 
BoundedStandardDeviation::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!sd_obj.ok()) { - throw std::runtime_error(sd_obj.status().error_message()); - } - - base::StatusOr resultf = sd_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } -} - -double Result_BoundedVariance(BoundedFunctionHelperObject* config, pybind11::list l) { - std::vector a; - - for (auto i : l) { - a.push_back(i.cast()); - } - base::StatusOr>> variance_obj; - if (has_bounds) { - variance_obj = BoundedVariance::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - variance_obj = BoundedVariance::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - - if (!variance_obj.ok()) { - throw std::runtime_error(variance_obj.status().error_message()); - } else { - base::StatusOr resultf = - variance_obj.ValueOrDie()->Result(a.begin(), a.end()); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Order Statistics - -// Max - -int64_t Result_Max(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget) { - base::StatusOr>> max; - if (has_bounds) { - max = continuous::Max::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - max = 
continuous::Max::Builder().SetEpsilon(config->epsilon).Build(); - } - if (!max.ok()) { - throw std::runtime_error(max.status().error_message()); - } else { - for (auto i : l) { - max.ValueOrDie()->AddEntry(i.cast()); - } - base::StatusOr resultf = max.ValueOrDie()->PartialResult(privacy_budget); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Min - -int64_t Result_Min(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget) { - base::StatusOr>> min; - if (has_bounds) { - min = continuous::Min::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - min = continuous::Min::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - if (!min.ok()) { - throw std::runtime_error(min.status().error_message()); - } else { - for (auto i : l) { - min.ValueOrDie()->AddEntry(i.cast()); - } - base::StatusOr resultf = min.ValueOrDie()->PartialResult(privacy_budget); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Max - -int64_t Result_Median(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget) { - base::StatusOr>> median; - if (has_bounds) { - median = continuous::Median::Builder() - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } else { - median = continuous::Median::Builder() - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - 
.SetMaxContributionsPerPartition(linf_sensitivity) - .Build(); - } - if (!median.ok()) { - throw std::runtime_error(median.status().error_message()); - } else { - for (auto i : l) { - median.ValueOrDie()->AddEntry(i.cast()); - } - base::StatusOr resultf = median.ValueOrDie()->PartialResult(privacy_budget); - if (resultf.ok()) { - return GetValue(resultf.ValueOrDie()); - } else { - throw std::runtime_error(resultf.status().error_message()); - } - } -} - -// Percentile -// can't add error handling to this for some reasons -// TODO -int64_t Result_Percentile(BoundedFunctionHelperObject* config, pybind11::list l, - double privacy_budget, double percentile) { - std::unique_ptr> search; - if (has_bounds) { - search = continuous::Percentile::Builder() - .SetPercentile(percentile) - .SetEpsilon(config->epsilon) - .SetLower(config->lower_bound) - .SetUpper(config->upper_bound) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build() - .ValueOrDie(); - } else { - search = continuous::Percentile::Builder() - .SetPercentile(percentile) - .SetEpsilon(config->epsilon) - .SetMaxPartitionsContributed(l0_sensitivity) - .SetMaxContributionsPerPartition(linf_sensitivity) - .Build() - .ValueOrDie(); - } - for (auto i : l) { - search->AddEntry(i.cast()); - } - - return GetValue(search->PartialResult(privacy_budget).ValueOrDie()); -} - -// Common functions - -BoundedFunctionHelperObject* NewBoundedFunctionObject(double epsilon, int lower_bound, - int upper_bound) { - has_bounds = true; - return new BoundedFunctionHelperObject{epsilon, lower_bound, upper_bound}; -} - -BoundedFunctionHelperObject* NewBoundedFunctionObject1(double epsilon) { - has_bounds = false; - return new BoundedFunctionHelperObject{epsilon}; -} - -void DeleteBoundedFunctionObject(BoundedFunctionHelperObject* config) { - delete config; -}; - -void set_l0_sensitivity_(int _l0_sensitivity) { - l0_sensitivity = _l0_sensitivity; -} - -int get_l0_sensitivity_() { - 
return l0_sensitivity; -} - -void set_linf_sensitivity_(int _linf_sensitivity) { - linf_sensitivity = _linf_sensitivity; -} - -int get_linf_sensitivity_() { - return linf_sensitivity; -} - -} // end namespace differential_privacy -} // end extern "C" diff --git a/src/bindings/c/c_api.h b/src/bindings/c/c_api.h deleted file mode 100644 index aee8fa46..00000000 --- a/src/bindings/c/c_api.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef DIFFERENTIAL_PRIVACY_C_C_API_H_ -#define DIFFERENTIAL_PRIVACY_C_C_API_H_ - -#include -#include - -#include "pybind11/pybind11.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Common Functions for all Bounded Functions - -typedef struct BoundedFunctionHelperObject { - double epsilon; - int lower_bound; - int upper_bound; -} BoundedFunctionHelperObject; - -extern BoundedFunctionHelperObject* NewBoundedFunctionObject(double epsilon, - int lower_bound, - int upper_bound); - -extern BoundedFunctionHelperObject* NewBoundedFunctionObject1(double epsilon); - -extern void DeleteBoundedFunctionObject(BoundedFunctionHelperObject* config); - -// Bounded Mean -extern double Result_BoundedMean(BoundedFunctionHelperObject* config, pybind11::list a); - -extern double Result_BoundedSum(BoundedFunctionHelperObject* config, pybind11::list a); - -extern double Result_BoundedStandardDeviation(BoundedFunctionHelperObject* config, - pybind11::list a); - -extern double Result_BoundedVariance(BoundedFunctionHelperObject* config, - pybind11::list a); - -// Order statistics -extern int64_t Result_Max(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget); - -extern int64_t Result_Min(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget); - -extern int64_t Result_Median(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget); - -extern int64_t Result_Percentile(BoundedFunctionHelperObject* config, pybind11::list a, - double privacy_budget, double percentile); - -extern void 
set_l0_sensitivity_(int _l0_sensitivity); - -extern int get_l0_sensitivity_(); - -extern void set_linf_sensitivity_(int _linf_sensitivity); - -extern int get_linf_sensitivity_(); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -#endif // DIFFERENTIAL_PRIVACY_C_C_API_H_ diff --git a/src/python/BUILD b/src/python/BUILD index ad4cae3d..2dfadd94 100644 --- a/src/python/BUILD +++ b/src/python/BUILD @@ -4,5 +4,5 @@ py_binary( name = "bindings_test", srcs = ["__init__.py"], main = "__init__.py", - data = ["//src/bindings:pydp.so"] + data = ["//src/bindings:_pydp.so"] ) diff --git a/tests/algorithms/conftest.py b/tests/algorithms/conftest.py new file mode 100644 index 00000000..cc5b308b --- /dev/null +++ b/tests/algorithms/conftest.py @@ -0,0 +1,59 @@ +from typing import List +from itertools import accumulate +import math + + +def skew(samples: List[float], mu: float, sigma: float): + """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc + and we don't want to pull the test files in. I'm assuming it'll be moved to + third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. + Until then this should suffice. #FIXME: when possible we can fix this. + """ + skew = list( + accumulate(samples, lambda lhs, rhs: lhs + (rhs - mu) * (rhs - mu) * (rhs - mu)) + )[-1] + return skew / (len(samples) * sigma * sigma * sigma) + + +def kurtosis(samples: List[float], mu: float, var: float): + """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc + and we don't want to pull the test files in. I'm assuming it'll be moved to + third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. + Until then this should suffice. #FIXME: when possible we can fix this. 
+ """ + kurt = list( + accumulate(samples, lambda lhs, rhs: lhs + ((rhs - mu) * (rhs - mu)) ** 2) + )[-1] + n = len(samples) + kurt = (n + 1) * kurt / (n * var * var) + kurt -= 3 * (n - 1) + kurt *= (n - 1) / (n - 2) / (n - 3) + return kurt + + +def percentile(N, percent, key=lambda x: x): + """ + Find the percentile of a list of values. + @parameter N - is a list of values. Note N MUST BE already sorted. + @parameter percent - a float value from 0.0 to 1.0. + @parameter key - optional key function to compute value from each element of N. + @return - the percentile of the values + """ + if not N: + return None + k = (len(N) - 1) * percent + f = math.floor(k) + c = math.ceil(k) + if f == c: + return key(N[int(k)]) + d0 = key(N[int(f)]) * (c - k) + d1 = key(N[int(c)]) * (k - f) + return d0 + d1 + + +# From what I understand @openmined/dp-research are going to look at validating correctness +# Until then we can use this to assert on floating point numbers. +# FIXME: When possible we should add 'correctness' tests. 
+expect_near = lambda expected, actual, tol: ( + expected + tol >= actual and expected - tol <= actual +) diff --git a/tests/algorithms/test_algorithms.py b/tests/algorithms/test_algorithms.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/algorithms/test_bounded_mean.py b/tests/algorithms/test_bounded_mean.py index c42f2bd1..1b909318 100644 --- a/tests/algorithms/test_bounded_mean.py +++ b/tests/algorithms/test_bounded_mean.py @@ -1,22 +1,24 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import BoundedMean -def test_c_api(): +def test_python_api(): a = [2, 4, 6, 8] - mean_algorithm = dp.BoundedMean(1.0, 1, 9) - assert 1.0 <= mean_algorithm.result(a) <= 9.0 + mean_algorithm = BoundedMean( + epsilon=1.0, lower_bound=1, upper_bound=9, dtype="float" + ) + assert 1.0 <= mean_algorithm.quick_result(a) <= 9.0 def test_bounded_mean(): - bm1 = dp.BoundedMean(3.4, 1, 2) - assert isinstance(bm1, dp.BoundedMean) - assert isinstance(bm1.result([1.5, 2, 2.5]), float) + bm1 = BoundedMean(epsilon=3.4, lower_bound=1, upper_bound=2, dtype="float") + assert isinstance(bm1, BoundedMean) + assert isinstance(bm1.quick_result([1.5, 2, 2.5]), float) - bm2 = dp.BoundedMean(3.4) - assert isinstance(bm2, dp.BoundedMean) - # assert isinstance(bm2.result([1.5, 2, 2.5]), float) + bm2 = BoundedMean(epsilon=3.4, dtype="int") + assert isinstance(bm2, BoundedMean) + # assert isinstance(bm2.quick_result([1.5, 2, 2.5]), float) # TODO: port this test diff --git a/tests/algorithms/test_bounded_standard_deviation.py b/tests/algorithms/test_bounded_standard_deviation.py index addb9825..8c80faa5 100644 --- a/tests/algorithms/test_bounded_standard_deviation.py +++ b/tests/algorithms/test_bounded_standard_deviation.py @@ -1,12 +1,12 @@ -import pydp as dp +from pydp.algorithms.laplacian import BoundedStandardDeviation class TestBoundedStandardDeviation: - def test_c_api(self): + def test_python_api(self): example_data = [1, 5, 7, 9, 13] epsilon = 1.0 lower_bound, 
upper_bound = 0, 15 - bsd = dp.BoundedStandardDeviation(epsilon, lower_bound, upper_bound) - result = bsd.result(example_data) + bsd = BoundedStandardDeviation(epsilon, lower_bound, upper_bound, dtype="float") + result = bsd.quick_result(example_data) assert type(result) is float and result >= 0 assert result <= (upper_bound - lower_bound) / 2 diff --git a/tests/algorithms/test_bounded_sum.py b/tests/algorithms/test_bounded_sum.py index 73bc1e6a..56252326 100644 --- a/tests/algorithms/test_bounded_sum.py +++ b/tests/algorithms/test_bounded_sum.py @@ -1,13 +1,13 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import BoundedSum class TestBoundedSum: - def test_c_api_sanity_check(self): + def test_pythons_api_sanity_check(self): a = [1, 2, 3, 4] - sum_algorithm = dp.BoundedSum(1.0, 0, 10) - result = sum_algorithm.result(a) + sum_algorithm = BoundedSum(1.0, 0, 10, dtype="float") + result = sum_algorithm.quick_result(a) # The result value is -16.0 # Google library tests make use of ZeroNoiseMechanism # for more reasonable expected values, but we don't support diff --git a/tests/algorithms/test_bounded_variance.py b/tests/algorithms/test_bounded_variance.py index c0688767..95dcf7c0 100644 --- a/tests/algorithms/test_bounded_variance.py +++ b/tests/algorithms/test_bounded_variance.py @@ -1,4 +1,4 @@ -import pydp as dp +from pydp.algorithms.laplacian import BoundedVariance class TestBoundedVariance: @@ -6,8 +6,8 @@ def test_basic(self): example_data = [1, 5, 7, 9, 13] epsilon = 1.0 lower_bound, upper_bound = 0, 16 - bv = dp.BoundedVariance(epsilon, lower_bound, upper_bound) - result = bv.result(example_data) + bv = BoundedVariance(epsilon, lower_bound, upper_bound, dtype="float") + result = bv.quick_result(example_data) assert type(result) is float and result >= 0 # Popoviciu's inequality on variances: assert result <= (upper_bound - lower_bound) ** 2 / 4 diff --git a/tests/algorithms/test_count.py b/tests/algorithms/test_count.py index 
3988c941..f304b4f9 100644 --- a/tests/algorithms/test_count.py +++ b/tests/algorithms/test_count.py @@ -1,25 +1,20 @@ import pytest -import pydp as dp +from pydp.algorithms.laplacian import Count -@pytest.mark.parametrize("input_class", [dp.CountInt, dp.CountDouble]) +@pytest.mark.parametrize("dtype_in", ["int", "float"]) class TestPercentile: - def test_basic(self, input_class): + def test_basic(self, dtype_in): c = [1, 2, 3, 4, 2, 3] - count = input_class() - count.result(c) - - def test_basic_epsilon(self, input_class): - c = [1, 2, 3, 4, 2, 3] - count = input_class(1.7) - count.result(c) + count = Count(epsilon=1.7, dtype=dtype_in) + count.quick_result(c) - def test_repeated_result(self, input_class): + def test_repeated_result(self, dtype_in): c = [1, 2, 3, 4, 2, 3] - count = input_class() + count = Count(epsilon=1.7, dtype=dtype_in) count.add_entries(c) - count.partial_result(0.5) + count.result(0.5) """ # TODO: port ConfidenceInterval @@ -53,23 +48,22 @@ def test_memory(self, input_class): class TestCountDataTypes: def test_count_datatypes(self): - ci1 = dp.CountInt() - ci2 = dp.CountInt(2.0) - assert isinstance(ci1, dp.CountInt) - assert isinstance(ci2, dp.CountInt) - - ci2ae = ci2.add_entry(2) - assert isinstance(ci2ae, type(None)) - ci2aes = ci2.add_entries([4, 6, 8]) - assert isinstance(ci2aes, type(None)) - mem = ci2.memory_used() + count = Count(2.0, dtype="int") + assert isinstance(count, Count) + + countae = count.add_entry(2) + assert isinstance(countae, type(None)) + countaes = count.add_entries([4, 6, 8]) + assert isinstance(countaes, type(None)) + mem = count.memory_used() assert isinstance(mem, int) - par = ci2.partial_result() + par = count.result() assert isinstance(par, int) # TODO - # par2 = ci2.partial_result(1.0) + # par2 = count.partial_result(1.0) # assert isinstance(par2, int) - res = ci2.result([2]) + + res = count.quick_result([2]) assert isinstance(res, int) diff --git a/tests/algorithms/test_distributions.py 
b/tests/algorithms/test_distributions.py index c68cde34..ff3d8650 100644 --- a/tests/algorithms/test_distributions.py +++ b/tests/algorithms/test_distributions.py @@ -1,10 +1,16 @@ import pytest +from pydp.distributions import ( + LaplaceDistribution, + GaussianDistribution, + # GeometricDistribution, +) import pydp as dp import math from typing import List from itertools import accumulate import math + k_num_samples = 10000000 k_num_geometric_samples = 1000000 k_gaussian_samples = 1000000 @@ -15,7 +21,7 @@ def skew(samples: List[float], mu: float, sigma: float): """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc and we don't want to pull the test files in. I'm assuming it'll be moved to third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. - Until then this should suffice. #FIXME: when possible we can fix this. + Until then this should suffice. #FIXME: when possible we can fix this. """ skew = list( accumulate(samples, lambda lhs, rhs: lhs + (rhs - mu) * (rhs - mu) * (rhs - mu)) @@ -27,7 +33,7 @@ def kurtosis(samples: List[float], mu: float, var: float): """Unfortunately this is implemented in third_party/differential-privacy/cc/algorithms/distributions_test.cc and we don't want to pull the test files in. I'm assuming it'll be moved to third_party/differential-privacy/cc/algorithms/util.h If they (upstream) move it we can use it. - Until then this should suffice. #FIXME: when possible we can fix this. + Until then this should suffice. #FIXME: when possible we can fix this. 
""" kurt = list( accumulate(samples, lambda lhs, rhs: lhs + ((rhs - mu) * (rhs - mu)) ** 2) @@ -50,12 +56,12 @@ def kurtosis(samples: List[float], mu: float, var: float): class TestLaplaceDistribution: def test_diversity_getter(self): sensitivity, epsilon = 1.0, 22.0 - dist = dp.LaplaceDistribution(epsilon=epsilon, sensitivity=sensitivity) + dist = LaplaceDistribution(epsilon=epsilon, sensitivity=sensitivity) assert dist.get_diversity() == sensitivity / epsilon def test_check_statistics_for_geo_unit_values(self): - ld = dp.LaplaceDistribution(epsilon=1.0, sensitivity=1.0) + ld = LaplaceDistribution(epsilon=1.0, sensitivity=1.0) samples = [ld.sample(scale=1.0) for _ in range(k_num_geometric_samples)] mean = dp.util.mean(samples) var = dp.util.variance(samples) @@ -69,14 +75,14 @@ def test_check_statistics_for_geo_unit_values(self): class TestGaussianDistribution: def test_standard_deviation_getter(self): stddev = k_one_over_log2 - dist = dp.GaussianDistribution(stddev) + dist = GaussianDistribution(stddev) assert dist.stddev == stddev class TestLaplaceDistributionDatatypes: def test_LaplaceDistributionTypes(self): - ld = dp.LaplaceDistribution(epsilon=1.0, sensitivity=1.0) - assert isinstance(ld, dp.LaplaceDistribution) + ld = LaplaceDistribution(epsilon=1.0, sensitivity=1.0) + assert isinstance(ld, LaplaceDistribution) sud = ld.get_uniform_double() assert isinstance(sud, float) @@ -93,8 +99,8 @@ def test_LaplaceDistributionTypes(self): class TestGaussianDistributionDataTypes: def test_GaussianDistributionTypes(self): - gd = dp.GaussianDistribution(3) - assert isinstance(gd, dp.GaussianDistribution) + gd = GaussianDistribution(3) + assert isinstance(gd, GaussianDistribution) gds = gd.sample() gds1 = gd.sample(1.0) @@ -104,18 +110,23 @@ def test_GaussianDistributionTypes(self): assert isinstance(gdstd, float) -# class TestGeometricDistribution: -# def test_ratios(self): -# from collections import Counter -# p=1e-2 -# dist = 
dp.GeometricDistribution(lambda_=-1.0*math.log(1-p)) -# samples = [dist.sample() for _ in range(k_num_geometric_samples)] -# counts = list(Counter([s for s in samples if s < 51]).values()) -# ratios = [c_i/c_j for c_i, c_j in zip(counts[:-1], counts[1:])] -# This test fails. It's a replica of -# https://github.com/google/differential-privacy/blob/9923ad4ee1b84a7002085e50345fcc05f2b21bcb/cc/algorithms/distributions_test.cc#L208 -# and should pass. -# assert expect_near(p, dp.util.mean(ratios), p / 1e-2) +class TestGeometricDistribution: + @pytest.mark.skip(reason="This test should pass, see comments") + def test_ratios(self): + """ + This test fails. It's a replica of + https://github.com/google/differential-privacy/blob/9923ad4ee1b84a7002085e50345fcc05f2b21bcb/cc/algorithms/distributions_test.cc#L208 and should pass. + """ + from collections import Counter + + p = 1e-2 + dist = GeometricDistribution(lambda_=-1.0 * math.log(1 - p)) + samples = [dist.sample() for _ in range(k_num_geometric_samples)] + counts = list(Counter([s for s in samples if s < 51]).values()) + ratios = [c_i / c_j for c_i, c_j in zip(counts[:-1], counts[1:])] + + assert expect_near(p, dp.util.mean(ratios), p / 1e-2) + # TODO: port the following tests # diff --git a/tests/algorithms/test_order_statistics.py b/tests/algorithms/test_order_statistics.py index cda58a02..737216f0 100644 --- a/tests/algorithms/test_order_statistics.py +++ b/tests/algorithms/test_order_statistics.py @@ -5,129 +5,99 @@ import math import pydp as dp - - -@pytest.fixture -def data(): - kDataSize = 10000 - a = [] - for i in range(kDataSize): - a.append(int(200 * i / kDataSize)) - return a - - -def test_max(data): - maxx = dp.Max(1.0, 0, 2048) - assert 190 < maxx.result(data, 1.0) < 210 - - assert max(data) - 10 < maxx.result(data, 1.0) < max(data) + 10 - - -def test_min(data): - maxx = dp.Min(1.0, 0, 2048) - - assert min(data) - 10 < maxx.result(data, 1.0) < min(data) + 10 - - assert -10 < maxx.result(data, 1.0) < 10 - - 
-def test_median(data): - maxx = dp.Median(1.0, 0, 2048) - - assert ( - statistics.median(data) - 20 - < int(maxx.result(data, 1.0)) - < statistics.median(data) + 20 +from conftest import expect_near, percentile + +kDataSize = 10000 +data_floats = [200.0 * i / kDataSize for i in range(kDataSize)] +data_ints = [int(200 * i / kDataSize) for i in range(kDataSize)] +type_data_algorithm = [ + ("int", data_ints, dp.algorithms.laplacian.Max), + ("float", data_floats, dp.algorithms.laplacian.Max), + ("int", data_ints, dp.algorithms.laplacian.Min), + ("float", data_floats, dp.algorithms.laplacian.Min), + ("int", data_ints, dp.algorithms.laplacian.Median), + ("float", data_floats, dp.algorithms.laplacian.Median), +] + +# NOTE: One needs to use https://pypi.org/project/pytest-lazy-fixture/ to have fixtures in parameters. +# Using non-fixture data instead of adding pytest-lazy-fixture. +@pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) +def test_min(data, dtype): + + minn = dp.algorithms.laplacian.Min( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 ) - - assert 90 <= int(maxx.result(data, 1.0)) <= 100 + assert expect_near(min(data), minn.quick_result(data), 10) + assert expect_near(0, minn.quick_result(data), 10) -def test_median1(data): - maxx = dp.Median(1.0) - assert ( - statistics.median(data) - 20 - < int(maxx.result(data, 1.0)) - < statistics.median(data) + 20 +@pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) +def test_max(data, dtype): + maxx = dp.algorithms.laplacian.Max( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 ) - assert 80 <= int(maxx.result(data, 1.0)) <= 100 - + assert 190 < maxx.quick_result(data) < 210 -def percentile(N, percent, key=lambda x: x): - """ - Find the percentile of a list of values. + assert expect_near(max(data), maxx.quick_result(data), 10) - @parameter N - is a list of values. Note N MUST BE already sorted. 
- @parameter percent - a float value from 0.0 to 1.0. - @parameter key - optional key function to compute value from each element of N. - @return - the percentile of the values - """ - if not N: - return None - k = (len(N) - 1) * percent - f = math.floor(k) - c = math.ceil(k) - if f == c: - return key(N[int(k)]) - d0 = key(N[int(f)]) * (c - k) - d1 = key(N[int(c)]) * (k - f) - return d0 + d1 +@pytest.mark.parametrize("dtype, data", [("int", data_ints), ("float", data_floats)]) +def test_median(data, dtype): + median = dp.algorithms.laplacian.Median( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 + ) -def test_percentile(data): - maxx = dp.Percentile(1.0, 0, 2048) - maxx.percentile = 0.45 + assert expect_near(statistics.median(data), median.quick_result(data), 20) - actual_percentile = int(percentile(data, 0.45)) - assert maxx.percentile == 0.45 - assert actual_percentile - 10 < maxx.result(data, 1.0) < actual_percentile + 10 - assert 80 < maxx.result(data, 1.0) < 100 +@pytest.mark.parametrize("dtype", ["int", "float"]) +def test_percentile_getter(dtype): + expected_percentile = 0.45 -def test_max_datatypes(data): - ma1 = dp.Max(1.0) - ma2 = dp.Max(1.0, 0, 2048) - assert isinstance(ma1, dp.Max) - assert isinstance(ma2, dp.Max) + dp_percentile = dp.algorithms.laplacian.Percentile( + dtype=dtype, + epsilon=1.0, + percentile=expected_percentile, + lower_bound=0, + upper_bound=200, + ) + assert dp_percentile.percentile == expected_percentile - res = ma2.result(data, 1.0) - assert isinstance(res, float) +@pytest.mark.parametrize("dtype, data, order_statistic", type_data_algorithm) +def test_order_statistic_datatypes(data, dtype, order_statistic): -def test_min_datatypes(data): - mi1 = dp.Min(1.0) - mi2 = dp.Min(1.0, 0, 2048) - assert isinstance(mi1, dp.Min) - assert isinstance(mi2, dp.Min) + order_statistic_1 = order_statistic(dtype=dtype, epsilon=1.0) + order_statistic_2 = order_statistic( + dtype=dtype, epsilon=1.0, lower_bound=0, upper_bound=200 + ) + res 
= order_statistic_2.quick_result(data) - res = mi2.result(data, 1.0) - assert isinstance(res, float) + assert isinstance(order_statistic_1, order_statistic) + assert isinstance(order_statistic_2, order_statistic) + assert isinstance(res, type(data[0])) -def test_median_datatypes(data): - me1 = dp.Median(1.0) - me2 = dp.Median(1.0, 0, 2048) - assert isinstance(me1, dp.Median) - assert isinstance(me2, dp.Median) - res = me2.result(data, 1.0) - assert isinstance(res, float) +@pytest.mark.parametrize( + "dtype, data, dp_percentile", + [ + ("int", data_ints, dp.algorithms.laplacian.Percentile), + ("float", data_floats, dp.algorithms.laplacian.Percentile), + ], +) +def test_percentile_datatypes(data, dtype, dp_percentile): + dp_percentile_2 = dp_percentile( + dtype=dtype, epsilon=1.0, percentile=0.45, lower_bound=0, upper_bound=200 + ) + res = dp_percentile_2.quick_result(data) -def test_percentile_datatypes(data): - pe1 = dp.Percentile(1.0) - pe2 = dp.Percentile(1.0, 0, 2048) - assert isinstance(pe1, dp.Percentile) - assert isinstance(pe2, dp.Percentile) + assert isinstance(dp_percentile_2, dp_percentile) - res = pe2.result(data, 1.0) assert isinstance(res, float) - # resg = pe2.getPercentile() - # assert isinstance(resg, float) - # ress = pe2.setPercentile(0.5) - # assert isinstance(ress, None) # TODO Yet some more tests diff --git a/tests/algorithms/test_rand.py b/tests/algorithms/test_rand.py index d5d7ef84..21c67770 100644 --- a/tests/algorithms/test_rand.py +++ b/tests/algorithms/test_rand.py @@ -1,14 +1,14 @@ import pytest -import pydp as dp +from pydp.util import Geometric, UniformDouble def test_rand_UniformDouble(): - ud = dp.UniformDouble() + ud = UniformDouble() assert isinstance(ud, float) assert 0 <= ud < 1 def test_rand_Geometric(): - gn = dp.Geometric() + gn = Geometric() assert isinstance(gn, int) assert 0 <= gn <= 1025 diff --git a/tests/base/test_logging.py b/tests/base/test_logging.py index 23687c9d..a3bfca5f 100644 --- a/tests/base/test_logging.py 
+++ b/tests/base/test_logging.py @@ -1,6 +1,9 @@ import pytest import pydp as dp +# TODO: Check whether we should delete logging public binding or allow it +pytestmark = pytest.mark.skip(reason="we do not return allow user to set up logging.") + class TestLogging: def test_logging(self): diff --git a/tests/base/test_percentile.py b/tests/base/test_percentile.py index 9a45ed1f..c30da660 100644 --- a/tests/base/test_percentile.py +++ b/tests/base/test_percentile.py @@ -1,8 +1,13 @@ import pytest import pydp as dp +# TODO: check whether to delete this test suit or update it +pytestmark = pytest.mark.skip( + reason="these tests were for the older percentile wrapping. See TODO" +) -@pytest.mark.parametrize("input_class", [dp.PercentileInt, dp.PercentileDouble]) + +@pytest.mark.parametrize("input_class", [dp.algorithms.laplacian.Percentile]) class TestPercentile: def test_empty_input_set(self, input_class): percentile = input_class() diff --git a/tests/base/test_status.py b/tests/base/test_status.py index 703f1afd..3f6ec77f 100644 --- a/tests/base/test_status.py +++ b/tests/base/test_status.py @@ -2,6 +2,9 @@ import pydp as dp +# TODO: Check whether we should delete status public bindings or return status to the user +pytestmark = pytest.mark.skip(reason="we do not return status to the user.") + class TestStatus: def test_hello(self): diff --git a/tests/base/test_statusor.py b/tests/base/test_statusor.py deleted file mode 100644 index e69de29b..00000000 diff --git a/third_party/differential-privacy b/third_party/differential-privacy index 0b0a5c23..1b1dc663 160000 --- a/third_party/differential-privacy +++ b/third_party/differential-privacy @@ -1 +1 @@ -Subproject commit 0b0a5c2315d84a6a7b1ff34591e33ec11680891e +Subproject commit 1b1dc6639173c0a13613189ec21851604a4c7335