Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions pydp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from ._pydp import *
from pydp import algorithms
from pydp import algorithms, distributions, util


__version__ = "0.1.4"
28 changes: 15 additions & 13 deletions pydp/algorithms/algorithm.py → pydp/algorithms/_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,19 +105,21 @@ def reset(self):
"""
return self.__algorithm.reset()

def serialize(self):
Comment thread
alejandrosame marked this conversation as resolved.
"""
Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged.

Returns empty summary for algorithms for which serialize is unimplemented.
"""
return self.__algorithm.serialize()

def merge(self, summary):
"""
Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty.
"""
return self.__algorithm.merge(summary)
# TODO: Wrap Summary class before exposing serialize and merge methods.
#
# def serialize(self):
# """
# Serializes summary data of current entries into Summary proto. This allows results from distributed aggregation to be recorded and later merged.
#
# Returns empty summary for algorithms for which serialize is unimplemented.
# """
# return self.__algorithm.serialize()
#
# def merge(self, summary):
# """
# Merges serialized summary data into this algorithm. The summary proto must represent data from the same algorithm type with identical parameters. The data field must contain the algorithm summary type of the corresponding algorithm used. The summary proto cannot be empty.
# """
# return self.__algorithm.merge(summary)

def noise_confidence_interval(self, confidence_level, privacy_budget):
"""
Expand Down
6 changes: 3 additions & 3 deletions pydp/algorithms/laplacian/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .bounded_algorithms import (
from ._bounded_algorithms import (
BoundedMean,
BoundedSum,
BoundedStandardDeviation,
Expand All @@ -7,8 +7,8 @@
Min,
Median,
)
from .count import Count
from .percentile import Percentile
from ._count import Count
from ._percentile import Percentile

__all__ = [
"BoundedMean",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ..algorithm import BoundedAlgorithm
from .._algorithm import BoundedAlgorithm


class BoundedMean(BoundedAlgorithm):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ..algorithm import Algorithm
from .._algorithm import Algorithm


class Count(Algorithm):
Expand Down
5 changes: 5 additions & 0 deletions pydp/algorithms/laplacian/_percentile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .._algorithm import BoundedAlgorithm, PercentileBase


class Percentile(PercentileBase):
pass
5 changes: 0 additions & 5 deletions pydp/algorithms/laplacian/percentile.py

This file was deleted.

1 change: 1 addition & 0 deletions pydp/distributions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .._pydp._distributions import *
1 change: 1 addition & 0 deletions pydp/util/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .._pydp._util import *
37 changes: 18 additions & 19 deletions src/bindings/PyDP/algorithms/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,36 @@ namespace py = pybind11;
namespace dp = differential_privacy;

void init_algorithms_util(py::module& m) {
py::module util = m.def_submodule("util", "Some Utility Functions");
util.attr("__module__") = "pydp";
util.def("xor_strings", &dp::XorStrings);
util.def("default_epsilon", &dp::DefaultEpsilon);
util.def("get_next_power_of_two", &dp::GetNextPowerOfTwo);
util.def("qnorm", &dp::Qnorm);
util.def("mean", &dp::Mean<double>);
util.def("mean", &dp::Mean<int>);
util.def("variance", &dp::Variance<double>);
util.def("standard_deviation", &dp::StandardDev<double>);
util.def("order_statistics", &dp::OrderStatistic<double>);
util.def("correlation", &dp::Correlation<double>);
util.def("vector_filter", &dp::VectorFilter<double>);
util.def("vector_to_string", &dp::VectorToString<double>);
util.def("round_to_nearest_multiple", &dp::RoundToNearestMultiple);
util.def("safe_add", [](int64_t i, int64_t j) {
m.attr("__module__") = "pydp";
m.def("xor_strings", &dp::XorStrings);
m.def("default_epsilon", &dp::DefaultEpsilon);
m.def("get_next_power_of_two", &dp::GetNextPowerOfTwo);
m.def("qnorm", &dp::Qnorm);
m.def("mean", &dp::Mean<double>);
m.def("mean", &dp::Mean<int>);
m.def("variance", &dp::Variance<double>);
m.def("standard_deviation", &dp::StandardDev<double>);
m.def("order_statistics", &dp::OrderStatistic<double>);
m.def("correlation", &dp::Correlation<double>);
m.def("vector_filter", &dp::VectorFilter<double>);
m.def("vector_to_string", &dp::VectorToString<double>);
m.def("round_to_nearest_multiple", &dp::RoundToNearestMultiple);
m.def("safe_add", [](int64_t i, int64_t j) {
int64_t k;
bool result = dp::SafeAdd(i, j, &k);
if (result) return k;
throw std::runtime_error("Result of addition will overflow.");
});
util.def("safe_subtract", [](int64_t i, int64_t j) {
m.def("safe_subtract", [](int64_t i, int64_t j) {
int64_t k;
bool result = dp::SafeSubtract(i, j, &k);
if (result) return k;
throw std::runtime_error("Result of subtraction will overflow.");
});
util.def("safe_square", [](int64_t i) {
m.def("safe_square", [](int64_t i) {
int64_t k;
bool result = dp::SafeSquare(i, &k);
if (result) return k;
throw std::runtime_error("Result of squaring will overflow.");
});
}
}
15 changes: 9 additions & 6 deletions src/bindings/PyDP/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ namespace py = pybind11;
// base
void init_base_status(py::module &);
void init_base_logging(py::module &);
// void init_base_percentile(py::module &);

// bounded functions
void init_algorithms_bounded_functions(py::module &);
Expand All @@ -34,17 +33,21 @@ PYBIND11_MODULE(_pydp, m) {
// Base
init_base_status(m);
init_base_logging(m);
// init_base_percentile(m);

// Algorithms
auto malgorithms = m.def_submodule("_algorithms");
init_algorithms_bounded_functions(malgorithms);
init_algorithms_util(m);
init_algorithms_distributions(m);
init_algorithms_order_statistics(malgorithms);
init_algorithms_rand(m);
init_algorithms_count(malgorithms);
init_algorithms_order_statistics(malgorithms);

auto mdistributions = m.def_submodule("_distributions");
init_algorithms_distributions(mdistributions);

auto mutil = m.def_submodule("_util", "Some Utility Functions");
init_algorithms_rand(mutil);
init_algorithms_util(mutil);

// Proto
// TODO: Delete if it is not necessary (we no longer return StatusOr to the user)
init_proto(m);
}
Empty file.
49 changes: 30 additions & 19 deletions tests/algorithms/test_distributions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import pytest
from pydp.distributions import (
LaplaceDistribution,
GaussianDistribution,
# GeometricDistribution,
)
import pydp as dp
import math
from typing import List
from itertools import accumulate
import math


k_num_samples = 10000000
k_num_geometric_samples = 1000000
k_gaussian_samples = 1000000
Expand Down Expand Up @@ -50,12 +56,12 @@ def kurtosis(samples: List[float], mu: float, var: float):
class TestLaplaceDistribution:
def test_diversity_getter(self):
sensitivity, epsilon = 1.0, 22.0
dist = dp.LaplaceDistribution(epsilon=epsilon, sensitivity=sensitivity)
dist = LaplaceDistribution(epsilon=epsilon, sensitivity=sensitivity)
assert dist.get_diversity() == sensitivity / epsilon

def test_check_statistics_for_geo_unit_values(self):

ld = dp.LaplaceDistribution(epsilon=1.0, sensitivity=1.0)
ld = LaplaceDistribution(epsilon=1.0, sensitivity=1.0)
samples = [ld.sample(scale=1.0) for _ in range(k_num_geometric_samples)]
mean = dp.util.mean(samples)
var = dp.util.variance(samples)
Expand All @@ -69,14 +75,14 @@ def test_check_statistics_for_geo_unit_values(self):
class TestGaussianDistribution:
def test_standard_deviation_getter(self):
stddev = k_one_over_log2
dist = dp.GaussianDistribution(stddev)
dist = GaussianDistribution(stddev)
assert dist.stddev == stddev


class TestLaplaceDistributionDatatypes:
def test_LaplaceDistributionTypes(self):
ld = dp.LaplaceDistribution(epsilon=1.0, sensitivity=1.0)
assert isinstance(ld, dp.LaplaceDistribution)
ld = LaplaceDistribution(epsilon=1.0, sensitivity=1.0)
assert isinstance(ld, LaplaceDistribution)

sud = ld.get_uniform_double()
assert isinstance(sud, float)
Expand All @@ -93,8 +99,8 @@ def test_LaplaceDistributionTypes(self):

class TestGaussianDistributionDataTypes:
def test_GaussianDistributionTypes(self):
gd = dp.GaussianDistribution(3)
assert isinstance(gd, dp.GaussianDistribution)
gd = GaussianDistribution(3)
assert isinstance(gd, GaussianDistribution)

gds = gd.sample()
gds1 = gd.sample(1.0)
Expand All @@ -104,18 +110,23 @@ def test_GaussianDistributionTypes(self):
assert isinstance(gdstd, float)


# class TestGeometricDistribution:
# def test_ratios(self):
# from collections import Counter
# p=1e-2
# dist = dp.GeometricDistribution(lambda_=-1.0*math.log(1-p))
# samples = [dist.sample() for _ in range(k_num_geometric_samples)]
# counts = list(Counter([s for s in samples if s < 51]).values())
# ratios = [c_i/c_j for c_i, c_j in zip(counts[:-1], counts[1:])]
# This test fails. It's a replica of
# https://github.com/google/differential-privacy/blob/9923ad4ee1b84a7002085e50345fcc05f2b21bcb/cc/algorithms/distributions_test.cc#L208
# and should pass.
# assert expect_near(p, dp.util.mean(ratios), p / 1e-2)
class TestGeometricDistribution:
@pytest.mark.skip(reason="This test should pass, see comments")
def test_ratios(self):
"""
This test fails. It's a replica of
https://github.com/google/differential-privacy/blob/9923ad4ee1b84a7002085e50345fcc05f2b21bcb/cc/algorithms/distributions_test.cc#L208 and should pass.
"""
from collections import Counter

p = 1e-2
dist = GeometricDistribution(lambda_=-1.0 * math.log(1 - p))
samples = [dist.sample() for _ in range(k_num_geometric_samples)]
counts = list(Counter([s for s in samples if s < 51]).values())
ratios = [c_i / c_j for c_i, c_j in zip(counts[:-1], counts[1:])]

assert expect_near(p, dp.util.mean(ratios), p / 1e-2)


# TODO: port the following tests
#
Expand Down
6 changes: 3 additions & 3 deletions tests/algorithms/test_rand.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import pytest
import pydp as dp
from pydp.util import Geometric, UniformDouble


def test_rand_UniformDouble():
ud = dp.UniformDouble()
ud = UniformDouble()
assert isinstance(ud, float)
assert 0 <= ud < 1


def test_rand_Geometric():
gn = dp.Geometric()
gn = Geometric()
assert isinstance(gn, int)
assert 0 <= gn <= 1025
3 changes: 3 additions & 0 deletions tests/base/test_logging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import pytest
import pydp as dp

# TODO: Check whether we should delete logging public binding or allow it
pytestmark = pytest.mark.skip(reason="we do not return allow user to set up logging.")


class TestLogging:
def test_logging(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/base/test_percentile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# TODO: check whether to delete this test suite or update it
pytestmark = pytest.mark.skip(
reason="these tests were for the older percentile wrapping."
reason="these tests were for the older percentile wrapping. See TODO"
)


Expand Down
3 changes: 3 additions & 0 deletions tests/base/test_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import pydp as dp

# TODO: Check whether we should delete status public bindings or return status to the user
pytestmark = pytest.mark.skip(reason="we do not return status to the user.")


class TestStatus:
def test_hello(self):
Expand Down
Empty file removed tests/base/test_statusor.py
Empty file.