Starting from the SimplerRefactor, going to start vectorization

In [5]:
import numpy as np
import flax
from jax import numpy as jnp

In [6]:
from scipy.optimize import minimize_scalar

In [7]:
from nacl.signing import VerifyKey
from typing import Optional, Callable

In [8]:
# @lru_cache(maxsize=None)
def _IRDP_gaussian(
    sigma: float, value: jnp.array, L: float, alpha: float
) -> jnp.array:
    return (alpha * (L**2) * np.square(value)) / (2 * (sigma**2))

def IRDP_gaussian(sigma, value, L, alpha: float) -> np.ndarray:
    """
    :param params:
        'sigma' --- is the normalized noise level: std divided by global L2 sensitivity
        'value' --- is the output of query on a data point
        'L' --- is the Lipschitz constant of query with respect to the output of query on a data point
    :param alpha: The order of the Renyi Divergence
    :return: Evaluation of the RDP's epsilon
    """
    if sigma <= 0:
        raise Exception("Sigma should be above 0")
    if alpha < 0:
        raise Exception("Sigma should not be below 0")
    return _IRDP_gaussian(sigma=sigma, alpha=alpha, value=value, L=L)

In [9]:
IRDP_gaussian(sigma=1, value=np.ones(10), L=4, alpha=3)

array([24., 24., 24., 24., 24., 24., 24., 24., 24., 24.])

## TODO:
- Make `rdp_to_approxdp` below work when rdp gives arrays instead of single values

In [10]:
from scipy.optimize import minimize

In [11]:
def rdp_to_approxdp(rdp, alpha_max=np.inf):
    # from RDP to approx DP
    # alpha_max is an optional input which sometimes helps avoid numerical issues
    # By default, we are using the RDP to approx-DP conversion due to BBGHS'19's Theorem 21
    # paper: https://arxiv.org/pdf/1905.09982.pdf
    # if you need to use the simpler RDP to approxDP conversion for some reason, turn the flag off

    def approxdp(delta):
        """
        approxdp outputs eps as a function of delta based on rdp calculations
        :param delta:
        :return: the \epsilon with a given delta
        """

        if delta < 0 or delta > 1:
            print("Error! delta is a probability and must be between 0 and 1")
        if delta == 0:
            return rdp(np.inf)
        else:
            def fun(x):  # the input is the RDP's \alpha
                if x <= 1:
                    return np.inf
                else:
                    return np.maximum(rdp(x) + np.log((x-1)/x) - (np.log(delta) + np.log(x))/(x-1), 0)
                    
            results = minimize_scalar(fun, method='Brent', bracket=(1,2), bounds=[1, alpha_max])
            # results = minimize(fun, 
            if results.success:
                return results.fun
            else:
                # There are cases when certain \delta is not feasible.
                # For example, let p and q be uniform the privacy R.V. is either 0 or \infty and unless all \infty
                # events are taken cared of by \delta, \epsilon cannot be < \infty
                return np.inf
    return approxdp

Check to see if fun(x) works with vectors:

In [12]:
def f(rdp, x, delta=1e-6):
    return np.maximum(rdp(x) + np.log((x-1)/x) - (np.log(delta) + np.log(x))/(x-1), 0)

In [13]:
f(rdp=lambda x: IRDP_gaussian(sigma=2, value=x, L=4, alpha=3),x=np.arange(2,10))

array([ 36.4292162 ,  59.95298403,  99.85538999, 152.82837461,
       218.22242866, 295.82411605, 385.5430499 , 487.33450271])

In [14]:
from functools import partial

In [15]:
rdp= lambda x: IRDP_gaussian(sigma=2, value=x, L=4, alpha=3)

In [16]:
rdp(1)

6.0

In [17]:
IRDP_gaussian(sigma=2, value=1, L=4, alpha=3)

6.0

In [18]:
@flax.struct.dataclass
class VectorizedGaussianMechanism:
    """ We're going to store all the values for the Gaussian Mechanisms in a single instance of this class."""
    
    sigma: float
    public_values: jnp.array  # squared L2 norm on min/max metadata
    private_values: jnp.array  # squared L2 norm on raw values
    L: float
    # These two are from the DataSubjectList class
    entity_indices: jnp.array
    entity_lookup: Optional[jnp.array] = None
    use_private: bool = False
    user_key: Optional[VerifyKey] = None
    
    
    def RDP(self, alpha: int) -> jnp.array:
        if self.use_private:
            return IRDP_gaussian(sigma=self.sigma, value=self.private_values, L=self.L, alpha=alpha)
        else:
            return IRDP_gaussian(sigma=self.sigma, value=self.public_values, L=self.L, alpha=alpha)
    
    def total_RDP(self, alpha:int) -> float:
        return np.sum(self.RDP(alpha))
    
    def approxDP(self, delta: float, alpha: int) -> jnp.array:
        return np.minimum(self.RDP(alpha), rdp_to_approxdp(self.RDP)(delta)) #### IF I CAN FIX THIS THEN EVERYTHING WILL WORK
    
    def compose_and_get_epsilon(self, delta: float = 1e-6):
        # Combine previous steps and directly return privacy budget
        # Since compose() creates a new mechanism, propagate_updates() really just uses the `newrdp()` function defined there, which in our case is equivalent to the total_RDP function.
        # Since we're just interested in the total privacy budget spend, we just need to calculate the new approxDP value and evaluate it at the value of delta
        return np.minimum(self.total_RDP, rdp_to_approxdp(self.total_RDP))(delta)  # REPLACE THIS WITH WHATEVER FIX MAKES `approxDP` work

In [19]:
size = 10
vgm = VectorizedGaussianMechanism(
    sigma=2, 
    public_values=np.arange(1, 6), 
    private_values=np.random.randint(low=1, high=6, size=size), 
    L=3, 
    entity_indices=np.arange(size)
)

In [20]:
vgm.RDP(alpha=5)

array([  5.625,  22.5  ,  50.625,  90.   , 140.625])

In [21]:
sum(vgm.RDP(alpha=5))

309.375

In [22]:
vgm.total_RDP(alpha=5)

309.375

In [23]:
vgm.approxDP(delta=1e-6, alpha=5)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()