In [111]:
import numpy as np
from scipy.optimize import minimize_scalar

class DataSubjectLedger:
    """for a particular data subject, this is the list
    of all mechanisms releasing informationo about this
    particular subject, stored in a vectorized form"""
    
    def __init__(self, default_delta=1e-6):
        
        self.default_delta = default_delta
        
        self.sigmas = np.array([])
        self.l2_norms = np.array([])
        self.l2_norm_bounds = np.array([])
        self.Ls = np.array([])
        self.coeffs = np.array([])
        self.deltas = np.array([])
        self.entity_ids = np.array([])
        self.entity2budget = np.array([])
        
        self.cache_constant2epsilon = list()     
        
        for i in range(10000):
            alpha, eps = self.get_optimal_alpha_for_constant(i+1)
            self.cache_constant2epsilon.append(eps)
            
        self.cache_constant2epsilon = np.array(self.cache_constant2epsilon)        
        
        
    def append_batch(self, 
                     sigmas: np.ndarray, 
                     l2_norms: np.ndarray, 
                     l2_norm_bounds: np.ndarray, 
                     Ls: np.ndarray, 
                     coeffs: np.ndarray, 
                     deltas: np.ndarray, 
                     entity_ids: np.ndarray):
        
        self.sigmas = np.concatenate([self.sigmas, sigmas])
        self.l2_norms = np.concatenate([self.l2_norms, l2_norms])        
        self.l2_norm_bounds = np.concatenate([self.l2_norm_bounds, l2_norm_bounds])        
        self.Ls = np.concatenate([self.Ls, Ls])        
        self.coeffs = np.concatenate([self.coeffs, coeffs])        
        self.deltas = np.concatenate([self.deltas, deltas])        
        self.entity_ids = np.concatenate([self.entity_ids, entity_ids])                
        
    def get_rdp_func(self, entity_id, private=True):
        
            constant = self.get_rdp_constant(entity_id=entity_id, private=private)
            
            def rdp_func(alpha):
                return alpha * constant

            return rdp_func  
        
    def get_fake_rdp_func(self, constant):
        
        def func(alpha):
            return alpha * constant
        
        return func

    def get_alpha_search_function(self, entity_id, func_override=None):
        
        if func_override is None:
            rdp_compose_func = self.get_rdp_func(entity_id)
        else:
            rdp_compose_func = func_override
            
        if len(self.deltas) > 0:
            delta = np.max(self.deltas)
        else:
            delta = self.default_delta
            
        log_delta = np.log(delta)
        
        def fun(alpha):  # the input is the RDP's \alpha
            
            if alpha <= 1:
                return np.inf
            else:
                alpha_minus_1 = alpha-1
                return np.maximum(rdp_compose_func(alpha) + np.log(alpha_minus_1/alpha)
                                  - (log_delta + np.log(alpha))/alpha_minus_1, 0)
        return fun    
    
    def get_epsilon_spend(self, entity_ids):
        eps_spends = list()
                
        for entity_id in entity_ids:
            
            constant = self.get_rdp_constant(entity_id=entity_id, private=True)
            if constant == 0:
                # when the constant is 0 it typically means no entities were found
                # but regardless it means that the epsilon spend would be 0.
                eps_spends.append(0.0)
            else:
                constant2lookup_constant = int(constant-1)
                print(constant2lookup_constant)
                eps_spends.append(self.cache_constant2epsilon[constant2lookup_constant])
            
        return eps_spends
    
    def get_optimal_alpha_for_constant(self, constant=3):
        
        f = self.get_fake_rdp_func(constant)
        f2 = self.get_alpha_search_function(entity_id=1, func_override=f)
        results = minimize_scalar(f2, method='Brent', bracket=(1,2), bounds=[1, np.inf])
        
        return results.x, results.fun
    

    def get_rdp_constant(self, entity_id, private=True):
    
        squared_Ls = self.Ls**2
        squared_sigma = self.sigmas**2
        entity_mask = self.entity_ids == entity_id
        
        if private:
            
            squared_L2_norms = self.l2_norms**2            
            private_constant = (squared_Ls * squared_L2_norms / (2 * squared_sigma)) * entity_mask
            private_constant = private_constant * self.coeffs
            private_constant = np.sum(private_constant)
            
            return private_constant
        else:
            squared_L2_norm_bounds = self.l2_norms_bound**2            
            public_constant = (squared_Ls * squared_L2_norm_bounds / (2 * squared_sigma)) * entity_mask
            public_constant = public_constant * self.coeffs
            public_constant = np.sum(private_constant)
            return public_constant    


In [112]:
entity_ids_query = np.array([2,3])

In [113]:
indices_batch = np.where(np.in1d(ledger.entity_ids, entity_ids_query))[0]

In [114]:
batch_sigmas = ledger.sigmas.take(indices_batch)
batch_Ls = ledger.Ls.take(indices_batch)
batch_l2_norms = ledger.l2_norms.take(indices_batch)
batch_l2_norm_bounds = ledger.l2_norm_bounds.take(indices_batch)
batch_coeffs = ledger.coeffs.take(indices_batch)
batch_entity_ids = ledger.entity_ids.take(indices_batch)

# squared_Ls = batch_

In [110]:
batch_entity_ids

array([2., 2., 3., 3.])

In [45]:
ledger = DataSubjectLedger()

ledger.append_batch(sigmas=np.ones(2),
                    l2_norms=np.ones(2)*10,
                    l2_norm_bounds=np.ones(2)*40,
                    Ls=np.ones(2)*5,
                    coeffs=np.ones(2),
                    deltas=np.ones(2)*1e6,
                    entity_ids=np.ones(2))

ledger.append_batch(sigmas=np.ones(2),
                    l2_norms=np.ones(2)*10,
                    l2_norm_bounds=np.ones(2)*40,
                    Ls=np.ones(2)*5,
                    coeffs=np.ones(2),
                    deltas=np.ones(2)*1e6,
                    entity_ids=np.ones(2)+1)

ledger.append_batch(sigmas=np.ones(2),
                    l2_norms=np.ones(2)*10,
                    l2_norm_bounds=np.ones(2)*40,
                    Ls=np.ones(2)*5,
                    coeffs=np.ones(2),
                    deltas=np.ones(2)*1e6,
                    entity_ids=np.ones(2)+2)


ledger.get_epsilon_spend(entity_ids=[1,2])

  w = xb - ((xb - xc) * tmp2 - (xb - xa) * tmp1) / denom


2499
2499


[2868.0555591748666, 2868.0555591748666]