In [46]:
""" CDF Estimator
    This code implements Algorithm 1 from the Ektelo paper. There are two methods:
    Kernel - Sets up an instance of the KernelService (KS), which implements a public API 
             for querying the PrivateManager (PM). The PM is the only software component that 
             has access to the unaltered private data. It is intended to be run on a private 
             server.
    Client - Takes a handle to the KS and uses it to create a ProtectedDataSource (PDS), which 
             represents the client's public interface to the private data. Transformations are 
             applied to the PDS by invoking methods on it. Each transformation returns a mutated 
             PDS. The client can call for a measurement on a PDS at any time. A noisy result
             will be returned by the server provided that the total privacy budget has not been
             exceeded.
"""
from ektelo import workload
from ektelo import support
from ektelo.client import service as cservice
from ektelo.private import kernel
from ektelo.private import service as pservice
from ektelo.wrapper import identity
from ektelo.wrapper import non_negative_least_squares
import os
import yaml

# Directory holding the csv data files; read from the EKTELO_DATA variable
CSV_PATH = os.environ['EKTELO_DATA']
# Directory holding the yaml configs: <EKTELO_HOME>/resources/config
CONFIG_PATH = os.path.join(
    os.environ['EKTELO_HOME'],
    'resources',
    'config',
)


def Kernel(eps_total, random_seed):
    """ Build the server-side kernel service that guards the private data.

        In an actual deployment, this code would be run on a
        private server with access to the unaltered data.

        Args:
            eps_total: total privacy budget, handed to the PrivateManager
                       as its ``budget``.
            random_seed: seed forwarded to the PrivateManager.

        Returns:
            A KernelService wrapping the newly created PrivateManager.
    """
    # Location of csv data file
    filename = os.path.join(CSV_PATH, 'cps.csv')

    # Configuration for data. The context manager closes the file handle, and
    # safe_load replaces the Loader-less yaml.load call (deprecated since
    # PyYAML 5.1, an error in PyYAML 6).
    # NOTE(review): assumes cps.yml uses only plain YAML tags -- confirm.
    config_file = os.path.join(CONFIG_PATH, 'cps.yml')
    with open(config_file, 'r') as config_stream:
        config = yaml.safe_load(config_stream)['cps_config']

    # Private manager (or kernel) guards access to data
    private_manager = kernel.PrivateManager(filename,
                                            config,
                                            random_seed=random_seed,
                                            budget=eps_total)

    # Kernel service mediates server-side access to kernel
    kernel_service = pservice.KernelService(private_manager)

    return kernel_service


def Client(kernel_service, domain, eta, ratio, n, eps_total=None):
    """ This is the code that would run on the client side. The client 
        creates a protected data source, which it queries from time to time. 
        The client also manipulates data returned from the protected data 
        source by applying public operators locally.

        Args:
            kernel_service: handle used to create the ProtectedDataSource.
            domain: domain passed to ``vectorize``.
            eta: parameter forwarded to ``ahp_partition``.
            ratio: fraction of the budget given to the partition step; the
                   Laplace measurement is called with ``eps_total*(1-ratio)``.
            n: size passed to ``ahp_partition`` and to the Prefix workload.
            eps_total: total privacy budget. When omitted, the module-level
                       ``eps_total`` is used, which keeps existing five
                       argument calls working.

        Returns:
            None. The answers to the Prefix workload are printed.

        Raises:
            NameError: if ``eps_total`` is neither passed nor defined at
                       module level.
    """
    # Resolve the budget before touching the data source, so that a missing
    # budget is reported before any query is issued.
    if eps_total is None:
        if 'eps_total' not in globals():
            raise NameError("name 'eps_total' is not defined; pass eps_total "
                            "to Client or define it at module level")
        eps_total = globals()['eps_total']

    # Protected data source mediates client-side access to kernel service
    R = cservice.ProtectedDataSource(kernel_service)

    # Filter data
    R = R.where('sex==2')
    R = R.project(['income'])

    # Transform relation to vector
    x = R.vectorize(domain)

    # Use fraction "ratio" of budget to determine reduced mapping 
    mapping = x.ahp_partition(n, ratio, eta, eps_total)

    # Reduce x according to this mapping
    x_bar = x.reduce_by_partition(mapping)

    # Use remaining budget to get noisy x from reduced domain
    M_bar = identity((len(set(mapping)),))
    y_bar = x_bar.laplace(M_bar, eps_total*(1-ratio))

    # Infer actual x from noisy answer
    x_bar_hat = non_negative_least_squares(M_bar, y_bar)

    # project inferred x back to original domain
    x_hat = support.expansion_matrix(mapping) * x_bar_hat

    # A Prefix workload of queries
    W = workload.Prefix(n)

    # Report query results
    print(W.matrix * x_hat)


# Arbitrary client-side constants
domain = (50,)
n = domain[0]
eta = 0.35
ratio = 0.6

# Arbitrary private (server-side) constants
random_seed = 10
eps_total = 0.01

# Server: build the kernel service with the total budget and seed
kernel_service = Kernel(eps_total, random_seed)

# Client: run the CDF estimator against the kernel service
Client(kernel_service, domain, eta, ratio, n)

[[7450.74204165    0.            0.         ...    0.
     0.            0.        ]
 [7450.74204165 6383.48071358    0.         ...    0.
     0.            0.        ]
 [7450.74204165 6383.48071358 3447.08311331 ...    0.
     0.            0.        ]
 ...
 [7450.74204165 6383.48071358 3447.08311331 ...   98.95539361
     0.            0.        ]
 [7450.74204165 6383.48071358 3447.08311331 ...   98.95539361
    20.39745476    0.        ]
 [7450.74204165 6383.48071358 3447.08311331 ...   98.95539361
    20.39745476   20.39745476]]


  config = yaml.load(open(config_file, 'r').read())['cps_config']


In [47]:
""" Example of the invocation of a standalone plan 
"""
from ektelo import data
from ektelo import workload
from ektelo.plans import standalone
from ektelo.private import transformation
import os
import numpy as np
import yaml

# Data and configuration locations are taken from the environment
CSV_PATH = os.environ['EKTELO_DATA']
CONFIG_PATH = os.path.join(os.environ['EKTELO_HOME'], 'resources', 'config')

# Load relation 
filename = os.path.join(CSV_PATH, 'cps.csv')
config_file = os.path.join(CONFIG_PATH, 'cps.yml')
# The context manager closes the file handle, and safe_load replaces the
# Loader-less yaml.load call (deprecated since PyYAML 5.1, an error in 6).
# NOTE(review): assumes cps.yml uses only plain YAML tags -- confirm.
with open(config_file, 'r') as config_stream:
    config = yaml.safe_load(config_stream)['cps_config']
R = data.Relation(config).load_csv(filename, ',')

# Reduced domain onto which the relation is vectorized
domain = (10, 1, 7, 1, 1)

# Turn the relation into a data vector over the reduced domain
x = transformation.Vectorize('CPS', reduced_domain=domain).transform(R)

# Arbitrary constants for MWEM
epsilon = 0.1
seed = 0
ratio = 0.5
rounds = 3
data_scale = 1e5
use_history = True

# Query workload over the flattened domain (np.prod(domain) cells)
W = workload.RandomRange(None, (np.prod(domain),), 25)

# Run the MWEM plan to obtain a noisy estimate of x
x_hat = standalone.Mwem(
    ratio, rounds, data_scale, domain, use_history
).Run(W, x, epsilon, seed)

# Print the workload answers computed from the noisy estimate
print(W.matrix * x_hat)

  config = yaml.load(open(config_file, 'r').read())['cps_config']
  out.append(len(numpy.unique(p_grid[ind_slice])))


[2.70122569e+04 7.31911444e+01 2.70100549e+04 1.63752505e+04
 1.40573532e+04 3.72880162e+01 3.48021485e+01 2.33687164e+04
 6.21074474e+01 1.24293387e+01 9.94347099e+00 7.03382834e+01
 7.30656221e+04 2.56368964e+04 2.10110452e+04 9.36438354e+03
 5.05344262e+01 1.95729717e+00 2.34737894e+01 2.95465045e+01
 2.36346003e+03 2.33048571e+04 2.45747690e+01 2.10383897e+04
 4.71118778e+03]
