In [1]:
import math
import random
import numpy as np
import pandas as pd

In [2]:
# Load the residential load profiles. A later cell pops the first column of
# row 0 as a timestamp string and treats the remaining columns as per-house values.
df = pd.read_csv('../data/Residential-Profiles.csv')

In [3]:
# k = number of possible values (k > 2) of the categorical attribute Aj
# B = failure probability: the error bound holds with probability (1 - B)
# n = number of households (200)

In [4]:
# input parameters
k = 16  # number of categories; also the number of columns of matrix_phi
B = 0.3  # the error bound holds with probability (1 - B)
n = 200  # number of households; the aggregate is divided by this value
epsilon = 1  # presumably the privacy parameter of the randomized response -- TODO confirm

In [5]:
# Lambda = sqrt( ln(2k / B) / (epsilon^2 * n) )
log_term = math.log(2*k/B)
scaled_population = epsilon*epsilon*n
lambda_ = math.sqrt(log_term/scaled_population)
lambda_

0.15280230212803836

In [6]:
# Number of rows of the random matrix:
# m = ln(k + 1) * ln(2 / B) / Lambda^2, truncated to an integer.
m = int((math.log(k+1) * math.log(2/B)) / (lambda_ * lambda_))
m

230

In [7]:
# Allocate the random matrix phi with m rows and k columns; it starts as all
# zeros and its entries are filled in by the following cell.
phi_shape = (int(m), k)
matrix_phi = np.zeros(phi_shape)
matrix_phi

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
# Fill phi with independent fair-coin entries: -1/sqrt(m) when the uniform
# draw is <= 0.5, +1/sqrt(m) otherwise.
# A single vectorized draw replaces the element-wise Python double loop; the
# samples are produced in row-major order, i.e. in the same order the nested
# loops consumed them, so the resulting matrix is unchanged for a given RNG state.
phi_scale = 1 / math.sqrt(m)
coin_tosses = np.random.random(matrix_phi.shape)
# Write in place so that matrix_phi keeps referring to the same array object.
matrix_phi[...] = np.where(coin_tosses <= 0.5, -phi_scale, phi_scale)

matrix_phi

array([[-0.06593805, -0.06593805,  0.06593805, ..., -0.06593805,
        -0.06593805, -0.06593805],
       [ 0.06593805,  0.06593805, -0.06593805, ...,  0.06593805,
        -0.06593805,  0.06593805],
       [ 0.06593805,  0.06593805,  0.06593805, ..., -0.06593805,
         0.06593805, -0.06593805],
       ...,
       [ 0.06593805, -0.06593805,  0.06593805, ..., -0.06593805,
        -0.06593805,  0.06593805],
       [-0.06593805, -0.06593805, -0.06593805, ..., -0.06593805,
        -0.06593805,  0.06593805],
       [ 0.06593805,  0.06593805,  0.06593805, ...,  0.06593805,
        -0.06593805, -0.06593805]])

In [9]:
def perturb(t, m, epsilon=1):
    """Randomly keep or flip the sign of a scaled value (randomized response).

    Parameters
    ----------
    t : float
        The value to perturb.
    m : int or float
        Scale factor applied to ``t``.
    epsilon : float, optional
        Parameter controlling the keep probability and the scaling constant.
        Defaults to 1, the value that used to be hard-coded in the body, so
        existing ``perturb(t, m)`` calls behave exactly as before. Pass the
        notebook-level ``epsilon`` explicitly if it differs from 1.

    Returns
    -------
    float
        ``c_epsilon * m * t`` with probability ``e^eps / (e^eps + 1)`` and
        ``-c_epsilon * m * t`` otherwise, where
        ``c_epsilon = (e^eps + 1) / (e^eps - 1)``.

    Raises
    ------
    ValueError
        If ``epsilon`` is not strictly positive (``c_epsilon`` would divide
        by zero at 0 and the keep probability would drop below 1/2 for
        negative values).
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be strictly positive, got %r" % (epsilon,))

    exp_eps = math.exp(epsilon)
    c_epsilon = (exp_eps + 1) / (exp_eps - 1)
    p = exp_eps / (exp_eps + 1)

    # One uniform draw decides whether the sign is kept (probability p) or flipped.
    sign = 1 if np.random.random() <= p else -1
    return sign * c_epsilon * m * t
    

In [10]:
def encode(value):
    """One-hot encode a scalar reading into 16 slots.

    The first 15 slots are the counts of ``np.histogram`` with 15 equal-width
    bins over the range 0..1500. The 16th slot is an overflow flag set to 1
    when ``value`` is greater than 1500 and 0 otherwise. Values that
    ``np.histogram`` ignores as out of range and that are not above 1500
    (e.g. negative ones) therefore yield an all-zero list.

    Returns a plain Python list of length 16.
    """
    counts, _ = np.histogram(value, bins=15, range=(0, 1500))
    overflow_flag = 1 if value > 1500 else 0
    return list(np.append(counts, overflow_flag))

In [11]:
# Histogram frequency estimation for the first timestamp across all houses:
# take row 0 of the frame as a list, then remove its leading element (the
# cell displays the removed value).
row = list(df.iloc[0])
row.pop(0)

'1/1/10 0:00'

In [12]:
# Client side: every reading is turned into one report [row index, perturbed value].
store = []
for reading in row:
    # Pick a uniformly random row of matrix_phi for this reading.
    sampled_row = random.randrange(m)

    # Column of phi = position of the 1 in the reading's one-hot encoding.
    category = encode(reading).index(1)
    phi_entry = matrix_phi[sampled_row, category]

    # Only the perturbed entry is sent to the server, together with the row
    # index, e.g. <3, -4.838746933262355>.
    store.append([sampled_row, perturb(phi_entry, m)])
    

In [13]:
# Server side: accumulate every report into the slot of its row index, then
# divide by the number of households n.
# Each report touches exactly one coordinate, so it is added in place instead
# of materialising a length-m zero vector per report; adding the zeros of such
# a vector would leave the other coordinates unchanged anyway.
Z_aggregate = np.zeros(m)
for reported_row, reported_value in store:
    Z_aggregate[reported_row] += reported_value

Z_aggregate = Z_aggregate / n

In [14]:
# Estimate for category j = inner product of column j of phi with the aggregate.
estimate = [
    np.inner(matrix_phi[:, column], Z_aggregate)
    for column in range(k)
]

In [15]:
# Post-process the raw estimates into relative frequencies.
JL_w_neg = np.asarray(estimate, dtype=float)
# Anything that is not >= 0 (negative estimates, NaN) is replaced by 0.
JL_w_neg = np.where(JL_w_neg >= 0, JL_w_neg, 0.0)

# Sequential Python sum, matching the summation order of a plain list sum.
total_mass = sum(JL_w_neg)
if not total_mass > 0:
    # Previously this case failed with an unrelated-looking TypeError
    # (list / int) because every entry had been clipped to the integer 0.
    raise ValueError(
        "all frequency estimates are non-positive; cannot normalise the histogram"
    )
JL_w_neg = JL_w_neg / total_mass

In [16]:
# Relative-frequency output for the 16 histogram bins.
# The paper reports the average of 5 runs of this result.
print(
    "Relative freqency estimates for JLRR for epsilon=1:",
    list(JL_w_neg),
    sep="\n",
)

Relative freqency estimates for JLRR for epsilon=1:
[0.11111111111111113, 0.0, 0.0707070707070707, 0.17171717171717166, 0.18181818181818182, 0.0, 0.0, 0.020202020202020207, 0.040404040404040414, 0.01010101010101011, 0.07070707070707072, 0.0, 0.0, 0.0707070707070707, 0.0707070707070707, 0.18181818181818182]
