#Project 2: Generate Parallel Databases

Create a list of every parallel database to the one currently contained in the "db" variable. Create a function which both:

* creates the initial database (db) 
* creates all parallel databases

In [0]:
import torch

In [0]:
def create_parallel_db(db, index):

    return torch.cat((db[0:index], 
                      db[index+1:]))

In [4]:
num_els = 5000
db = torch.rand(num_els) > 0.5
db

tensor([0, 0, 1,  ..., 1, 1, 0], dtype=torch.uint8)

In [5]:
create_parallel_db(db, 47)

tensor([0, 0, 1,  ..., 1, 1, 0], dtype=torch.uint8)

In [0]:
def create_parallel_dbs(db):

    parallel_dbs = list()

    for i in range(len(db)):
        pdb = create_parallel_db(db, i)
        parallel_dbs.append(pdb)
    
    return parallel_dbs

In [0]:
pdbs = create_parallel_dbs(db)

In [0]:
def create_db_and_parallel_dbs(num_els):
    
    db = torch.rand(num_els) > 0.5
    pdbs = create_parallel_dbs(db)
    
    return db, pdbs

In [0]:
db, pdbs = create_db_and_parallel_dbs(40)

## Evaluate The Differential Privacy of a Function
Evaluate how much privacy is leaked by measuring the maximum amount the query changes when someone is removed (maximum over all possible people who could be removed). Measure the sensitivity of sum() function

In [0]:
db, pdbs = create_db_and_parallel_dbs(5000)

In [0]:
def query_sum(db):
    return db.sum()

In [0]:
entire_db_result = query_sum(db)

In [0]:
query_sensitivity = 0
for pdb in pdbs:
    pdb_result = query_sum(pdb)
    
    db_difference = torch.abs(pdb_result - entire_db_result)
    
    if(db_difference > query_sensitivity):
        query_sensitivity = db_difference

In [14]:
query_sensitivity

tensor(1)

#Project 3 - Evaluate the Privacy of a Function
Implement function to compute the "sensitivity"of the mean() function we use for the query when measuring the difference between each parallel db's query result and the query result for the entire database (max value).

In [0]:
def query_mean(db):
    return db.float().mean()

In [0]:
def f_sensitivity(f, n_els=1000):

    db, pdbs = create_db_and_parallel_dbs(n_els)
    
    entire_db_result = f(db)
    
    max_difference = 0
    for pdb in pdbs:
        pdb_result = f(pdb)

        db_difference = torch.abs(pdb_result - entire_db_result)

        if(db_difference > max_difference):
            max_difference = db_difference
            
    return max_difference

In [17]:
f_sensitivity(query_mean)

tensor(0.0005)

In [0]:
db, pdbs = create_db_and_parallel_dbs(20)

In [19]:
db

tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0],
       dtype=torch.uint8)

#Project: Calculate L1 Sensitivity For Threshold
Define function which calculates the sensitivty for a "threshold"  by computing the sum over the databases and return whether that sum is greater than a certain threshold

In [0]:
def query_sum_threshold(db, threshold=5):
    return (db.sum() > threshold).float()

In [21]:
for i in range(10):
    sens_f = f_sensitivity(query_sum_threshold, n_els=10)
    print(sens_f)

0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
tensor(1.)
0
tensor(1.)
0


#Project: Perform a Differencing Attack on Row 10
Create a database and then two different sum queries to expose the value of the person represented by row 10 in the database

In [0]:
db, _ = create_db_and_parallel_dbs(100)

In [0]:
pdb = create_parallel_db(db, index=10)

In [24]:
db[10]

tensor(0, dtype=torch.uint8)

In [25]:
sum(db)

tensor(46, dtype=torch.uint8)

In [26]:
# Differencing attack using sum()

sum(db) - sum(pdb)

tensor(0, dtype=torch.uint8)

In [27]:
# Differencing attack using mean()

(sum(db).float() / len(db)) -  (sum(pdb).float() / len(pdb))

tensor(-0.0046)

In [28]:
# Differencing attack using sum() and threshold

(sum(db).float() > 49) - (sum(pdb).float()  > 49)

tensor(0, dtype=torch.uint8)

#Project: Local Differential Privacy
Create function that will flip a virtual coin twice and randomize data points based on the result. Call this function with incemental databases.

In [0]:
def query_local_noise(db):
  
  without_noise_result = torch.mean(db.float())
    
  first_coin_flip = (torch.rand(len(db)) > 0.5).float()
    
  second_coin_flip = (torch.rand(len(db)) > 0.5).float()

  randomized_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

  with_noise_result = torch.mean(randomized_db.float()) * 2 - 0.5
    
  return with_noise_result, without_noise_result
  

In [30]:
# db size 10
db, pdbs = create_db_and_parallel_dbs(10)
with_noise_result, without_noise_result = query_local_noise(db)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.1000)
Without Noise:tensor(0.5000)


In [31]:
# db size 100
db, pdbs = create_db_and_parallel_dbs(100)
with_noise_result, without_noise_result = query_local_noise(db)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.3800)
Without Noise:tensor(0.4100)


In [32]:
# db size 1000
db, pdbs = create_db_and_parallel_dbs(1000)
with_noise_result, without_noise_result = query_local_noise(db)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.5280)
Without Noise:tensor(0.5310)


In [33]:
# db size 10000
db, pdbs = create_db_and_parallel_dbs(10000)
with_noise_result, without_noise_result = query_local_noise(db)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.5000)
Without Noise:tensor(0.4936)


#Project: Varying Amounts of Noise
Define function which takes noise arg to allow for varying amounts of randomness to be added to database i.e.The first coin flip will have varying probabilities of being 1 or 0. Experiment with different values of noise

In [0]:
def query_varying_noise(db, noise=0.2):
    
    without_noise_result = torch.mean(db.float())

    first_coin_flip = (torch.rand(len(db)) < noise).float()
    
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    randomized_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    skewed_result = randomized_db.float().mean()

    with_noise_result = ((skewed_result / noise) - 0.5) * noise / (1 - noise)

    return with_noise_result, without_noise_result

In [35]:
# noise value 0.1 - db size 100
db, pdbs = create_db_and_parallel_dbs(100)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.1)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.5667)
Without Noise:tensor(0.4900)


In [36]:
# noise value 0.2 - db size 100
db, pdbs = create_db_and_parallel_dbs(100)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.2)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.5625)
Without Noise:tensor(0.5100)


In [37]:
# noise value 0.4 - db size 100
db, pdbs = create_db_and_parallel_dbs(100)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.4)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.4167)
Without Noise:tensor(0.5700)


In [38]:
# noise value 0.8 - db size 100
db, pdbs = create_db_and_parallel_dbs(100)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.8)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.0500)
Without Noise:tensor(0.4100)


In [39]:
# noise value 0.1 - db size 10000
db, pdbs = create_db_and_parallel_dbs(10000)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.1)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.5004)
Without Noise:tensor(0.4953)


In [40]:
# noise value 0.2 - db size 10000
db, pdbs = create_db_and_parallel_dbs(10000)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.2)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.4996)
Without Noise:tensor(0.5040)


In [41]:
# noise value 0.4 - db size 10000
db, pdbs = create_db_and_parallel_dbs(10000)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.4)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.5103)
Without Noise:tensor(0.4995)


In [42]:
# noise value 0.8 - db size 10000
db, pdbs = create_db_and_parallel_dbs(10000)
with_noise_result, without_noise_result = query_varying_noise(db, noise=0.8)
print("With Noise:" + str(with_noise_result))
print("Without Noise:" + str(without_noise_result))

With Noise:tensor(0.4690)
Without Noise:tensor(0.4940)


#Project: Create a Differentially Private Query
Create sum() and mean() query functions which add the right amount of noise to the query output (Global Differential Privacy) such that it satisfies an epsilon constraint.

In [0]:
import numpy as np

In [0]:
# low epsilon budget
epsilon = 0.0001

In [0]:
# small data entries
db, pdbs = create_db_and_parallel_dbs(100)

def laplacian_M(db, query, sensitivity):
    
    beta = sensitivity / epsilon
    noise = torch.tensor(np.random.laplace(0, beta, 1))
    
    return query(db) + noise

In [60]:
laplacian_M(db, query_sum, 1)

tensor([372.2610], dtype=torch.float64)

In [61]:
laplacian_M(db, query_mean, 1/100)

tensor([1.0933], dtype=torch.float64)

In [0]:
# medium epsilon budget
epsilon = 0.005

In [62]:
laplacian_M(db, query_sum, 1)

tensor([64.3783], dtype=torch.float64)

In [64]:
laplacian_M(db, query_mean, 1/100)

tensor([-0.1626], dtype=torch.float64)

In [0]:
# high epsilon budget 
epsilon = 0.5

In [66]:
laplacian_M(db, query_sum, 1)

tensor([54.6232], dtype=torch.float64)

In [67]:
laplacian_M(db, query_mean, 1/100)

tensor([0.4660], dtype=torch.float64)

In [0]:
# mead data entries
db, pdbs = create_db_and_parallel_dbs(1000)

In [75]:
laplacian_M(db, query_sum, 1)

tensor([477.5692], dtype=torch.float64)

In [76]:
laplacian_M(db, query_mean, 1/1000)

tensor([0.4777], dtype=torch.float64)