Now we're going to add the ability to vary the amount of noise added to the data.

In [1]:
# Database generation functions

import torch

def get_parallel_db(db, remove_index):
    """Return a copy of ``db`` with the entry at ``remove_index`` left out.

    Args:
        db: tensor that is sliced along its first dimension.
        remove_index: position of the entry to drop. Negative values count
            from the end, as with ordinary Python indexing.

    Returns:
        A new tensor: the concatenation of the entries before and after
        ``remove_index``.

    Raises:
        IndexError: if ``remove_index`` is outside ``[-len(db), len(db))``.
    """
    num_entries = len(db)
    if not -num_entries <= remove_index < num_entries:
        raise IndexError(
            "remove_index {} is out of range for a database of {} entries".format(
                remove_index, num_entries
            )
        )
    # Normalise negative indices first: slicing with a raw negative index
    # (e.g. db[0:-1] followed by db[0:]) would duplicate entries instead of
    # removing one.
    if remove_index < 0:
        remove_index += num_entries
    return torch.cat((db[:remove_index], db[remove_index + 1:]))

def get_parallel_dbs(db):
    """Build the list of all parallel databases of ``db``.

    Element ``i`` of the returned list is ``get_parallel_db(db, i)``, for
    every ``i`` in ``range(len(db))``.
    """
    return [get_parallel_db(db, entry_idx) for entry_idx in range(len(db))]

def create_db_and_pdbs(num_entries):
    """Create a random binary database and all of its parallel databases.

    Returns:
        A ``(db, pdbs)`` tuple. ``db`` is a float tensor of ``num_entries``
        values, 1.0 where a uniform random sample exceeded 0.5 and 0.0
        otherwise; ``pdbs`` is the result of ``get_parallel_dbs(db)``.
    """
    uniform_samples = torch.rand(num_entries)
    database = (uniform_samples > 0.5).float()
    return database, get_parallel_dbs(database)

In [4]:
# Now we can write our altered query function.
# Since we want to change/bias the first coin flip, we add a noise parameter which acts as its threshold:
# each entry is replaced by a random coin flip with probability `noise`.
# What's left is to de-skew the outcome of our noised dataset accordingly.
def query(db, noise=0.2):
    """Return a noised (randomized-response) estimate of the mean of ``db``.

    Every entry is kept when its first coin flip succeeds (probability
    ``1 - noise``) and is otherwise replaced by a fair second coin flip.
    The mean of that noised database is then de-skewed so that it estimates
    the mean of the original database.

    Args:
        db: tensor of entries (converted with ``.float()``); expected to
            hold 0/1 values.
        noise: probability, in ``[0, 1)``, that an entry is replaced by a
            random coin flip.

    Returns:
        A ``(private_result, true_result)`` tuple of scalar tensors: the
        de-skewed mean of the noised database and the exact mean of ``db``.

    Raises:
        ValueError: if ``noise`` is outside ``[0, 1)``; at ``noise == 1`` no
            information about ``db`` survives and the de-skewing would
            divide by zero.
    """
    if not 0 <= noise < 1:
        raise ValueError("noise must be in the range [0, 1), got {}".format(noise))

    entries = db.float()
    true_result = torch.mean(entries)

    first_coin_flip = (torch.rand(len(db)) > noise).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()
    augmented_database = entries * first_coin_flip + (1 - first_coin_flip) * second_coin_flip
    skewed_result = augmented_database.mean()

    # E[skewed_result] = (1 - noise) * true_mean + noise * 0.5, so solve for
    # true_mean. Written without dividing by `noise`, so noise == 0 yields
    # the exact mean instead of NaN (inf * 0).
    private_result = (skewed_result - 0.5 * noise) / (1 - noise)

    return private_result, true_result

In [5]:
# Now we can conduct an experiment to see how the query result differs with varying amounts of noise added
# Query a fresh 100-entry database with noise=0.1
db, pdbs = create_db_and_pdbs(num_entries=100)
private_result, true_result = query(db, noise=0.1)
# `!s` forces str(), so the tensors print as "tensor(...)" exactly as before
print(f"With Noise:{private_result!s}")
print(f"Without Noise:{true_result!s}")

With Noise:tensor(0.4222)
Without Noise:tensor(0.4400)


In [6]:
# Query a fresh 100-entry database with noise=0.2
db, pdbs = create_db_and_pdbs(num_entries=100)
private_result, true_result = query(db, noise=0.2)
# `!s` forces str(), so the tensors print as "tensor(...)" exactly as before
print(f"With Noise:{private_result!s}")
print(f"Without Noise:{true_result!s}")

With Noise:tensor(0.4375)
Without Noise:tensor(0.4700)


In [7]:
# Query a fresh 100-entry database with noise=0.4
db, pdbs = create_db_and_pdbs(num_entries=100)
private_result, true_result = query(db, noise=0.4)
# `!s` forces str(), so the tensors print as "tensor(...)" exactly as before
print(f"With Noise:{private_result!s}")
print(f"Without Noise:{true_result!s}")

With Noise:tensor(0.4667)
Without Noise:tensor(0.4900)


In [8]:
# Query a fresh 100-entry database with noise=0.8
db, pdbs = create_db_and_pdbs(num_entries=100)
private_result, true_result = query(db, noise=0.8)
# `!s` forces str(), so the tensors print as "tensor(...)" exactly as before
print(f"With Noise:{private_result!s}")
print(f"Without Noise:{true_result!s}")

With Noise:tensor(0.6500)
Without Noise:tensor(0.5400)


In [11]:
# As seen above, the difference increases significantly as the noise increases.
# We can counter the increased noise by increasing the size of the database.
# Query a fresh 10000-entry database with noise=0.8
db, pdbs = create_db_and_pdbs(num_entries=10000)
private_result, true_result = query(db, noise=0.8)
# `!s` forces str(), so the tensors print as "tensor(...)" exactly as before
print(f"With Noise:{private_result!s}")
print(f"Without Noise:{true_result!s}")

With Noise:tensor(0.5020)
Without Noise:tensor(0.5037)


Thus, the larger the dataset, the more privacy protection we can provide to the individuals in the dataset while still obtaining an accurate query result.