In [8]:
# Import modules
import torch
import numpy as np
torch.manual_seed(7)

<torch._C.Generator at 0x2cbb6698dd0>

In [9]:
# Create a parallel db generator which removes one record from the original db and keeps the rest of the records
def par_db_generator(db, idx):
    return torch.cat((db[0:idx], db[idx+1:]))

In [10]:
def coinflip(noise):
    answer = 0
    if torch.rand(1)>noise:
        answer=1
        return answer
    else:
        if torch.rand(1)>0.5:
            answer=1
            return answer
        return answer
            

In [11]:
# Backle up the parallel databases together
# it depends entirely on the length of the first db
def par_dbs(db):
    """This creates an array with the datase columns = len(db-1)
    Since we are removing one record at a time and returning the remaining values
    """
    db_all = []
    par_db = torch.ones(db.shape).type(torch.int)
    for i in range(len(db)):
        par_db = torch.cat((db[0:i], db[i+1:]))
        
        db_all.append(par_db)
        
    return db_all

In [12]:
# Create a function to generate the required data based on a given sample of users or records
def generate_db(rec_no):
    """This function generates the db that is then usd to generate the parallel dbs"""
    db = torch.rand(rec_no)>0.5
    # print('Original DB\n{}'.format(db))
    
    #Generate the entire parallel db
    # My Solution
    pdbs = par_dbs(db)
    
    
    # Lecture Solution
#     first_coin_flip = (torch.rand(len(db))>0.5).float()
#     second_coin_flip = (torch.rand(len(db))>0.5).float()
#     pdbs = db.float()*first_coin_flip + (1-first_coin_flip)*second_coin_flip
    
    return db, pdbs    

In [90]:
def query(num_entries, noise):
    db, _ = generate_db(num_entries)
    
    ful_db_query = db.float().mean()
    baised_first_flip = np.random.choice(2, size=num_entries, p=[noise, 1-noise])
    ex_based_first_flip = (torch.rand(num_entries) > (1-noise)).float()
    
    toss_one = torch.tensor(baised_first_flip).float()
    # print(toss_one)
    toss_two = (torch.rand(len(db))>0.5).float()
    # print(toss_two)
    augmented_output = toss_one*db.float() + (1-toss_one)*toss_two
    
    augmented_output_ex = ex_based_first_flip*db.float() + (1-ex_based_first_flip)*toss_two
    
    query_result = augmented_output.mean()*2 - 0.5
    
    # Skewed Results
    skewed_result = augmented_output.float().mean()    
    skewed_result_ex = augmented_output_ex.float().mean()
    
    # Private Results
    private_result = ((skewed_result/noise) - 0.5)*(noise/ (1-noise))
    private_result_ex = ((skewed_result_ex/(1-noise)) - 0.5)*((1-noise)/noise)
    
    return private_result, ful_db_query, private_result_ex

In [91]:

for i in [10, 100, 1000, 10000]:
    print(i, 'Entries')
    for noise in [0.2, 0.4, 0.6, 0.8]:
        print('Noise: ', noise)
        print("Without Noise: ", query(i, noise)[1].item())
        print("Mine With Noise: ", query(i, noise)[0].item())
        print("Ex With Noise: ", query(i, noise)[2].item(), '\n')

10 Entries
Noise:  0.2
Without Noise:  0.4000000059604645
Mine With Noise:  0.375
Ex With Noise:  1.0 

Noise:  0.4
Without Noise:  0.30000001192092896
Mine With Noise:  0.5
Ex With Noise:  0.0 

Noise:  0.6
Without Noise:  0.4000000059604645
Mine With Noise:  0.24999994039535522
Ex With Noise:  0.0 

Noise:  0.8
Without Noise:  0.5
Mine With Noise:  0.5
Ex With Noise:  0.375 

100 Entries
Noise:  0.2
Without Noise:  0.44999998807907104
Mine With Noise:  0.550000011920929
Ex With Noise:  0.7999999523162842 

Noise:  0.4
Without Noise:  0.4699999988079071
Mine With Noise:  0.40000003576278687
Ex With Noise:  0.5999999642372131 

Noise:  0.6
Without Noise:  0.44999998807907104
Mine With Noise:  0.2749999165534973
Ex With Noise:  0.46666663885116577 

Noise:  0.8
Without Noise:  0.5099999904632568
Mine With Noise:  1.0499999523162842
Ex With Noise:  0.48750001192092896 

1000 Entries
Noise:  0.2
Without Noise:  0.5080000162124634
Mine With Noise:  0.48874998092651367
Ex With Noise:  0.589

In [None]:
Here is a query I made instead of the one that was illustrated in the video. 5.6. Could I be wrong in any way, since I am making a choice based on probability instead of using this noise as a threshold as in the example in the video