## Section 1

### Project: Generate Parallel Databases

In [1]:
import torch

In [2]:
def create_random_db(num_entries):

    db = torch.rand(num_entries) > 0.5
    return db

def get_parallel_db(db, remove_idx):

    return torch.cat((db[:remove_idx], db[remove_idx+1:]))

def create_parallel_dbs(n_entries):
    all_dbs = []
    db = create_random_db(n_entries)

    for idx in range(len(db)):
        parallel_db = get_parallel_db(db, idx)
        all_dbs.append(parallel_db)

    return db, all_dbs

In [3]:
create_parallel_dbs(5)

(tensor([False,  True, False, False,  True]),
 [tensor([ True, False, False,  True]),
  tensor([False, False, False,  True]),
  tensor([False,  True, False,  True]),
  tensor([False,  True, False,  True]),
  tensor([False,  True, False, False])])

---

## Section 2

### Project: Evaluating the Privacy of a Function

In [4]:
def sensitivity(query, n_entries=2000):

    db, pdbs = create_parallel_dbs(n_entries)
    full_db_res = query(db)

    max_dist = 0
    for pdb in pdbs:
        pdb_res = query(pdb)

        db_dist = torch.abs(pdb_res - full_db_res)

        if (db_dist > max_dist):
            max_dist = db_dist

    return max_dist

In [5]:
def query(db):
    return db.float().mean()

In [6]:
sensitivity(query)

tensor(0.0003)

### Calculate L1 Sensitivity for Threshold

In [7]:
def query(db, threshold=5):
    return (db.sum() > threshold).float()

In [8]:
for i in range(10):
    sensitivity_val = sensitivity(query, 10)
    print(sensitivity_val)

0
0
0
0
0
0
0
0
0
tensor(1.)


### Project: Perform a Differencing Attack

In [9]:
db, _ = create_parallel_dbs(100)

In [10]:
pdb = get_parallel_db(db, remove_idx=10)

In [11]:
# Differencing attacking using sum

sum(db) - sum(pdb)

tensor(1)

In [12]:
# Differencing attacking using mean

(sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

tensor(0.0049)

In [13]:
# Differencing attacking using threshold

threshold = 49
(sum(db).float() > threshold).int() - (sum(pdb).float() > threshold).int()

tensor(0, dtype=torch.int32)

---

## Section 3

### Randomized Response (Local Differential Privacy)

In [14]:
db, pdbs = create_parallel_dbs(100)

In [15]:
def query(db):

    first_coin_flip = (torch.rand(len(db)) > 0.5).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_db.float()) * 2 - 0.5
    true_result = torch.mean(db.float())

    return db_result, true_result

In [16]:
db, pdbs = create_parallel_dbs(10)
private_result, true_result = query(db)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.30000001192092896
Without Noise: 0.30000001192092896


In [17]:
db, pdbs = create_parallel_dbs(100)
private_result, true_result = query(db)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.5
Without Noise: 0.41999998688697815


In [18]:
db, pdbs = create_parallel_dbs(1000)
private_result, true_result = query(db)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.5759999752044678
Without Noise: 0.5109999775886536


In [19]:
db, pdbs = create_parallel_dbs(10000)
private_result, true_result = query(db)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.5118000507354736
Without Noise: 0.4975000023841858


### Project: Varying Amount of Noise

In [20]:
def query(db, noise=0.2):

    first_coin_flip = (torch.rand(len(db)) > noise).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    sk_result = augmented_db.float().mean()
    true_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

    return sk_result, true_result

In [21]:
db, pdbs = create_parallel_dbs(10000)
private_result, true_result = query(db, noise=0.1)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.49869999289512634
Without Noise: 0.49855557084083557


In [22]:
db, pdbs = create_parallel_dbs(10000)
private_result, true_result = query(db, noise=0.2)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.4966000020503998
Without Noise: 0.4957500100135803


In [23]:
db, pdbs = create_parallel_dbs(10000)
private_result, true_result = query(db, noise=0.4)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.5023000240325928
Without Noise: 0.5038333535194397


In [24]:
db, pdbs = create_parallel_dbs(10000)
private_result, true_result = query(db, noise=0.8)

print(f"With Noise: {private_result}")
print(f"Without Noise: {true_result}")

With Noise: 0.503000020980835
Without Noise: 0.5150001049041748
