# Diferential Privacy

In [30]:
# Configuration enviroment

- conda create -n pysyft python=3
- conda activate pysyft # some older version of conda require "source activate pysyft" instead.
- conda install jupyter notebook
- pip install syft
- pip install numpy

In [1]:
import torch

num_entries = 3000
db = torch.rand(num_entries) > 0.5
db

tensor([1, 0, 1,  ..., 0, 0, 0], dtype=torch.uint8)

To ask the question "When querying a database, if I removed someone from the database, would the output of the query be any different?"

we must construct what we term "parallel databases" -> databases with one entry removed.

## Generate parallel database

In [5]:
import torch

num_entries = 3000

def create_parallel_db(db, num_index):
    return torch.cat((db[0:num_index], db[num_index + 1:len(db)]))

def create_parallel_dbs(db):
    parallel_dbs = list()
    for i in range(len(db)):
        parallel_dbs.append(create_parallel_db(db, i))
    return parallel_dbs

def get_db_parallel_dbs(num_entries):
    db = torch.rand(num_entries) > 0.5
    pdbs = create_parallel_dbs(db)
    return db, pdbs

db, pdbs = get_db_parallel_dbs(3)
db, pdbs

(tensor([1, 0, 0], dtype=torch.uint8),
 [tensor([0, 0], dtype=torch.uint8),
  tensor([1, 0], dtype=torch.uint8),
  tensor([1, 0], dtype=torch.uint8)])

## Sensitivy

In [21]:
## The maximum amount that the query changes when removing an individual from the database

In [6]:
sensitivy = 0

def query(db):
    return db.sum()

db_result = query(db)
for pdb in pdbs:
    pdb_result = query(pdb)
    diferential = torch.abs(pdb_result - db_result)
    if diferential > sensitivy:
        sensitivy = diferential
    
sensitivy

tensor(1)

In [11]:

def sensitivity(query, n_entries = 1000):
    max_distance = 0
    db, dbs = get_db_parallel_dbs(n_entries)
    values = query(db)
    for idb in dbs:
        distance = torch.abs(values - query(idb))
        if distance > max_distance:
            max_distance = distance
    return max_distance

def query(db):
    return db.float().mean()

sensitivity(query, 1000)

tensor(0.0005)

In [47]:
# L1 Sensitivy for threshold

def query_threshold(db, threshold = 5):
    return (db.sum() > threshold).float()

for i in range(10):
    max_distance = sensitivity(query_threshold, n_entries = 10)
    print(max_distance)

0
tensor(1.)
0
0
0
0
0
0
tensor(1.)
0


In [56]:
# differencing attack using sum query

num_entries = 100
db, _ = get_db_parallel_dbs(num_entries)
pdb = create_parallel_db(db, num_index = 10)

print('attack using sum query')
print(db[10])
print(sum(db) - sum(pdb))

# differencing attack using mean query
print('attack using mean query')
print(sum(db).float()/len(db) - sum(pdb).float()/len(pdb))

# differencing attack using threshold query

print('attack using threshold query')
print((sum(db).float() > 49) - (sum(pdb).float() >49))

attack using sum query
tensor(0, dtype=torch.uint8)
tensor(0, dtype=torch.uint8)
attack using mean query
tensor(-0.0052)
attack using threshold query
tensor(0, dtype=torch.uint8)
