## Creating a threshold query function

In [1]:
# Creating a dabases of 1s and 0s 
import torch
num_entries = 10
db = torch.rand(num_entries) > 0.5 
db

tensor([1, 0, 1, 0, 0, 1, 0, 1, 0, 0], dtype=torch.uint8)

In [2]:
# Setting a threshold value.

sum(db) > 5    #THRESHOLD=5;
               #IT RETUNS A BINARY VALUE 1 (TRUE) OR 0 (FALSE)

tensor(0, dtype=torch.uint8)

In [3]:
# Creating the threshold query function
def query(db, threshold):
        return (sum(db) > threshold).float() #NB. I want tensors containing float values to be employed in the calculation of sensitivity

In [4]:
import torch
num_entries = 10
db = torch.rand(num_entries) > 0.5 

In [5]:
query(db, 5)

tensor(0.)

## Sensitivity employing the threshold query function

In [6]:
import torch
num_entries = 10
db = torch.rand(num_entries) > 0.5 
db

def parallel_db(db, i):
        return torch.cat([db[:i], db[i+1:]])

def parallel_dbs(db):
    list_dbs=[]
    for i in range(len(db)):
        new_dbs=parallel_db(db, i)
        list_dbs.append(new_dbs)
    return list_dbs

def create_list_parallel_databases(num_entries):
    db = torch.rand(num_entries)>0.5
    pdbs = parallel_dbs(db)
    return db, pdbs

def query(db, threshold=5):
    return (db.sum() > threshold).float()


In [7]:
query(db, 5)

tensor(1.)

In [8]:
def sensitivity(query, num_entries=10):
    db, pdbs = create_list_parallel_databases(num_entries) #NB.function creating input database from which the parallel dbs are generated MUST be included inside the sensitivity function 
    query_db = query(db)
    max_distance=0
    for pdb in pdbs:
        query_pdb = query(pdb)
        db_distance=torch.abs(query_pdb - query_db)
        if (db_distance > max_distance):          
            max_distance=db_distance
    return max_distance

In [9]:
query(db, 5)

tensor(1.)

In [10]:
sensitivity(query, 10)

0

### Sensitivity for databases of size 10 using a threshold query with threshold value equal to five.

In [11]:
import torch
num_entries = 10
db = torch.rand(num_entries) > 0.5 

db, pdbs = create_list_parallel_databases(10)

query(db, threshold=5)

tensor(0.)

In [12]:
for i in range(10):                                               #database of size 10                            
    sensitivity_function=sensitivity(query, num_entries=10)
    print(sensitivity_function)

0
0
0
tensor(1.)
0
tensor(1.)
0
0
0
0


### Basic Differencing Attack to disclose a certain value in a database.

In [13]:
db, _ = create_list_parallel_databases(100)

In [14]:
_

[tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
         0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
         1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
         0, 1, 1], dtype=torch.uint8),
 tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
         0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
         1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
         0, 1, 1], dtype=torch.uint8),
 tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
         0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
         1, 0, 1, 0, 0, 0, 1, 1

In [15]:
def get_parallel_db(db, remove_index =10):      #removing index 10 from parallel database (pdb)
    return parallel_db(db, remove_index)

In [16]:
pdb=get_parallel_db(db, remove_index=10)
pdb

tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
        0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
        0, 1, 1], dtype=torch.uint8)

In [17]:
print("Length of the parallel database without index 10 is {}. Such database is generated from an input database of length {}".format(len(pdb), len(db)))

Length of the parallel database without index 10 is 99. Such database is generated from an input database of length 100


#### Differencing attack: using sum query

In [18]:
sum(db)-sum(pdb)

tensor(0, dtype=torch.uint8)

#### Differencing attack: using mean query (method 1)

In [19]:
num_entries=100
db=torch.rand(num_entries)

In [20]:
def get_parallel_db(db, remove_index =10):      #removing index 10 from parallel database (pdb)
    return parallel_db(db, remove_index)

In [21]:
print("db mean is {}; pdb mean is {}.".format(sum(db).float()/len(db), sum(pdb).float()/len(pdb)))

db mean is 0.49819669127464294; pdb mean is 0.46464645862579346.


In [22]:
(sum(db).float()/len(db) - sum(pdb).float()/len(pdb))

tensor(0.0336)

#### Differencing attack: using mean query (method 2)

In [23]:
num_entries=100
db=torch.rand(num_entries)

In [24]:
def get_parallel_db(db, remove_index =10):      #removing index 10 from parallel database (pdb)
    return parallel_db(db, remove_index)

In [25]:
import numpy as np
import pandas as pd
print("db mean is {}; pdb mean is {}.".format(pd.Series(db).mean(), pd.Series(pdb).mean()))

db mean is 0.540575385093689; pdb mean is 0.46464646464646464.


In [26]:
torch.tensor(pd.Series(db).mean() - pd.Series(pdb).mean()).float()   

tensor(0.0759)

### Note
<p>Datasets db and pdb used in method 1 and 3 are different due to the random nature of db.</p>
<p>Therefore, the resulting values obtained from method 1 and 2 are different.</p>

#### Differencing attack: using threshold query

In [27]:
num_entries=100
db, _ = create_list_parallel_databases(num_entries)   

In [28]:
sum(db)

tensor(52, dtype=torch.uint8)

In [29]:
threshold=49
(sum(db).float() > threshold)    

tensor(1, dtype=torch.uint8)

#### That means True

In [30]:
threshold=49
(sum(pdb).float()>threshold)

tensor(0, dtype=torch.uint8)

#### That means False

In [31]:
threshold=49
(sum(db).float() > threshold) - (sum(pdb).float() > threshold)

tensor(1, dtype=torch.uint8)

#### True

#### Links and further readings
<p>
<a href="https://eu.udacity.com/course/secure-and-private-ai--ud185">Udacity</a> <br/ >
<a href="https://robertovitillo.com/2016/07/29/differential-privacy-for-dummies/">Differential Privacy for Dummies</a> <br/ >
<a href= "https://www.brighthubengineering.com/machine-design/120073-difference-between-tensors-and-vectors/">Tensors vs Vectors</a><br/ >
<a href="https://www.kdnuggets.com/2018/05/pytorch-tensor-basics.html"> More about Tensors </a> <br/ >
<a href="https://doc.embedded-wizard.de/uint8-uint16-uint32-type?v=9.00"> About bits </a>
</p>