# DAY 6 [10.0%] | 60

In [199]:
import torch
torch.manual_seed(0)

<torch._C.Generator at 0x7f3955c166f0>

In [200]:
def create_databases(p, num_entries, num_databases):
    """
    Build a random database together with its parallel databases, i.e.
    every copy of the database that has exactly one row (element) removed.

    params:
    ------
    p             -- Threshold applied to uniform samples from torch.rand;
                     an entry is set when its sample is greater than p.
    num_entries   -- Number of entries in the original db.
    num_databases -- Number of parallel databases to create. Must be equal
                     to num_entries (checked with an assertion).

    returns
    -------
    db            -- The original database with num_entries entries.
    databases     -- List with num_databases tensors; the k-th tensor is
                     db without its k-th entry.
    """
    assert num_entries == num_databases, 'Total entries {} different from total DB {}'.format(num_entries, num_databases)
    original = torch.rand(num_entries) > p
    # One parallel database per position: glue together what lies before
    # and after the dropped entry.
    leave_one_out = [
        torch.cat((original[:dropped], original[dropped + 1:]))
        for dropped in range(num_databases)
    ]
    return original, leave_one_out

In [201]:
def create_db_and_parallels(p, num_entries):
    """
    Convenience wrapper around create_databases that asks for exactly one
    parallel database per entry of the original database.

    params:
    ------
    p            -- Threshold forwarded to create_databases.
    num_entries  -- Number of rows|samples.

    returns:
    -------
    db           -- Original database.
    databases    -- List of parallel databases (one per removed row).
    """
    # The number of parallel databases always equals the number of entries.
    return create_databases(p, num_entries, num_databases=num_entries)

In [202]:
def mean_query(db):
    """
    Apply the mean query over the db.

    params:
    ------
    db     -- Database tensor.

    returns:
    -------
    Mean of the entries, computed on the float-cast database.
    """
    as_float = db.float()
    return as_float.mean()

In [203]:
def query(db):
    """
    Sum query: adds up every entry of the database.

    params:
    ------
    db      -- A db containing a single feature.

    returns:
    -------
    Result of db.sum().
    """
    total = db.sum()
    return total

In [204]:
def sensitivity(query, n_entries):
    """
    Empirically estimates the L1 sensitivity of a query.

    A random database with n_entries rows is created, the query is run on
    it and on each of its parallel databases (the database with one row
    removed), and the largest absolute change of the query result is kept.

    params:
    ------
    query      -- A query function; called with a database tensor and
                  expected to return a single-element tensor.
    n_entries  -- Total entries in the db.

    returns:
    -------
    Sensitivity as a tensor shaped like the query result. A tensor is
    returned even when no parallel database changes the result.
    """
    # Initialize a database and its parallel databases
    db, databases = create_db_and_parallels(0.5, n_entries)
    # Run the query over the original db
    full_db_result = query(db)
    # Start from a zero *tensor* (not the Python int 0) so the return type
    # does not depend on whether any distance turns out to be positive.
    max_distance = torch.zeros_like(full_db_result)

    # Run the query over every parallel database and keep the largest change
    for pdb in databases:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - full_db_result)  # L1 distance
        if db_distance > max_distance:
            max_distance = db_distance
    return max_distance

In [205]:
# evaluate the sensitivity of the mean query over 1000 samples
print(sensitivity(mean_query, 1000))

tensor(0.0005)


In [206]:
# evaluate the sensitivity of the sum query over 5000 samples
print(sensitivity(query, 5000))

tensor(1)


In [207]:
def threshold_query(db, threshold=5):
    """
    Threshold query: tells whether the sum of the db exceeds a threshold.

    params:
    ------
    db         -- Database.
    threshold  -- Value the sum has to strictly exceed (defaults to 5).

    returns:
    -------
    Float tensor holding 1.0 when db.sum() > threshold, otherwise 0.0.
    """
    # The comparison only yields a binary-valued tensor; it is cast to
    # float so that the query returns a float result.
    exceeds = db.sum() > threshold
    return exceeds.float()

In [224]:
def new_sensitivity(query, n_entries):
    """
    Verbose empirical sensitivity estimate.

    Creates a random database with n_entries rows, runs the query on it and
    on each parallel database (one row removed), and prints the running
    maximum of the absolute change after every parallel database.

    params:
    ------
    query      -- A query function; called with a database tensor and
                  expected to return a single-element tensor.
    n_entries  -- Total entries in the db.

    returns:
    -------
    Sensitivity as a tensor. A tensor is returned even when no parallel
    database changes the query result.
    """
    # Initialize a database and its parallel databases
    db, databases = create_db_and_parallels(0.5, n_entries)
    # Run the query over the original db
    full_db_result = query(db)
    # Running maximum of the distances seen so far
    max_distance = 0
    for index, pdb in enumerate(databases, start=1):
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - full_db_result)  # L1 distance
        if db_distance > max_distance:
            max_distance = db_distance
        print('For pdb {}, the sensitivity is {}'.format(index, max_distance))
    # The running value is still the Python int 0 if no distance was ever
    # positive; convert so callers always receive a tensor.
    return torch.as_tensor(max_distance)

In [233]:
print(new_sensitivity(threshold_query, 10))

For pdb 1, the sensitivity is 0
For pdb 2, the sensitivity is 1.0
For pdb 3, the sensitivity is 1.0
For pdb 4, the sensitivity is 1.0
For pdb 5, the sensitivity is 1.0
For pdb 6, the sensitivity is 1.0
For pdb 7, the sensitivity is 1.0
For pdb 8, the sensitivity is 1.0
For pdb 9, the sensitivity is 1.0
For pdb 10, the sensitivity is 1.0
tensor(1.)


In [234]:
# We obtain different results each time due to the random nature of the database.

## Project: Basic Differencing Attack

In [243]:
db, _ = create_db_and_parallels(0.5, 100)

In [244]:
def get_parallel_db(db, remove_index = 10):
    """
    Return a parallel database: db with the entry at remove_index removed.

    params:
    ------
    db            -- Database tensor (entries along the first dimension).
    remove_index  -- Position of the entry to drop. Negative values count
                     from the end, as with regular indexing.

    returns:
    -------
    A new tensor with len(db) - 1 entries.

    raises:
    ------
    IndexError   -- When remove_index does not address an entry of db.
    """
    num_entries = len(db)
    if not -num_entries <= remove_index < num_entries:
        raise IndexError(
            'remove_index {} is out of range for a db with {} entries'.format(remove_index, num_entries)
        )
    # Normalize negative indices: slicing directly with e.g. -1 would glue
    # db[:-1] and db[0:] together instead of dropping the last entry.
    if remove_index < 0:
        remove_index += num_entries
    return torch.cat([db[:remove_index], db[remove_index + 1:]])

In [245]:
pdb = get_parallel_db(db, remove_index = 10)

In [246]:
db[10]

tensor(1, dtype=torch.uint8)

In [247]:
pdb

tensor([0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0,
        0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1,
        0, 0, 1], dtype=torch.uint8)

In [248]:
# differencing attack using sum
# NOTE(review): the builtin sum() keeps the tensor's uint8 dtype (see the
# rendered outputs), so totals above 255 would presumably wrap around --
# consider db.sum() - pdb.sum() for larger databases; TODO confirm.
sum(db) - sum(pdb)

tensor(1, dtype=torch.uint8)

In [249]:
# differencing attack using mean query
(sum(db).float()/len(db)) - (sum(pdb).float() / len(pdb))

tensor(0.0054)

In [250]:
sum(db)

tensor(47, dtype=torch.uint8)

In [251]:
# differencing attack using threshold query
(sum(db).float() > 46) - (sum(pdb).float() > 46)

tensor(1, dtype=torch.uint8)