# DAY 4 [6.7%] | 60

## Project: Evaluating The Privacy Of A Function

In [1]:
import torch
# Seed torch's global RNG so the randomly generated database is reproducible.
torch.manual_seed(0)

<torch._C.Generator at 0x7f555c212850>

In [2]:
def create_databases(p, num_entries, num_databases):
    """
    Build a random boolean database and its "parallel" databases.

    The original database is a 1-D tensor with num_entries boolean entries;
    an entry is True when its sample from torch.rand is greater than p.
    Parallel database k is the original with the entry at position k removed.

    params:
    ------

    p             -- Threshold compared against each random sample.
    num_entries   -- Number of entries in the original db.
    num_databases -- Number of parallel databases to create; must be equal
                     to num_entries (checked with an assert).

    returns
    -------
    db            -- The original database.
    databases     -- List of parallel databases, each one entry shorter
                     than db.
    """
    assert num_entries == num_databases, 'Total entries {} different from total DB {}'.format(num_entries, num_databases)
    db = torch.rand(num_entries) > p

    def drop_entry(idx):
        # Join the slices on either side of position idx, leaving it out.
        return torch.cat([db[:idx], db[idx + 1:]])

    databases = [drop_entry(idx) for idx in range(num_databases)]
    return db, databases

In [3]:
def create_db_and_parallels(p, num_entries):
    """
    Create a database together with one parallel database per entry.

    Thin wrapper around create_databases that asks for as many parallel
    databases as there are entries.

    params:
    ------
    p            -- Passed through unchanged to create_databases.
    num_entries  -- Number of rows|samples in the original database.

    returns:
    -------
    db           -- Original database.
    databases    -- List of parallel databases.
    """
    return create_databases(p, num_entries, num_databases=num_entries)

In [4]:
def query(db):
    """
    Sum query: add up every entry of the database.

    params:
    ------
    db      -- Database object exposing a sum() method.

    returns:
    -------
    The value returned by db.sum().
    """
    total = db.sum()
    return total

In [5]:
# Build the original database (5000 entries) and its parallel databases.
db, databases = create_db_and_parallels(p=0.5, num_entries=5000)

In [6]:
# Run the sum query on the full (original) database.
full_query_db = query(db)
# Bare expression so the notebook cell displays the result.
full_query_db

tensor(2471)

In [7]:
# Empirical sensitivity of the query: the largest absolute change in the
# query result when any single entry is removed from the database.
sensitivity = 0
for pdb in databases:
    pbd_result = query(pdb)
    db_distance = torch.abs(pbd_result - full_query_db) # L1 distance from the full-db result
    # Keep the largest distance seen so far.
    if db_distance >sensitivity:
        sensitivity = db_distance

In [8]:
# Display the sensitivity measured by the loop above.
sensitivity

tensor(1)

In [9]:
def sensitivity(query, n_entries, p=0.5):
    """
    Empirically measure the sensitivity of a query.

    Builds a random database with n_entries entries together with its
    parallel databases (each missing exactly one entry), runs the query on
    all of them, and reports the largest absolute difference between the
    full-database result and any parallel-database result.

    params:
    ------
    query      -- A query function; it receives a database and must return
                  a value accepted by torch.abs after subtraction.
    n_entries  -- Total entries in the db.
    p          -- Forwarded to create_db_and_parallels (default 0.5).

    returns:
    -------
    The largest distance found, or 0 when no parallel database changes the
    query result (including the case of an empty database).
    """
    # create_db_and_parallels requires both p and num_entries.
    db, databases = create_db_and_parallels(p=p, num_entries=n_entries)
    # Reference result on the complete database.
    full_query_db = query(db)

    # Named max_distance so the local does not shadow this function's name.
    max_distance = 0
    for pdb in databases:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - full_query_db)  # L1 distance
        if db_distance > max_distance:
            max_distance = db_distance
    return max_distance