<a href="https://colab.research.google.com/github/MKrupauskas/colab/blob/master/differential-privacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch

In [0]:
def get_parallel_db(db, remove_index):
  
  return torch.cat((db[0 : remove_index],
                    db[remove_index + 1 :]))

In [0]:
def get_parallel_dbs(db):
  
  parallel_dbs = list()

  for i in range(len(db)):
    
    parallel_db = get_parallel_db(db, i)
    
    parallel_dbs.append(parallel_db)
    
  return parallel_dbs

In [0]:
def get_db_and_parallel_dbs(num_entries):
  
  db = torch.rand(num_entries) > 0.5
  
  parallel_dbs = get_parallel_dbs(db)
  
  return db, parallel_dbs

Determine the privacy

In [0]:
def sum_query(db):

  return db.sum()

def mean_query(db):
  
  return db.float().mean()

def threshold_query(db, threshold = 5):
  
  return (db.sum() > threshold).float()

Create a function that calculates the sensitivity of a query

In [0]:
def calculate_sensitivity(query, num_entries = 100):
  
  db, parallel_dbs = get_db_and_parallel_dbs(num_entries)
  
  db_result = query(db)

  sensitivity = 0;
  
  for parrallel_db in parallel_dbs:
    parallel_db_result = query(parrallel_db)

    db_distance = torch.abs(parallel_db_result - db_result)

    if (db_distance > sensitivity):
      sensitivity = db_distance
      
  return sensitivity

In [7]:
calculate_sensitivity(mean_query)

tensor(0.0005)

In [8]:
for index in range(10):
  sensitivity = calculate_sensitivity(threshold_query, 10)  
  
  print(sensitivity)

0
0
0
0
0
0
0
0
0
0


Performing a differencing attack

In [9]:
db, _ = get_db_and_parallel_dbs(100)

pdb = get_parallel_db(db, remove_index = 10)

db[10]

tensor(1, dtype=torch.uint8)

In [10]:
sum(db) - sum(pdb)

tensor(1, dtype=torch.uint8)

In [12]:
(sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

tensor(0.0048)

In [15]:
(sum(db).float() > 49) - (sum(pdb).float() > 49)

tensor(0, dtype=torch.uint8)

Local differential privacy

In [52]:
def get_private_query_results(db):
  
  true_result = torch.mean(db.float())

  first_coin_flip = (torch.rand(len(db)) > 0.5).float()
  second_coin_flip = (torch.rand(len(db)) > 0.5).float()

  real_answer = db.float() * first_coin_flip
  fake_answer = (1 - first_coin_flip) * second_coin_flip

  private_db = real_answer + fake_answer

  private_result = torch.mean(private_db.float()) * 2 - 0.5
  
  return true_result, private_result

db, pdbs = get_db_and_parallel_dbs(1000)

get_private_query_results(db)

(tensor(0.4500), tensor(0.4600))

In [87]:

def get_private_query_results_with_bias(db, noise = 0.5):
  true_result = torch.mean(db.float())

  first_coin_flip = (torch.rand(len(db)) > noise).float()
  second_coin_flip = (torch.rand(len(db)) > 0.5).float()

  real_answer = db.float() * first_coin_flip
  fake_answer = (1 - first_coin_flip) * second_coin_flip

  private_db = real_answer + fake_answer

  skewed_result = private_db.float().mean()
  
  private_result = ((skewed_result / noise) - 0.5) * noise / (1 - noise)
  
  return true_result, private_result


db, pdbs = get_db_and_parallel_dbs(1000)

get_private_query_results_with_bias(db, 0.8)

(tensor(0.5010), tensor(0.5450))

Global differential privacy

In [0]:
import numpy as np

epsilon = 0.000005

db, pdbs = get_db_and_parallel_dbs(100)

In [0]:
def laplacian_mechanism(db, query, sensitivity): 
  
  beta = sensitivity / epsilon
  
  noise = torch.tensor(np.random.laplace(0, beta, 1))
  
  return query(db) + noise

In [126]:
laplacian_mechanism(db, mean_query, 1 / 100)

tensor([0.4273], dtype=torch.float64)

Differential privacy for deep learning

In [0]:
import numpy as np

In [0]:
num_teachers = 10
num_examples = 10000
num_labels = 10

In [0]:
predictions = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int).transpose(1, 0)

In [0]:
new_labels = list()

for image in predictions:
  
  label_counts = np.bincount(image, minlength = num_labels)
  
  epsilon = 0.1
  beta = 1 / epsilon

  for i in range(len(label_counts)):
    
    label_counts[i] += np.random.laplace(0, beta, 1)

  new_label = np.argmax(label_counts)
  
  new_labels.append(new_label)

In [10]:
new_labels

[0,
 2,
 0,
 7,
 8,
 3,
 5,
 2,
 0,
 7,
 7,
 9,
 1,
 7,
 9,
 4,
 7,
 5,
 2,
 0,
 8,
 8,
 8,
 9,
 1,
 7,
 8,
 1,
 6,
 4,
 1,
 1,
 2,
 7,
 1,
 6,
 8,
 0,
 4,
 1,
 8,
 1,
 4,
 5,
 9,
 2,
 3,
 4,
 5,
 6,
 9,
 9,
 7,
 1,
 1,
 0,
 6,
 8,
 4,
 8,
 8,
 9,
 2,
 9,
 8,
 8,
 8,
 7,
 8,
 2,
 5,
 4,
 5,
 4,
 4,
 4,
 0,
 6,
 5,
 8,
 6,
 6,
 2,
 3,
 9,
 1,
 3,
 6,
 0,
 9,
 8,
 4,
 8,
 6,
 4,
 0,
 0,
 2,
 7,
 5,
 3,
 8,
 3,
 8,
 3,
 1,
 4,
 0,
 9,
 8,
 5,
 4,
 2,
 5,
 9,
 9,
 3,
 9,
 1,
 5,
 3,
 3,
 7,
 2,
 8,
 2,
 1,
 6,
 4,
 9,
 5,
 7,
 5,
 9,
 0,
 3,
 3,
 0,
 1,
 0,
 2,
 2,
 1,
 5,
 1,
 5,
 7,
 8,
 5,
 7,
 6,
 4,
 6,
 0,
 9,
 9,
 2,
 1,
 7,
 3,
 8,
 7,
 8,
 8,
 2,
 7,
 3,
 2,
 7,
 6,
 3,
 3,
 9,
 5,
 1,
 0,
 1,
 1,
 7,
 9,
 2,
 4,
 1,
 6,
 5,
 9,
 5,
 8,
 4,
 7,
 9,
 4,
 3,
 2,
 4,
 2,
 9,
 4,
 2,
 8,
 5,
 2,
 6,
 2,
 4,
 0,
 4,
 8,
 1,
 5,
 3,
 3,
 9,
 7,
 7,
 4,
 6,
 0,
 2,
 8,
 6,
 2,
 6,
 8,
 4,
 0,
 0,
 3,
 5,
 6,
 9,
 6,
 5,
 0,
 2,
 2,
 5,
 1,
 5,
 3,
 0,
 5,
 2,
 3,
 4,
 3,
 4,
 1,
 4,
 5,


Pate

In [0]:
labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])
counts = np.bincount(labels, minlength = 10)
result = np.argmax(counts)

In [0]:
# !pip install syft
from syft.frameworks.torch.differential_privacy import pate

In [0]:
num_teachers, num_examples, num_labels = (100, 100, 10)
predictions = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)
indices = (np.random.rand(num_examples) * num_labels).astype(int)

data_dependant_epsilon, data_independant_epsilon = pate.perform_analysis(teacher_preds = predictions, indices = indices, noise_eps = 0.1, delta = 1e-5)

In [0]:
predictions[:,0:5] *= 0

data_dependant_epsilon, data_independant_epsilon = pate.perform_analysis(teacher_preds = predictions, indices = indices, noise_eps = 0.1, delta = 1e-5)