## Hospital Data
Here we assume that we are dealing with private data from the hospitals.
Suppose we have 10 hospitals who trained our model for their private data and we want to use their model to predict our unlabeled data
### E.g
We have 10,000 unlabeled images that we need to classify using these 10 hospital models and use the outcome to be what we shall use for our model retrainer


In [57]:
# imports
import numpy as np

In [58]:
# Define the data parameters
num_trainers = 10             # The number of Hospitls that are training/running out models
num_examples = 100          # The Number of unlabled images we have at hand that we want to predict using the trained models from the hospitals
num_classes = 10              # Number of labels in the model (our classifier)

In [59]:
# Generate a random prediction (Fakes) of the models for our 10000 samples
predictions = (np.random.rand(num_trainers, num_examples)*num_classes).astype(int)
predictions

array([[7, 3, 3, 7, 9, 8, 4, 6, 6, 7, 5, 4, 9, 9, 0, 3, 0, 7, 2, 4, 0, 9,
        9, 3, 8, 9, 0, 7, 7, 9, 2, 2, 7, 3, 6, 4, 0, 6, 9, 5, 9, 7, 7, 6,
        7, 8, 0, 0, 9, 5, 7, 1, 9, 1, 6, 6, 0, 5, 8, 9, 1, 3, 5, 7, 9, 3,
        8, 3, 7, 9, 9, 1, 8, 5, 5, 8, 6, 3, 3, 9, 9, 4, 0, 5, 5, 3, 6, 6,
        4, 6, 7, 2, 7, 0, 3, 1, 7, 2, 8, 8],
       [7, 4, 3, 9, 2, 4, 3, 9, 4, 9, 9, 5, 5, 8, 0, 4, 9, 8, 5, 1, 2, 0,
        5, 9, 4, 3, 9, 1, 5, 0, 3, 9, 4, 3, 1, 5, 8, 0, 3, 2, 1, 0, 0, 6,
        3, 5, 1, 8, 0, 9, 6, 3, 1, 2, 4, 0, 4, 4, 6, 5, 8, 0, 6, 2, 8, 4,
        3, 9, 2, 1, 7, 0, 1, 6, 2, 6, 4, 8, 9, 9, 3, 3, 7, 7, 8, 3, 5, 6,
        8, 7, 1, 7, 6, 3, 3, 9, 7, 5, 6, 1],
       [3, 6, 6, 8, 4, 2, 2, 2, 4, 9, 9, 6, 9, 9, 7, 4, 3, 1, 1, 1, 1, 2,
        1, 3, 8, 6, 9, 4, 5, 3, 6, 1, 5, 7, 4, 4, 6, 1, 9, 7, 6, 0, 0, 1,
        9, 9, 6, 7, 5, 8, 3, 9, 7, 8, 0, 7, 3, 3, 0, 3, 7, 7, 6, 7, 9, 9,
        0, 1, 5, 0, 9, 4, 2, 8, 5, 7, 2, 4, 6, 6, 2, 3, 6, 8, 4, 1, 1, 5,
        9, 5, 2, 3, 8,

In [60]:
# Prediction shape
predictions.transpose()

array([[7, 7, 3, 5, 5, 3, 8, 1, 1, 7],
       [3, 4, 6, 0, 0, 8, 5, 1, 3, 6],
       [3, 3, 6, 3, 6, 1, 6, 9, 5, 9],
       [7, 9, 8, 9, 3, 7, 6, 6, 4, 5],
       [9, 2, 4, 1, 6, 7, 1, 4, 9, 2],
       [8, 4, 2, 7, 0, 1, 9, 9, 8, 0],
       [4, 3, 2, 7, 2, 2, 8, 9, 6, 2],
       [6, 9, 2, 8, 9, 0, 0, 5, 2, 4],
       [6, 4, 4, 5, 4, 7, 9, 8, 3, 5],
       [7, 9, 9, 2, 4, 5, 2, 6, 3, 3],
       [5, 9, 9, 3, 6, 5, 8, 0, 3, 0],
       [4, 5, 6, 2, 7, 5, 7, 7, 9, 5],
       [9, 5, 9, 8, 9, 1, 5, 9, 7, 0],
       [9, 8, 9, 0, 5, 7, 3, 9, 0, 7],
       [0, 0, 7, 3, 6, 5, 4, 7, 8, 5],
       [3, 4, 4, 8, 1, 9, 9, 4, 2, 8],
       [0, 9, 3, 1, 8, 5, 2, 0, 7, 1],
       [7, 8, 1, 8, 2, 0, 5, 3, 1, 2],
       [2, 5, 1, 3, 0, 2, 9, 9, 3, 3],
       [4, 1, 1, 5, 4, 8, 4, 1, 8, 6],
       [0, 2, 1, 5, 3, 6, 6, 0, 2, 7],
       [9, 0, 2, 1, 1, 7, 5, 0, 0, 8],
       [9, 5, 1, 7, 1, 5, 2, 3, 1, 9],
       [3, 9, 3, 9, 5, 9, 7, 8, 9, 7],
       [8, 4, 8, 2, 7, 7, 0, 1, 7, 0],
       [9, 3, 6, 9, 9, 5,

In [61]:
# For the first image, the 10 prredictions from the 10 hospitals is
image_label = predictions[:, 0]
image_label

array([7, 7, 3, 5, 5, 3, 8, 1, 1, 7])

In [62]:
# Count the number of labels so as to get the reliable prediction
pred_label_count = np.bincount(image_label, minlength=num_classes)
pred_label_count

array([0, 2, 0, 2, 0, 2, 0, 3, 1, 0], dtype=int64)

In [63]:
predicted_label = np.argmax(pred_label_count)
predicted_label

7

In [64]:
# Hyper parameters for noise 
epsilon = 0.1
sensitivity = 1
beta = sensitivity/epsilon
beta

10.0

In [65]:
for i in range(len(pred_label_count)):
    pred_label_count[i] += np.random.laplace(0, beta, 1)
    
new_labels = pred_label_count
new_labels

array([-11, -12,   0,   9, -17,  15, -29,   9,  23,   1], dtype=int64)

In [66]:
def predict_labels(predictions):
    new_predictions = []
    for img_data in predictions.transpose():

        pred_label_count = np.bincount(img_data, minlength=num_classes)

        pred_level_count_before = np.bincount(img_data, minlength=num_classes)
        pred_before = np.argmax(pred_level_count_before)

        epsilon = 0.1
        sensitivity = 1
        beta = sensitivity/epsilon

        for i in range(len(pred_label_count)):
            pred_label_count[i] += np.random.laplace(0, beta, 1)

        pred =np.argmax(pred_label_count)

        new_predictions.append(pred)
    
        # print(img_data, '->', pred_level_count_before,'->', pred_label_count, '--->',pred_before, '->', pred)

        return pred_level_count_before, new_predictions

In [67]:
teacher_pred, new_predictions = predict_labels(predictions)

In [68]:
import syft

In [69]:
from syft.frameworks.torch.differential_privacy import pate

In [70]:
# help(pate)
num_teachers, num_examples, num_labels = (100, 100, 10)
preds = (np.random.rand(num_teachers, num_examples)*num_labels).astype(int)
indices = (np.random.rand(num_examples)*num_labels).astype(int)

print(preds.shape)
print(indices.shape)

(100, 100)
(100,)


In [71]:
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)
print('Data Dependent Epsilon: ',data_dep_eps)
print('Data Independent Epsilon: ',data_ind_eps)

Data Dependent Epsilon:  11.756462732485105
Data Independent Epsilon:  11.756462732485115


In [72]:
preds[:, 0:5] *= 0
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)
print('Data Dependent Epsilon: ',data_dep_eps)
print('Data Independent Epsilon: ',data_ind_eps)

Data Dependent Epsilon:  7.788594079766298
Data Independent Epsilon:  11.756462732485115


In [73]:
preds[:, 0:50] *= 0
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5, moments=20)
print('Data Dependent Epsilon: ',data_dep_eps)
print('Data Independent Epsilon: ',data_ind_eps)

Data Dependent Epsilon:  0.9029013677789843
Data Independent Epsilon:  11.756462732485115
