In [1]:
# install syft if we are on google colab
import sys, os; os.system("pip install --pre syft") if "google.colab" in sys.modules else ""
import syft as sy
import numpy as np
import pandas as pd
from tqdm import tqdm
import time


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Logged-in domain clients, keyed by the URL they were created for.
domains = dict()
# Unique credential sets; each entry is a frozenset of (key, value) pairs.
domain_credentials = set()

In [3]:
# Pass in the list of credential dicts supplied by the data owners.
def add_credentials(credentials_dict_list):
    """Register credential dicts in the module-level ``domain_credentials`` set.

    Each dict is frozen into a ``frozenset`` of its ``(key, value)`` pairs so
    that it is hashable; registering identical credentials twice therefore
    leaves a single entry.

    Args:
        credentials_dict_list: iterable of dicts whose values are hashable.
    """
    domain_credentials.update(
        frozenset(entry.items()) for entry in credentials_dict_list
    )

In [4]:
# Demo credentials for a local node. The login output warns that this is
# still the default password, so replace it before using a real node.
add_credentials(
    [
        {
            'url': 'localhost',
            'name': 'Samantha Carter',
            'email': 'info@openmined.org',
            'password': 'changethis',
            'dataset_name': 'MedNIST Data 1/10',
        },
    ]
)

In [5]:
# Inspect the registered credential sets.
# NOTE: the rendered output includes the stored passwords in plain text.
domain_credentials

{frozenset({('dataset_name', 'MedNIST Data 1/10'),
            ('email', 'info@openmined.org'),
            ('name', 'Samantha Carter'),
            ('password', 'changethis'),
            ('url', 'localhost')})}

In [6]:
def login_to_domains(force: bool = False):
    """Log in to every registered domain and cache the resulting client.

    Iterates over the module-level ``domain_credentials`` set. For each entry
    the ``name`` key is dropped and every remaining key is forwarded to
    ``sy.login`` as a keyword argument; the returned client is stored in the
    module-level ``domains`` dict under the entry's ``url``.

    Args:
        force: when True, log in again even if a client for that URL is
            already cached.

    Any exception raised while preparing or performing a login is printed
    and the loop moves on to the next entry.
    """
    for frozen_entry in domain_credentials:
        credentials = dict(frozen_entry)
        url = credentials["url"]
        # Skip domains that already have a client unless a re-login is forced.
        if url in domains and not force:
            continue
        try:
            login_kwargs = dict(credentials)
            login_kwargs.pop("name")
            domains[url] = sy.login(**login_kwargs)
        except Exception as err:
            print(err)

In [7]:
# Log in to every registered domain that does not have a cached client yet.
login_to_domains()


Anyone can login as an admin to your node right now because your password is still the default PySyft username and password!!!

Connecting to localhost... done! 	 Logging into canada... done!


In [9]:
# Snapshot of the logged-in clients, in the insertion order of `domains`.
domain_clients = [*domains.values()]


In [10]:
# Show the last dataset listed on the first connected domain.
domain_clients[0].datasets[-1]

Dataset: MedNIST Data 1/10
Description: The MedNIST dataset was gathered from several sets from TCIA, the RSNA Bone Age Challenge, and the NIH Chest X-ray dataset. The dataset is kindly made available by Dr. Bradley J. Erickson M.D., Ph.D. (Department of Radiology, Mayo Clinic) under the Creative Commons CC BY-SA 4.0 license.
Label Count: 6
Label Mapping: {"AbdomenCT": 0, "BreastMRI": 1, "CXR": 2, "ChestCT": 3, "Hand": 4, "HeadCT": 5}
Image Dimensions: (64, 64)
Total Images: 5895




Asset Key,Type,Shape
"[""train_images""]",,"(4731, 4096)"
"[""train_labels""]",,"(4731,)"
"[""val_images""]",,"(626, 4096)"
"[""val_labels""]",,"(626,)"
"[""test_images""]",,"(538, 4096)"
"[""test_labels""]",,"(538,)"


In [11]:
# Per-domain pointers to the training and validation assets; index i of each
# list belongs to domain_clients[i].
X_train, Y_train, X_dev, Y_dev = [], [], [], []
for idx, domain in enumerate(domain_clients):
    # The last dataset on the domain is the one used for training.
    data = domain.datasets[-1]

    X_train.append(data["train_images"])
    Y_train.append(data["train_labels"])

    X_dev.append(data["val_images"])
    Y_dev.append(data["val_labels"])

    # Pre-process: transpose so that each column is one sample, then scale
    # the values by 1/255.
    X_train[idx] = (X_train[idx].T) * (1 / 255.0)
    X_dev[idx] = (X_dev[idx].T) * (1 / 255.0)

# The images were transposed above, so public_shape is (features, samples).
# backward_prop averages the gradients with 1/m, so m must be the sample
# count (second entry) and n the feature count (first entry).
# NOTE(review): m comes from the first domain only; confirm that every domain
# holds the same number of training samples.
n, m = X_train[0].public_shape

In [12]:
# Display the training and validation pointer pairs of the first domain.
(X_train[0], Y_train[0]), (X_dev[0], Y_dev[0])

((<TensorPointer -> canada:2a3b1cb6f47d41728737d79f1182272f>,
  <TensorPointer -> canada:84d45abe84e9485ab97c2f5a7743b494>),
 (<TensorPointer -> canada:2a3b1cb6f47d41728737d79f1182272f>,
  <TensorPointer -> canada:84d45abe84e9485ab97c2f5a7743b494>))

In [16]:
def init_params(input_size: int):
    """Create random starting parameters for the two-layer network.

    Every entry is drawn with ``np.random.rand`` and shifted by -0.5.

    Args:
        input_size: number of input features, i.e. the column count of W1.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(6, input_size)``, ``(6, 1)``,
        ``(6, 6)`` and ``(6, 1)``.
    """
    print(f"Using input size: {input_size}")
    # Order matters: the arrays are drawn from the global RNG in this order.
    shapes = ((6, input_size), (6, 1), (6, 6), (6, 1))
    W1, b1, W2, b2 = [np.random.rand(*shape) - 0.5 for shape in shapes]
    return W1, b1, W2, b2


def ReLU(Z):
    """Rectified linear unit: keep positive entries of ``Z`` and zero the rest.

    Implemented as a multiplication with the ``Z > 0`` mask, so it only needs
    ``>`` and ``*`` to be supported by ``Z``.
    """
    positive_mask = Z > 0
    return Z * positive_mask


def softmax(Z):
    """Exponentiate ``Z`` and normalise by the sum of the exponentials.

    Uses only the ``exp``, ``sum`` and ``reciprocal`` methods of ``Z`` plus
    ``*``; division is expressed as multiplication with the reciprocal.
    """
    exponentials = Z.exp()
    # NOTE(review): sum() is called without an axis. If that reduces over every
    # element, the normalisation is global rather than per column - confirm
    # that this is intended.
    return exponentials * exponentials.sum().reciprocal()


def forward_prop(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    Computes ``Z1 = W1 @ X + b1``, ``A1 = ReLU(Z1)``, ``Z2 = W2 @ A1 + b2`` and
    ``A2 = softmax(Z2)``.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``.
    """

    def affine(weights, bias, inputs):
        # The product is requested through the right-hand operand's
        # __rmatmul__ on purpose (presumably so the tensor pointer, not the
        # plain weight array, performs the multiplication - TODO confirm).
        return inputs.__rmatmul__(weights) + bias

    Z1 = affine(W1, b1, X)
    A1 = ReLU(Z1)
    Z2 = affine(W2, b2, A1)
    return Z1, A1, Z2, softmax(Z2)


def ReLU_deriv(Z):
    """Derivative of ReLU expressed as the mask of strictly positive entries."""
    is_positive = Z > 0
    return is_positive


def one_hot(Y):
    """One-hot encode an integer label array with NumPy.

    Args:
        Y: 1-D integer array of class labels.

    Returns:
        Float array of shape ``(Y.max() + 1, Y.size)``; column ``i`` has a
        single 1 in row ``Y[i]``.
    """
    sample_count = Y.size
    class_count = Y.max() + 1
    encoded = np.zeros((sample_count, class_count))
    # Row i gets a 1 in the column of its label; transpose to classes x samples.
    encoded[np.arange(sample_count), Y] = 1
    return encoded.T


def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the gradients of both layers from the forward-pass values.

    Reads the module-level ``m`` and scales every gradient by ``1 / m``.
    ``Z2`` and ``W1`` are accepted but not used. ``Y`` must provide a
    ``one_hot()`` method.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``.
    """
    dZ2 = A2 - Y.one_hot()
    scale = 1 / m
    dW2 = (dZ2 @ A1.T) * scale
    # NOTE(review): sum() has no axis argument here, so the bias gradients may
    # be reduced over every element rather than per row - confirm intent.
    db2 = dZ2.sum() * scale
    # W2.T @ dZ2, requested through dZ2's own __rmatmul__.
    dZ1 = dZ2.__rmatmul__(W2.T) * ReLU_deriv(Z1)
    dW1 = (dZ1 @ X.T) * scale
    db1 = dZ1.sum() * scale
    return dW1, db1, dW2, db2


def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Apply one gradient-descent step to every parameter.

    Each result equals ``param - alpha * grad`` but is evaluated as
    ``(grad * alpha - param) * -1`` so that the gradient object is always the
    left-hand operand.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated parameters.
    """

    def step(param, grad):
        return (grad * alpha - param) * -1

    return step(W1, dW1), step(b1, db1), step(W2, dW2), step(b2, db2)


def gradient_descent(X, Y, alpha, iterations):
    """Train the two-layer network with plain gradient descent.

    Args:
        X: input tensor; ``X.public_shape[0]`` is used as the input size.
        Y: labels, passed through to ``backward_prop``.
        alpha: learning rate handed to ``update_params``.
        iterations: number of forward/backward/update rounds.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` after the last update (the initial
        parameters when ``iterations`` is 0).
    """
    params = init_params(X.public_shape[0])
    print("[INFO]: Starting training!\n")
    for _ in tqdm(range(iterations)):
        current_W1, _b1, current_W2, _b2 = params
        activations = forward_prop(*params, X)
        gradients = backward_prop(*activations, current_W1, current_W2, X, Y)
        params = update_params(*params, *gradients, alpha)

    W1, b1, W2, b2 = params
    return W1, b1, W2, b2


In [None]:
# Trained parameters from all domain nodes; entry i belongs to domain i.
W1, b1, W2, b2 = [], [], [], []
# The loop targets must not reuse the names X_train / Y_train: a for-loop
# rebinds its targets at module level, which would replace the per-domain lists
# with a single pointer pair and break any re-run of this or earlier cells.
for X_domain, Y_domain in zip(X_train, Y_train):
    # Learning rate 0.10, one iteration per domain.
    W1_train, b1_train, W2_train, b2_train = gradient_descent(X_domain, Y_domain, 0.10, 1)

    W1.append(W1_train)
    b1.append(b1_train)
    W2.append(W2_train)
    b2.append(b2_train)


In [None]:
# Block until every domain's four parameter pointers are ready, passing 60 as
# the timeout for each, then report that domain as trained.
for p1, p2, p3, p4 in zip(W1, b1, W2, b2):
    for pending in (p1, p2, p3, p4):
        pending.block_with_timeout(60)

    print(f"Training Successful on  Domain Client ✅:{p1.client} ")

In [None]:
# SMPC averaging of the weights: sum each parameter over all domains and
# multiply by 1/n, where n is the number of domains that were trained.
# NOTE: this rebinds W1, b1, W2 and b2 from per-domain lists to single averaged
# tensors, so the cell is meant to run once after training.
n = len(W1)
W1 = sum(W1) * (1 / n)
b1 = sum(b1) * (1 / n)
W2 = sum(W2) * (1 / n)
# Average the b2 list itself; b1 has already been replaced by its average.
b2 = sum(b2) * (1 / n)

In [None]:
# Block on each averaged parameter (timeout argument 60) and announce success
# once, after all four have completed, not after each one.
for ptr in [W1, b1, W2, b2]:
    ptr.block_with_timeout(60)
print("SMPC Averaging Successful  ✅")

In [None]:
# Publish the averaged weights using DP; the same sigma is passed to every
# publish call.
sigma = 100
W1, b1, W2, b2 = [
    averaged.publish(sigma=sigma) for averaged in (W1, b1, W2, b2)
]
print("You have witnessed and trained one of a kind ML Model Training with SMPC +DP ")