In [121]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [122]:
from syft.grid.client.client import connect
from syft.grid.client.grid_connection import GridHTTPConnection
from syft.core.node.domain.client import DomainClient
from syft.core.plan.plan_builder import PLAN_BUILDER_VM, make_plan, ROOT_CLIENT
from syft import SyModule
from syft import logger
import syft as sy
import torch as th

sy.VERBOSE = False
logger.remove()

# Settings

In [81]:
# Endpoints of the two locally running PyGrid nodes.
DOMAIN_URL = "http://localhost:5000"
NETWORK_URL = "http://localhost:7000"

# Demo-only accounts for this localhost walkthrough -- never reuse real credentials here.
# Data owner (do_*), data scientist (ds_*) and network owner (net_*).
do_email = "owner@openmined.org"
do_pw = "12345"
ds_email = "data_scientist@email.com"
ds_pw = "data_scientist_pwd123"
net_email = "network@mymail.com"
net_pw = "network_pw"

# Token passed to `client.setup(...)` when a node is initialised for the first time.
token = "9G9MJ06OQH"

# Addresses whose association requests the network accepts.
network_whitelist = [DOMAIN_URL]

This notebook assumes a clean database state for both the domain and the network, with both running on localhost. If you want to delete their databases and start from scratch, you can run:

In [126]:
# !rm /Users/koen/workspace/PyGrid/apps/domain/src/nodedatabase.db
# !rm /Users/koen/workspace/PyGrid/apps/network/src/nodedatabase.db

Run a `Domain` on `http://localhost:5000` and a `Network` on `http://localhost:7000`. **Make sure to `export MEMORY_STORE=True`** before starting the `Domain`.

# Utils

In [83]:
def setup_and_connect(url, email, pw, token, node_name="My Node", domain_name="Openmined Domain"):
    """Return a logged-in client for the node at `url`, initialising the node if needed.

    A login with `email` / `pw` is attempted first. If that raises any
    `Exception`, an unauthenticated connection is opened, `client.setup(...)`
    is called with the given account, names and `token`, and the login is
    attempted once more. An exception from that second login propagates.

    Args:
        url: Address of the node to connect to.
        email: Account email used for login and, if needed, for setup.
        pw: Account password used for login and, if needed, for setup.
        token: Token forwarded to `client.setup`.
        node_name: Node name forwarded to `client.setup`.
        domain_name: Domain name forwarded to `client.setup`.

    Returns:
        Whatever `connect(url=..., credentials=...)` returns.
    """
    try:
        return connect(url=url, credentials={"email": email, "password": pw})
    except Exception:
        # Login failed: run the one-time setup through an unauthenticated
        # connection, then log in again with the same credentials.
        anonymous_client = connect(url=url)
        anonymous_client.setup(
            email=email,
            password=pw,
            node_name=node_name,
            domain_name=domain_name,
            token=token,
        )
        return connect(url=url, credentials={"email": email, "password": pw})

In [84]:
def is_associated(client, network_url):
    """Return True if `client` has an accepted association request for `network_url`.

    Args:
        client: Object exposing `association_requests.all()`, which yields
            dict-like entries with `"address"` and `"accepted"` keys.
        network_url: Address to look for among the association requests.

    Returns:
        True if at least one request has `"address"` equal to `network_url`
        and `"accepted"` equal to True, otherwise False.
    """
    # A generator lets `any` stop at the first match instead of building the
    # full list first. The explicit `== True` is kept so that the matching
    # rule is unchanged.
    return any(
        request["address"] == network_url and request["accepted"] == True  # noqa: E712
        for request in client.association_requests.all()
    )

# Setup Network

For networks, there is a small naming issue in PyGrid's `setup_service.py`, line 108 ("msg" -> "message"), so you might have to run this cell twice for it to work.

In [127]:
network_client = setup_and_connect(NETWORK_URL, net_email, net_pw, token, domain_name="Network")

# Data owner: setup, connect to network, and load data

## Setup

In [90]:
do_client = setup_and_connect(DOMAIN_URL, do_email, do_pw, token)

In [91]:
do_client.users.all()

[{'id': 1,
  'email': 'owner@openmined.org',
  'private_key': '7829ec684b08dd5eda1548c7f178b519f53c9bfad2f683cbdfa1ddc500e55872',
  'verify_key': 'f47acf600a246d97346483bbc8498236e3da0d57e8cd60ec06c6ef69bcdd2016',
  'role': 4,
  'groups': []}]

## Association Request

In [93]:
if not is_associated(do_client, NETWORK_URL):
    do_client.association_requests.create(name="My request", address=NETWORK_URL, sender_address=DOMAIN_URL)

### Network Accepts requests from whitelisted urls

In [94]:
for req in network_client.association_requests.all():
    if req["address"] in network_whitelist and req["accepted"] == False:
        network_client.association_requests[req["id"]].accept();

In [95]:
network_client.association_requests.all(pandas=True)

Unnamed: 0,id,date,name,address,sender_address,accepted,pending,handshake_value
0,1,2021-06-01 14:48:13.569228,My request,http://localhost:5000,http://localhost:7000,True,False,b53a6255237890f207f2d856a994cffa893b009b1ed8e4...


## Create dataset (DO)

In [96]:
tag = "#mydataset"

In [97]:
data_x, data_y = th.randn(1024,1,28,28), th.randint(10, (1024,))

In [98]:
x_ptr = data_x.send(do_client, pointable=True, tags=[f"{tag}:x"])
y_ptr = data_y.send(do_client, pointable=True, tags=[f"{tag}:y"])

## Create DS account (DO)

If we do this for multiple `Domains`, we need to make sure that we don't use the same password for different domains, as this may leak access keys between domains.

In [99]:
# Create the data scientist account only if no existing user has that email.
# The generator lets `any` stop at the first matching user instead of
# building the full list of comparisons first.
if not any(user["email"] == ds_email for user in do_client.users.all()):
    do_client.users.create(email=ds_email, password=ds_pw)

# Data scientist: search & train

## Search

TODO: this should ideally be a separate client (with user permissions) in the future. For now this is okay, as we assume that the network owner and the data scientist are from the same org.

In [100]:
query = f"{tag}:x"

In [101]:
network_client.search(query=[query], pandas=True)

Unnamed: 0,match-nodes
0,http://localhost:5000


In [102]:
urls = network_client.search(query=[query])["match-nodes"]
url = urls[0]

## Connect to domain

In [103]:
ds_client = connect(url=url, credentials={"email": ds_email, "password": ds_pw})

## Create plan

In [104]:
class MySyModule(SyModule):
    """Small fully connected network: Linear(784, 100) -> ReLU -> Linear(100, 10)."""

    def __init__(self, **kwargs):
        # All keyword arguments (e.g. `input_size`) are forwarded unchanged to SyModule.
        super().__init__(**kwargs)
        self.layer1 = th.nn.Linear(28 * 28, 100)
        self.relu1 = th.nn.ReLU()
        self.layer2 = th.nn.Linear(100, 10)

    def forward(self, x):
        """Reshape `x` to rows of 28 * 28 values and return the output of `layer2`."""
        flat = x.view(-1, 28 * 28)
        hidden = self.layer1(flat)
        activated = self.relu1(hidden)
        return self.layer2(activated)

In [105]:
model = MySyModule(input_size=(32,28*28))
dummy_dl = sy.lib.python.List([(th.randn(1024,1,28,28), th.randint(10, (1024,)))])

In [106]:
remote_torch = ROOT_CLIENT.torch

In [107]:
@make_plan
def train(x=data_x, y=data_y, model=model):
    # One SGD step (lr=0.1, no momentum) on a cross-entropy loss, wrapped by `make_plan`.
    # The defaults (`data_x`, `data_y`, `model`) are bound when this cell runs.
    # NOTE(review): presumably `make_plan` uses these defaults as example inputs
    # when building the plan -- confirm against the plan builder.
    # `remote_torch` is `ROOT_CLIENT.torch`, so optimizer and loss go through that client.
    optimizer = remote_torch.optim.SGD(model.parameters(), lr=1e-1, momentum=0)
    optimizer.zero_grad()
    # NOTE(review): the model call result is indexed with [0]; presumably a
    # SyModule call returns a sequence of outputs -- confirm.
    out = model(x=x)[0]
    loss = remote_torch.nn.functional.cross_entropy(out, y)
    loss.backward()
    optimizer.step()

    # Returned as a one-element list; callers unpack it with `updated_model, = ...`.
    return [model]

## Run plan

### Get dataset pointer

In [109]:
x = [t for t in ds_client.store if f"{tag}:x" in t.tags][-1]
y = [t for t in ds_client.store if f"{tag}:y" in t.tags][-1]
model = MySyModule(input_size=(32,28*28))
train_ptr = train.send(ds_client)

Run the plan on the domain

In [30]:
out_ptr = train_ptr(x=x, y=y, model=model.send(do_client))

We need to request the resulting model, because we have no permissions

In [31]:
out_ptr.request()

In [33]:
# Sanity check: before the data owner accepts the request, fetching the result
# must fail. If the fetch succeeds, the `else` branch flags broken permissions.
try:
    updated_model, = out_ptr.get()
except Exception:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit are not swallowed and reported as the expected denial.
    print("First DO needs to accept")
else:
    raise PermissionError("Permissions are broken")

First DO needs to accept


In [34]:
do_client.requests[-1].accept()

In [35]:
updated_model, = out_ptr.get()
updated_model

MySyModule(
  (layer1): Linear(in_features=784, out_features=100, bias=True)
  (relu1): ReLU()
  (layer2): Linear(in_features=100, out_features=10, bias=True)
)

The model is updated, so the original and the updated model are not the same.

In [36]:
th.equal(list(model.parameters())[0], list(updated_model.parameters())[0])

False