In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from syft.grid.client.client import connect
from syft.grid.client.grid_connection import GridHTTPConnection
from syft.core.node.domain.client import DomainClient
from syft.core.plan.plan_builder import PLAN_BUILDER_VM, make_plan, ROOT_CLIENT
from syft import SyModule
from syft import logger
import syft as sy
import torch as th

sy.VERBOSE = False
logger.remove()

# Settings

In [3]:
import os  # needed only by this settings cell

# Every setting can be overridden through an environment variable so that real
# credentials never have to be written into the notebook. The fallbacks are the
# throw-away values used by the local demo deployment.
DOMAIN_URL = os.environ.get("PYGRID_DOMAIN_URL", "http://localhost:5000")
NETWORK_URL = os.environ.get("PYGRID_NETWORK_URL", "http://localhost:7000")

# Account used to set up / log in to the domain node.
do_email = os.environ.get("PYGRID_DO_EMAIL", "owner@openmined.org")
do_pw = os.environ.get("PYGRID_DO_PASSWORD", "12345")

# Account the data owner creates on the domain for the data scientist.
ds_email = os.environ.get("PYGRID_DS_EMAIL", "data_scientist@email.com")
ds_pw = os.environ.get("PYGRID_DS_PASSWORD", "data_scientist_pwd123")

# Account used to set up / log in to the network node.
net_email = os.environ.get("PYGRID_NET_EMAIL", "network@mymail.com")
net_pw = os.environ.get("PYGRID_NET_PASSWORD", "network_pw")

# Token passed to `client.setup(...)` when a node is initialised.
token = os.environ.get("PYGRID_SETUP_TOKEN", "9G9MJ06OQH")

# Addresses whose association requests the network accepts.
network_whitelist = [DOMAIN_URL]

This notebook assumes a clean database state for both the domain and the network. If you want to delete their databases and start from scratch, you can run:

In [4]:
# !rm /Users/koen/workspace/PyGrid/apps/domain/src/nodedatabase.db
# !rm /Users/koen/workspace/PyGrid/apps/network/src/nodedatabase.db

Run a `Domain` on `http://localhost:5000` and a `Network` on `http://localhost:7000`. **Make sure to `export MEMORY_STORE=True`** before starting the `Network`.

# Utils

In [5]:
def setup_and_connect(url, email, pw, token, node_name="My Node", domain_name="Openmined Domain"):
    """Log in to the node at ``url``, running the initial setup first if login fails.

    A credentialed ``connect`` is attempted first. If it raises anything, an
    unauthenticated client is opened, ``setup`` is called on it with the given
    account details, and the credentialed login is attempted again.

    Args:
        url: Address of the node.
        email: Login email; also passed to ``setup`` on the fallback path.
        pw: Login password; also passed to ``setup`` on the fallback path.
        token: Token passed to ``setup``.
        node_name: Node name passed to ``setup``.
        domain_name: Domain name passed to ``setup``.

    Returns:
        The client returned by the credentialed ``connect`` call.

    Raises:
        Whatever ``connect`` or ``setup`` raise on the fallback path.
    """
    def _login():
        credentials = {"email": email, "password": pw}
        return connect(url=url, credentials=credentials)

    try:
        client = _login()
    except Exception:
        # Any login failure is treated as "node not set up yet" (the error
        # itself is not inspected): set the node up, then log in again.
        bootstrap_client = connect(url=url)
        bootstrap_client.setup(
            email=email,
            password=pw,
            node_name=node_name,
            domain_name=domain_name,
            token=token,
        )
        client = _login()
    return client

In [6]:
def is_associated(client, network_url):
    """Return True if ``client`` has an accepted association request for ``network_url``.

    Args:
        client: Object whose ``association_requests.all()`` returns records
            indexable by the keys ``"address"`` and ``"accepted"``.
        network_url: Address the request must have been made to.

    Returns:
        True when at least one record has ``address == network_url`` and
        ``accepted == True``; False otherwise (including when there are no records).
    """
    def _is_accepted_for_network(record):
        # `== True` (rather than truthiness) is kept so that only values equal
        # to True count as accepted.
        return record["address"] == network_url and record["accepted"] == True

    records = client.association_requests.all()
    # Every record is checked before `any` runs, so a malformed record raises
    # even if an earlier one already matched.
    return any(list(map(_is_accepted_for_network, records)))

# Setup Network

In [7]:
network_client = setup_and_connect(NETWORK_URL, net_email, net_pw, token, domain_name="Network")

# Data owner: setup, connect to network, and load data

## Setup

In [8]:
do_client = setup_and_connect(DOMAIN_URL, do_email, do_pw, token)

## Association Request

In [9]:
if not is_associated(do_client, NETWORK_URL):
    do_client.association_requests.create(name="My request", address=NETWORK_URL, sender_address=DOMAIN_URL)

### Network Accepts requests from whitelisted urls

In [10]:
# Accept every association request on the network whose address is whitelisted
# and whose `accepted` field equals False.
for req in network_client.association_requests.all():
    if req["address"] not in network_whitelist:
        continue
    # `== False` is kept on purpose: a value such as None does not equal False
    # and is therefore left untouched.
    if req["accepted"] == False:
        network_client.association_requests[req["id"]].accept()

In [11]:
network_client.association_requests.all(pandas=True)

Unnamed: 0,id,date,name,address,sender_address,accepted,pending,handshake_value
0,1,2021-05-20 16:45:08.934553,My request,http://localhost:5000,http://localhost:7000,True,False,b53a6255237890f207f2d856a994cffa893b009b1ed8e4...


## Create dataset (DO)

In [12]:
tag = "#mydataset"

In [13]:
x = th.randn(32, 1, 28, 28)
x_s = x.send(do_client, pointable=True, tags=[tag, "x"])

## Create DS account (DO)

If we do this for multiple `Domains`, we need to make sure that we don't use the same password for different domains, as this may leak access keys between domains.

In [14]:
# Create the data scientist's account on the domain unless a user with that
# email is already registered there.
registered_emails = [user["email"] for user in do_client.users.all()]
if ds_email not in registered_emails:
    do_client.users.create(email=ds_email, password=ds_pw)

# Data scientist: search & train

## Search

TODO: this should ideally be a separate client (with user permissions) in the future. For now this is okay, as we assume that the network owner and the data scientist are from the same org.

In [15]:
network_client.search(query=[tag], pandas=True)

Unnamed: 0,match-nodes
0,http://localhost:5000


In [16]:
urls = network_client.search(query=[tag])["match-nodes"]
url = urls[0]

## Connect to domain

In [17]:
ds_client = connect(url=url, credentials={"email": ds_email, "password": ds_pw})

## Create plan

In [18]:
class MySyModule(SyModule):
    """Small fully connected classifier: Linear(784, 100) -> ReLU -> Linear(100, 10)."""

    def __init__(self, **kwargs):
        # All keyword arguments are handed to SyModule unchanged.
        super().__init__(**kwargs)
        self.layer1 = th.nn.Linear(28 * 28, 100)
        self.relu1 = th.nn.ReLU()
        self.layer2 = th.nn.Linear(100, 10)

    def forward(self, x):
        """Reshape ``x`` into rows of 28*28 values and return the output of ``layer2``."""
        flat = x.view(-1, 28 * 28)
        hidden = self.relu1(self.layer1(flat))
        return self.layer2(hidden)

In [19]:
model = MySyModule(input_size=(32,28*28))
dummy_dl = sy.lib.python.List([(th.randn(1024,1,28,28), th.randint(10, (1024,)))])

In [20]:
remote_torch = ROOT_CLIENT.torch

In [21]:

@make_plan
def train(dl=dummy_dl, model = model):
    """Plan that runs one SGD pass over ``dl`` and returns the model in a list.

    Args:
        dl: Iterable of batches; each batch is indexed as ``batch[0]`` (inputs)
            and ``batch[1]`` (targets). The default is the notebook's dummy
            loader (presumably what ``make_plan`` uses as example input when
            building the plan; confirm against the syft docs).
        model: Module to train. It is called as ``model(x=...)`` and the first
            element of its result is used as the logits.

    Returns:
        ``[model]``, a one-element list holding the trained model.
    """
    optimizer = remote_torch.optim.SGD(model.parameters(), lr=1e-1, momentum=0)
    for xy in dl:
        optimizer.zero_grad()
        x, y = xy[0], xy[1]
        out = model(x=x)[0]
        loss = remote_torch.nn.functional.cross_entropy(out, y)
        loss.backward()
        optimizer.step()

    # Return only after the loop: with the return inside the loop body the plan
    # stopped after the first batch and returned None for an empty loader.
    return [model]

## Run plan

### Get dataset pointer

In [22]:
# Pick the last object in the domain's store that carries our tag.
tagged_pointers = [ptr for ptr in ds_client.store if tag in ptr.tags]
dataset = tagged_pointers[-1]

# Fresh model instance, replacing the one used as the plan's default argument.
model = MySyModule(input_size=(32, 28 * 28))

# Send the plan to the domain through the data scientist's client.
train_ptr = train.send(ds_client)

TODO: think about how dummy variables in the plan can be re-used. Why do permissions not work correctly without an explicit .send()?

In [34]:
# model_ptr = model.send(do_client)
out_ptr = train_ptr(dl=dummy_dl, model=model.send(do_client))

In [35]:
out_ptr

<syft.proxy.syft.lib.python.ListPointer at 0x7fb9a4c07610>

In [36]:
out_ptr.request()

# NOTE(review): this is not the last expression of the cell, so its value is
# not displayed.
do_client.requests

# The cell treats a failing `get()` as the expected outcome here (the request
# is only accepted in a later cell); a successful `get()` is reported as an error.
try:
    updated_model, = out_ptr.get()
except Exception as err:
    # Catch `Exception` rather than everything, so Ctrl-C still interrupts.
    print(f"out_ptr.get() failed as expected before the request was accepted: {err!r}")
else:
    print(updated_model)
    # A bare `raise` here had no active exception and only produced
    # "No active exception to reraise"; raise the same type with a real message.
    raise RuntimeError("out_ptr.get() succeeded before the request was accepted")

In [37]:
do_client.requests[-1].id

<UID: a1a54dd5752449a583de6a6ef6c1594b>

In [38]:
# The data owner accepts the most recent request in its list.
latest_request = do_client.requests[-1]
latest_request.accept()

# The result is unpacked as a one-element sequence holding the updated model.
[updated_model] = out_ptr.get()

updated_model

MySyModule(
  (layer1): Linear(in_features=784, out_features=100, bias=True)
  (relu1): ReLU()
  (layer2): Linear(in_features=100, out_features=10, bias=True)
)

In [39]:
list(model.parameters())[0]

Parameter containing:
tensor([[-0.0336,  0.0043,  0.0315,  ...,  0.0355, -0.0279,  0.0026],
        [-0.0114, -0.0169,  0.0144,  ..., -0.0108, -0.0243,  0.0037],
        [ 0.0219, -0.0219,  0.0278,  ...,  0.0146,  0.0278,  0.0243],
        ...,
        [-0.0118,  0.0343, -0.0063,  ..., -0.0185,  0.0164, -0.0291],
        [-0.0289,  0.0158,  0.0329,  ..., -0.0243,  0.0027, -0.0222],
        [ 0.0335,  0.0029,  0.0200,  ...,  0.0133,  0.0093,  0.0127]],
       requires_grad=True)

In [40]:
list(updated_model.parameters())[0]

Parameter containing:
tensor([[-0.0336,  0.0042,  0.0315,  ...,  0.0357, -0.0282,  0.0026],
        [-0.0116, -0.0168,  0.0143,  ..., -0.0108, -0.0246,  0.0038],
        [ 0.0219, -0.0219,  0.0277,  ...,  0.0144,  0.0279,  0.0242],
        ...,
        [-0.0117,  0.0343, -0.0067,  ..., -0.0187,  0.0163, -0.0292],
        [-0.0288,  0.0156,  0.0330,  ..., -0.0242,  0.0030, -0.0224],
        [ 0.0334,  0.0029,  0.0200,  ...,  0.0133,  0.0093,  0.0128]],
       requires_grad=True)

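# Appendix (experimental)

**Note:** the cells below are scratch experiments. Their execution counts are out of order and several of them use a `domain_node` client that is not defined in any cell above, so they will not run as-is after a kernel restart.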

## DataLoader

In [39]:
from syft.util import get_root_data_path
from torchvision import datasets, transforms
mnist_path = get_root_data_path()

# One preprocessing pipeline shared by both splits, so train and test cannot
# drift apart.
mnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

mnist_train = datasets.MNIST(str(mnist_path), train=True, download=True, transform=mnist_transform)

# The root is passed as `str` here too; previously only the train split
# converted it. No `download=True`: the train split above already requests the
# download into the same root.
mnist_test = datasets.MNIST(str(mnist_path), train=False, transform=mnist_transform)

train_loader = th.utils.data.DataLoader(mnist_train, batch_size=1024, shuffle=True, pin_memory=True)
test_loader = th.utils.data.DataLoader(mnist_test, batch_size=1024, shuffle=True, pin_memory=True)

In [40]:
x, y = next(iter(train_loader))

In [22]:
x_name, y_name = "x.pt", "y.pt"

In [23]:
th.save(x, x_name)

In [24]:
transforms.ToTensor??

In [25]:
tag = "mydataset"

In [26]:
# train_loader.send(domain_node)

In [17]:
# dataset = RemoteDataset(path=x_name, transform=transforms.Compose([transforms.ToTensor()]))
# dataloader = RemoteDataLoader(dataset)

## Transforms

In [18]:
from torchvision import transforms

In [19]:
# transforms.ToTensor().send(domain_node)

In [55]:
th.tensor([1,2,3]).send(domain_node, tags=[tag])

<syft.proxy.torch.TensorPointer at 0x7fa0dc4b9f10>

In [60]:
x_ptr = [x for x in domain_node.store if tag in x.tags][0]

In [61]:
from syft.core.remote_dataloader import RemoteDataLoader
from syft.core.remote_dataloader import RemoteDataset

In [None]:
ten = th.rand((1000, ))

In [20]:
# domain_node.tensors

In [21]:
# domain_node.datasets.create(z)

In [22]:
# %debug

In [14]:
domain_node.datasets.all(pandas=True)

In [77]:
x = th.tensor([1,2,3])

In [26]:
ptr = x.send(domain_node)

In [27]:
new_ptr = ptr+5

In [32]:
new_ptr.request()

In [75]:
# new_ptr.get()

In [29]:
# domain_node.datasets