In [10]:
epochs = 1

# Part 7 - Federated Learning with FederatedDataset

Here we introduce a new tool for using federated datasets. We have created a `FederatedDataset` class which is intended to be used like the PyTorch Dataset class, and is given to a federated data loader `FederatedDataLoader` which will iterate on it in a federated fashion.


In [None]:
import torch
import torch as th
import syft as sy
from syft.grid.clients.data_centric_fl_client import DataCentricFLClient
import pandas as pd

In [None]:
hook = sy.TorchHook(torch)
# The local worker
me = hook.local_worker
me.is_client_worker = False
# The remote workers
bob = DataCentricFLClient(hook, "http://18.220.216.78:5001/")
alice = DataCentricFLClient(hook, "http://18.220.216.78:5002/")
# The crypto provider
crypto_provider = DataCentricFLClient(hook, "http://18.220.216.78:5003/")
my_grid = sy.PrivateGridNetwork(bob,alice,crypto_provider )

In [31]:
kim = DataCentricFLClient(hook, "http://18.220.216.78:5001/")


In [34]:
from sklearn.datasets import load_boston
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_digits
from sklearn.datasets import load_diabetes
def load_sklearn(func, *tags):
        dataset = func()
        data = (
            torch.tensor(dataset["data"])
            .float()
            .tag(*(list(tags) + ["#data"] + dataset["DESCR"].split("\n")[0].lower().split(" ")))
            .describe(dataset["DESCR"])
        )
        target = (
            torch.tensor(dataset["target"])
            .float()
            .tag(
                *(list(tags) + ["#target"] + dataset["DESCR"].split("\n")[0].lower().split(" "))
            )
            .describe(dataset["DESCR"])
        )
        me.register_obj(data)
        me.register_obj(target)

        return data, target
data,targer=load_sklearn(load_boston, *["#boston", "#housing", "#boston_housing"])
load_sklearn(load_diabetes, *["#diabetes"])
load_sklearn(load_breast_cancer)

(tensor([[1.7990e+01, 1.0380e+01, 1.2280e+02,  ..., 2.6540e-01, 4.6010e-01,
          1.1890e-01],
         [2.0570e+01, 1.7770e+01, 1.3290e+02,  ..., 1.8600e-01, 2.7500e-01,
          8.9020e-02],
         [1.9690e+01, 2.1250e+01, 1.3000e+02,  ..., 2.4300e-01, 3.6130e-01,
          8.7580e-02],
         ...,
         [1.6600e+01, 2.8080e+01, 1.0830e+02,  ..., 1.4180e-01, 2.2180e-01,
          7.8200e-02],
         [2.0600e+01, 2.9330e+01, 1.4010e+02,  ..., 2.6500e-01, 4.0870e-01,
          1.2400e-01],
         [7.7600e+00, 2.4540e+01, 4.7920e+01,  ..., 0.0000e+00, 2.8710e-01,
          7.0390e-02]])
 	Tags: _breast_cancer_dataset: .. #data 
 	Description: .. _breast_cancer_dataset:...
 	Shape: torch.Size([569, 30]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0.,
         0., 1., 0., 0., 1., 

In [35]:
data.send(kim)

(Wrapper)>[PointerTensor | me:40419911065 -> Bob:17862325987]
	Tags: #housing #boston_housing _boston_dataset: .. #data #boston 
	Shape: torch.Size([506, 13])
	Description: .. _boston_dataset:...

In [3]:
data = grid.search("#boston")

In [19]:
data

{'bob': [(Wrapper)>[PointerTensor | me:93065209375 -> bob:82502429288]
  	Tags: #housing #boston #target _boston_dataset: #boston_housing .. 
  	Shape: torch.Size([506])
  	Description: .. _boston_dataset:...,
  (Wrapper)>[PointerTensor | me:29805147575 -> bob:55315003130]
  	Tags: #data #housing #boston _boston_dataset: #boston_housing .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...],
 'sam': [(Wrapper)>[PointerTensor | me:25764275142 -> sam:7544631372]
  	Tags: #housing #boston #target _boston_dataset: #boston_housing .. 
  	Shape: torch.Size([506])
  	Description: .. _boston_dataset:...,
  (Wrapper)>[PointerTensor | me:57892890242 -> sam:18565750255]
  	Tags: #data #boston #housing _boston_dataset: #boston_housing .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...]}

In [11]:
ptr = data['bob'][0]

In [13]:
ptr.shape

torch.Size([506])

In [14]:
ptr.tags

{'#boston', '#boston_housing', '#housing', '#target', '..', '_boston_dataset:'}

In [16]:
ptr.description

".. _boston_dataset:\n\nBoston house prices dataset\n---------------------------\n\n**Data Set Characteristics:**  \n\n    :Number of Instances: 506 \n\n    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n\n    :Attribute Information (in order):\n        - CRIM     per capita crime rate by town\n        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.\n        - INDUS    proportion of non-retail business acres per town\n        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n        - NOX      nitric oxides concentration (parts per 10 million)\n        - RM       average number of rooms per dwelling\n        - AGE      proportion of owner-occupied units built prior to 1940\n        - DIS      weighted distances to five Boston employment centres\n        - RAD      index of accessibility to radial highways\n        - TAX      full-value property-tax rate per $10,000

In [17]:
nd = data.copy()

In [23]:
nd = [[id,val[0].description, (list(val[0].shape)), val[0].tags] for id,val in data.items()]

In [24]:
nd

[['bob',
  ".. _boston_dataset:\n\nBoston house prices dataset\n---------------------------\n\n**Data Set Characteristics:**  \n\n    :Number of Instances: 506 \n\n    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n\n    :Attribute Information (in order):\n        - CRIM     per capita crime rate by town\n        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.\n        - INDUS    proportion of non-retail business acres per town\n        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n        - NOX      nitric oxides concentration (parts per 10 million)\n        - RM       average number of rooms per dwelling\n        - AGE      proportion of owner-occupied units built prior to 1940\n        - DIS      weighted distances to five Boston employment centres\n        - RAD      index of accessibility to radial highways\n        - TAX      full-value property-tax rate 

In [30]:
me._known_workers['bob'].url

'ws://localhost:5005'

In [28]:
df = pd.DataFrame(nd,columns=['Location','Description','Tags','Size'])
print (df)

  Location                                        Description   Tags  \
0      bob  .. _boston_dataset:\n\nBoston house prices dat...  [506]   
1      sam  .. _boston_dataset:\n\nBoston house prices dat...  [506]   

                                                Size  
0  {#housing, #boston_housing, _boston_dataset:, ...  
1  {#housing, #boston_housing, _boston_dataset:, ...  


Then search for a dataset

In [20]:
nd['bob']= ptr.description
nd['sam'] = data['sam'][0].description

In [18]:
df = pd.Series(nd, name='Description')

In [21]:
df = pd.DataFrame.from_dict(nd)

In [22]:
df

Unnamed: 0,bob,sam
0,.. _boston_dataset:\n\nBoston house prices dat...,.. _boston_dataset:\n\nBoston house prices dat...
1,[506],[506]
2,"{#housing, #boston_housing, _boston_dataset:, ...","{#housing, #boston_housing, _boston_dataset:, ..."


In [19]:
df.index.name = 'location'

In [20]:
df.to_frame()

Unnamed: 0_level_0,Description
location,Unnamed: 1_level_1
bob,[.. _boston_dataset:\n\nBoston house prices da...
sam,[.. _boston_dataset:\n\nBoston house prices da...


In [16]:
grid.search( "#data")

{'bob': [(Wrapper)>[PointerTensor | me:53394938518 -> bob:28521435288]
  	Tags: #data .. _breast_cancer_dataset: 
  	Shape: torch.Size([569, 30])
  	Description: .. _breast_cancer_dataset:...,
  (Wrapper)>[PointerTensor | me:11550955434 -> bob:87195006662]
  	Tags: #diabetes #data .. _diabetes_dataset: 
  	Shape: torch.Size([442, 10])
  	Description: .. _diabetes_dataset:...,
  (Wrapper)>[PointerTensor | me:93695107669 -> bob:96238330179]
  	Tags: #boston_housing #data _boston_dataset: #housing #boston .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...],
 'alice': [(Wrapper)>[PointerTensor | me:61477233258 -> alice:61146921641]
  	Tags: #boston_housing #data _boston_dataset: #housing #boston .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...,
  (Wrapper)>[PointerTensor | me:10648270773 -> alice:55156661739]
  	Tags: #diabetes #data .. _diabetes_dataset: 
  	Shape: torch.Size([442, 10])
  	Description: .. _diabetes_dataset:...,
  (Wrapper)

In [17]:
boston_data = grid.search("#boston", "#data")
boston_target = grid.search("#boston", "#target")

In [18]:
boston_data

{'bob': [(Wrapper)>[PointerTensor | me:36808794346 -> bob:96238330179]
  	Tags: #boston_housing #data _boston_dataset: #housing #boston .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...],
 'alice': [(Wrapper)>[PointerTensor | me:64223157257 -> alice:61146921641]
  	Tags: #boston_housing #data _boston_dataset: #housing #boston .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...],
 'sam': [(Wrapper)>[PointerTensor | me:6827837124 -> sam:45460266713]
  	Tags: #boston_housing #data _boston_dataset: #housing #boston .. 
  	Shape: torch.Size([506, 13])
  	Description: .. _boston_dataset:...]}

We load a model and an optimizer

In [6]:
n_features = boston_data['alice'][0].shape[1]
n_targets = 1

model = th.nn.Linear(n_features, n_targets)

Here we cast the data fetched in a `FederatedDataset`. See the workers which hold part of the data.

In [7]:
# Cast the result in BaseDatasets
datasets = []
for worker in boston_data.keys():
    dataset = sy.BaseDataset(boston_data[worker][0], boston_target[worker][0])
    datasets.append(dataset)

# Build the FederatedDataset object
dataset = sy.FederatedDataset(datasets)
print(dataset.workers)
optimizers = {}
for worker in dataset.workers:
    optimizers[worker] = th.optim.Adam(params=model.parameters(),lr=1e-2)

['bob', 'alice', 'sam']


We put it in a `FederatedDataLoader` and specify options

In [8]:
train_loader = sy.FederatedDataLoader(dataset, batch_size=3, shuffle=False, drop_last=False)

And finally we iterate over epochs. You can see how similar this is compared to pure and local PyTorch training!

In [11]:
for epoch in range(1, epochs + 1):
    loss_accum = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        model.send(data.location)
        
        optimizer = optimizers[data.location.id]
        optimizer.zero_grad()
        pred = model(data)
        loss = ((pred.view(-1) - target)**2).mean()
        loss.backward()
        optimizer.step()
        
        model.get()
        loss = loss.get()
        
        loss_accum += float(loss)
        
        if batch_idx % 8 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tBatch loss: {:.6f}'.format(
                epoch, batch_idx, len(train_loader),
                       100. * batch_idx / len(train_loader), loss.item()))            
            
    print('Total loss', loss_accum)


Total loss 160505.12345564365
