# Federated Learning Training Plan: Host Plan & Model

Here we load the Plan and Model params created earlier in the "Create Plan" notebook, host them on PyGrid,
and run the sample syft.js app that executes them.

In [1]:
%load_ext autoreload
%autoreload 2
import websockets
import json
import base64
import requests
import torch

import syft as sy
from syft.grid.grid_client import GridClient
from syft.serde import protobuf
from syft_proto.execution.v1.plan_pb2 import Plan as PlanPB
from syft_proto.execution.v1.state_pb2 import State as StatePB

# Set up the PySyft hook. `hook` is used by later cells without any other
# assignment, so make_hook presumably injects it into the namespace passed in
# (here: the notebook globals) -- TODO confirm against the installed syft version.
sy.make_hook(globals())
# force protobuf serialization for tensors
hook.local_worker.framework = None

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was 'C:\Users\Vova\AppData\Local\conda\conda\envs\pysyft\lib\site-packages\tf_encrypted/operations/secure_random/secure_random_module_tf_1.13.1.so'


Setting up Sandbox...
Done!


In [2]:
async def sendWsMessage(data):
    """Send one JSON message to the gateway websocket and return the parsed reply.

    A new connection to ``'ws://' + gatewayWsUrl`` is opened for every call;
    ``gatewayWsUrl`` is read from the notebook globals at call time.

    Args:
        data: JSON-serializable object to send.

    Returns:
        The first message received in response, decoded from JSON.
    """
    endpoint = 'ws://' + gatewayWsUrl
    async with websockets.connect(endpoint) as ws:
        outgoing = json.dumps(data)
        await ws.send(outgoing)
        incoming = await ws.recv()
        return json.loads(incoming)

def deserializeFromBin(worker, filename, pb):
    """Load a binary protobuf file and turn it into a syft object.

    Args:
        worker: Worker passed through to ``protobuf.serde._unbufferize``.
        filename: Path of the file holding the serialized protobuf message.
        pb: Empty protobuf message instance; it is filled in place from the file.

    Returns:
        Whatever ``protobuf.serde._unbufferize(worker, pb)`` returns for the
        parsed message.
    """
    with open(filename, "rb") as pb_file:
        raw_bytes = pb_file.read()
    pb.ParseFromString(raw_bytes)
    return protobuf.serde._unbufferize(worker, pb)

## Step 4a: Host in PyGrid

Here we load the "ops list" version of the Plan.
PyGrid should translate it to other formats (e.g. torchscript) automatically.

In [3]:
# Deserialize the protobuf files written by the "Create Plan" notebook:
# the training plan and the initial model parameters.
training_plan = deserializeFromBin(
    worker=hook.local_worker,
    filename="tp_full.pb",
    pb=PlanPB(),
)
model_params_state = deserializeFromBin(
    worker=hook.local_worker,
    filename="model_params.pb",
    pb=StatePB(),
)

Follow the PyGrid README.md to build the `openmined/grid-gateway` image from the latest `dev` branch
and spin up PyGrid using `docker-compose up --build`.

In [4]:
# Default gateway address when running locally.
# This is host:port only -- the scheme is prepended where it is used
# ('ws://' in sendWsMessage, 'http://' in the download requests).
gatewayWsUrl = "127.0.0.1:5000"
# NOTE(review): secure=False presumably selects a non-TLS connection -- confirm against GridClient.
grid = GridClient(id="test", address=gatewayWsUrl, secure=False)
grid.connect()

Define the model name, version, and the client and server configs.

In [5]:
# The worker must use this same name/version pair when requesting a cycle.
name = "mnist"
version = "1.0.0"

# Passed as `client_config` when hosting the training.
client_config = {
    "name": name,
    "version": version,
    "batch_size": 64,
    "lr": 0.01,
    # custom syft.js option that limits number of training loops per worker
    "max_updates": 100,
}

# Passed as `server_config` when hosting the training.
server_config = {
    # temporarily this plays role "min # of worker's diffs" for triggering cycle end event
    "min_workers": 3,
    "max_workers": 3,
    "pool_selection": "random",
    "num_cycles": 5,
    "do_not_reuse_workers_until_cycle": 4,
    "cycle_length": 28800,
    "minimum_upload_speed": 0,
    "minimum_download_speed": 0,
}

Shoot!

If everything is good, a success status is returned.
If the name/version already exists in PyGrid, change them above or clean up the PyGrid db by re-creating the docker containers (e.g. `docker-compose up --force-recreate`).


In [6]:
# Send the model parameters, the client training plan and both configs to PyGrid
# through the grid client.
response = grid.host_federated_training(
    model=model_params_state,
    # 'training_plan' is the key under which the cycle response later reports
    # this plan's id (read back as ['plans']['training_plan']).
    client_plans={'training_plan': training_plan},
    client_protocols={},
    server_averaging_plan=None,
    client_config=client_config,
    server_config=server_config
)

print("Host response:", response)

Host response: {'type': 'federated/host-training', 'data': {'status': 'success'}}


Let's double-check that the data is loaded by requesting a cycle.

(The request is made directly over the websocket; in the future this will be available as methods on the grid client.)

In [7]:
auth_request = {
    "type": "federated/authenticate",
    "data": {}
}
auth_response = await sendWsMessage(auth_request)
print('Auth response: ', json.dumps(auth_response, indent=2))

cycle_request = {
    "type": "federated/cycle-request",
    "data": {
        "worker_id": auth_response['data']['worker_id'],
        "model": name,
        "version": version,
        "ping": 1,
        "download": 10000,
        "upload": 10000,
    }
}
cycle_response = await sendWsMessage(cycle_request)
print('Cycle response:', json.dumps(cycle_response, indent=2))

worker_id = auth_response['data']['worker_id']
request_key = cycle_response['data']['request_key']
model_id = cycle_response['data']['model_id'] 
training_plan_id = cycle_response['data']['plans']['training_plan']

Auth response:  {
  "type": "federated/authenticate",
  "data": {
    "status": "success",
    "worker_id": "f95bf0b9-de40-4eba-9ac2-4a39f49769dd"
  }
}
Cycle response: {
  "type": "federated/cycle-request",
  "data": {
    "status": "accepted",
    "request_key": "f82850aac99516de92f3b9f14dffc0a3bbf058c8f92fe7bab869340c282daf2f",
    "version": "1.0.0",
    "model": "mnist",
    "plans": {
      "training_plan": 2
    },
    "protocols": {},
    "client_config": {
      "name": "mnist",
      "version": "1.0.0",
      "batch_size": 64,
      "lr": 0.01,
      "max_updates": 100
    },
    "model_id": 1
  }
}


Let's download the model and the plan (both versions) and check that they actually work.

In [8]:
# Model
# Download the hosted model checkpoint over HTTP and deserialize it.
req = requests.get(f"http://{gatewayWsUrl}/federated/get-model?worker_id={worker_id}&request_key={request_key}&model_id={model_id}")
# Stop here on an HTTP error instead of feeding an error body to the protobuf parser.
req.raise_for_status()
model_data = req.content
pb = StatePB()
pb.ParseFromString(model_data)
model_params_downloaded = protobuf.serde._unbufferize(hook.local_worker, pb)
print(model_params_downloaded)

<State: PlaceHolder[Tags:]>tensor([[-0.0545, -0.0268, -0.0234,  ..., -0.0366,  0.0367, -0.0069],
        [-0.0172,  0.0025,  0.0130,  ...,  0.0187, -0.0071,  0.0256],
        [-0.0751, -0.0244,  0.0076,  ...,  0.0527, -0.0103,  0.0087],
        ...,
        [-0.0342,  0.0394, -0.0270,  ...,  0.0038, -0.0150,  0.0195],
        [-0.0166,  0.0126,  0.0174,  ..., -0.0004,  0.0120,  0.0872],
        [ 0.0269,  0.0080,  0.0367,  ...,  0.0404, -0.0204, -0.0162]])
	Description: ...
	Shape: torch.Size([392, 784]) PlaceHolder[Tags:]>tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [9]:
# Plan "list of ops"
# Request the training plan with receive_operations_as=list and deserialize it.
req = requests.get(f"http://{gatewayWsUrl}/federated/get-plan?worker_id={worker_id}&request_key={request_key}&plan_id={training_plan_id}&receive_operations_as=list")
# Stop here on an HTTP error instead of feeding an error body to the protobuf parser.
req.raise_for_status()
pb = PlanPB()
pb.ParseFromString(req.content)
plan_ops = protobuf.serde._unbufferize(hook.local_worker, pb)
# Print both representations to show which one this variant carries.
print(plan_ops.role.actions)
print(plan_ops.torchscript)

[<syft.execution.computation.ComputationAction object at 0x0000018BDD4BEEB8>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4A2FD0>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4A2EF0>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4A27F0>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4A2CF8>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4A2240>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4A2C18>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4C25F8>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4C2588>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4C2B38>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4C2A58>, <syft.execution.computation.ComputationAction object at 0x0000018BDD4C2D68>, <syft.execution.computation.ComputationAction object at 0x0000018BDD6560F0>

In [10]:
# Plan "torchscript"
# Request the same training plan with receive_operations_as=torchscript and deserialize it.
req = requests.get(f"http://{gatewayWsUrl}/federated/get-plan?worker_id={worker_id}&request_key={request_key}&plan_id={training_plan_id}&receive_operations_as=torchscript")
# Stop here on an HTTP error instead of feeding an error body to the protobuf parser.
req.raise_for_status()
pb = PlanPB()
pb.ParseFromString(req.content)
plan_ts = protobuf.serde._unbufferize(hook.local_worker, pb)
# Print the action list and the torchscript source to show which one this variant carries.
print(plan_ts.role.actions)
print(plan_ts.torchscript.code)

[]
def forward(self,
    argument_1: Tensor,
    argument_2: Tensor,
    argument_3: Tensor,
    argument_4: Tensor,
    argument_5: Tensor,
    argument_6: Tensor,
    argument_7: Tensor,
    argument_8: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
  _0 = torch.matmul(argument_1, torch.t(argument_5))
  _1 = torch.add(_0, argument_6, alpha=1)
  _2 = torch.relu(_1)
  _3 = torch.add(torch.matmul(_2, torch.t(argument_7)), argument_8, alpha=1)
  _4 = torch.softmax(_3, 1, None)
  _5 = torch.mean(torch.mul(argument_2, torch.log(_4)), dtype=None)
  _6 = torch.neg(_5)
  _7 = torch.div(torch.sub(_4, argument_2, alpha=1), torch.mul(argument_3, CONSTANTS.c0))
  _8 = torch.matmul(_7, argument_7)
  _9 = torch.to(torch.gt(_1, 0), 6, False, False, None)
  _10 = torch.mul(_8, _9)
  _11 = torch.matmul(torch.t(_10), argument_1)
  _12 = torch.sum(_10, [0], False, dtype=None)
  _13 = torch.matmul(torch.t(_7), _2)
  _14 = torch.sum(_7, [0], False, dtype=None)
  _15 = torch.sub(argument

## Step 5a: Train

Start and open the "with-grid" example in the syft.js project (http://localhost:8080 by default),
enter the model name and version, and start FL training.


## Step 6a: Submit diff

This emulates submitting a worker's diff (created earlier in the "Execute Plan" notebook) to PyGrid.
After several diffs have been submitted, PyGrid will end the cycle and create a new model checkpoint and a new cycle.
(The request is made directly; in the future this will be available as methods on the grid client.)

In [11]:
with open("diff.pb", "rb") as f:
    diff = f.read()

report_request = {
    "type": "federated/report",
    "data": {
        "worker_id": auth_response['data']['worker_id'],
        "request_key": cycle_response['data']['request_key'],
        "diff": base64.b64encode(diff).decode("utf-8")
    }
}

report_response = await sendWsMessage(report_request)
print('Report response:', json.dumps(report_response, indent=2)) 

Report response: {
  "type": "federated/report",
  "data": {
    "status": "success"
  }
}
