## PART 1: Launch a Duet Server and Connect

As a Data Owner, you want to allow someone else to perform data science on data that you own and likely want to protect.

In order to do this, we must load our data into a locally running server within this notebook. We call this server a "Duet".

To begin, you must launch Duet and help your Duet "partner" (a Data Scientist) connect to this server.

You do this by running the code below, sending the code snippet containing your unique Server ID to your partner, and then following the instructions it gives!

In [None]:
# stdlib
from itertools import count
import time
import shutil
import os
from pathlib import Path

# third party
import gym
import numpy as np
import torch
from torch.distributions import Categorical
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import threading
import asyncio

# Grab the asyncio event loop once. The training cell further down drives it
# with run_until_complete() to pause between polls for the partner's action.
loop = asyncio.get_event_loop()

In [None]:
import syft as sy
# Launch Duet (with loopback=True) and keep the handle; every later cell sends
# objects through `duet` or reads from `duet.store`.
duet = sy.launch_duet(loopback=True)
# Add ./syft_do.log as an extra sink for syft's logger.
sy.logger.add(sink="./syft_do.log")

## PART 2: Initialize environment

In [None]:
# Settings shared by the cells below.
config = dict(
    gamma=0.99,       # not read by the cells visible in this notebook
    seed=543,         # passed to env.seed() when the environment is created
    render=False,     # not read by the cells visible in this notebook
    log_interval=2,   # not read by the cells visible in this notebook
    wait_interval=1,  # seconds slept between polls for the partner's action
)

In [None]:
# Register a request handler without any tag filter: a handler with no tags
# accepts everything. Better handlers coming soon.
duet.requests.add_handler(action="accept")

In [None]:
# Create the CartPole environment and seed it with the value from `config`.
env = gym.make("CartPole-v1")
env.seed(config["seed"])

In [None]:
# Wrap the environment's reward threshold in a syft Float, tag it as
# "reward_threshold" and send it to Duet as a searchable object so the
# partner can look it up by tag.
reward_threshold_ptr = sy.lib.python.Float(env.spec.reward_threshold)
reward_threshold_ptr.tag("reward_threshold")
reward_threshold_ptr.send(duet, searchable=True)

In [None]:
# Show what is currently in the Duet store (presumably rendered as a pandas
# table by the notebook — confirm against the syft version in use).
duet.store.pandas

In [None]:
# We'll use this folder to monitor the status of the data scientist.
# Start from a clean slate by dropping any folder left over from an earlier
# run. ignore_errors=True keeps the removal best-effort for filesystem errors
# (e.g. the folder not existing yet) without also hiding unrelated exceptions
# such as KeyboardInterrupt, which a bare `except:` would swallow.
shutil.rmtree("checkpoints", ignore_errors=True)
# Plain mkdir on purpose: if the old folder could not be removed, this fails
# loudly instead of silently reusing stale checkpoint files.
os.mkdir("checkpoints")

## PART 3: Training

In [None]:
# Global step counter. It is embedded in every tag and checkpoint file name,
# so it keeps increasing across episodes and is never reset.
# NOTE(review): this rebinds the name `count` imported from itertools above.
count = 0
running_reward = 10

for i_episode in range(10):
    state = env.reset()
    ep_reward = 0

    # NOTE(review): an earlier note here read "10000" — presumably the step
    # cap intended for a full training run; confirm before raising it.
    for t in range(1, 30):
        # send state to data scientist
        state = torch.from_numpy(state).float().unsqueeze(0)
        state.tag(f"state_{count}")
        state.send(duet, searchable=True)
        Path(f"checkpoints/DO_checkpoint_state_{count}").touch()

        # wait for data scientist to send action: poll for the marker file up
        # to 360 times, sleeping config["wait_interval"] seconds on the event
        # loop between polls.
        action_marker = Path(f"checkpoints/DS_checkpoint_action_{count}")
        for _ in range(360):
            if action_marker.exists():
                break
            task = loop.create_task(asyncio.sleep(config["wait_interval"]))
            loop.run_until_complete(task)
        # Explicit raise rather than `assert`, so the check survives `python -O`.
        if not action_marker.exists():
            raise TimeoutError(
                f"Timed out waiting for the data scientist's action_{count}"
            )

        # receive action from data scientist
        action_ptr = duet.store[f"action_{count}"]
        action = action_ptr.get(delete_obj=False)

        # step the environment
        state, reward, done, _ = env.step(action)

        # send reward to data scientist
        reward_ptr = sy.lib.python.Float(reward)
        reward_ptr.tag(f"reward_{count}")
        reward_ptr.send(duet, searchable=True)
        Path(f"checkpoints/DO_checkpoint_reward_{count}").touch()

        ep_reward += reward

        # send done to data scientist: forward the real termination flag so
        # the partner can end its episode in step with the `break` below.
        done_ptr = sy.lib.python.Bool(bool(done))
        done_ptr.tag(f"done_{count}")
        done_ptr.send(duet, searchable=True)
        Path(f"checkpoints/DO_checkpoint_done_{count}").touch()

        count += 1

        if done:
            break

    # Exponential moving average of the episode reward (weight 0.05 on the
    # newest episode); training stops once it exceeds the env's threshold.
    running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward
    if running_reward > env.spec.reward_threshold:
        print(f"Solved! Running reward is now {running_reward} and "
              f"the last episode runs to {t} time steps!")
        break

In [None]:
# Final cleanup: remove the folder used for signalling between the two
# notebooks. ignore_errors=True keeps this best-effort (a missing folder or a
# failed removal is ignored) without a bare `except:` that would also swallow
# unrelated exceptions such as KeyboardInterrupt.
shutil.rmtree("checkpoints", ignore_errors=True)