In [None]:
from stable_baselines_external_data import SBPPORemoteData
import pickle
import numpy as np
from matplotlib import pyplot as plt

import socket
import sys
from multiprocessing import Queue as queue
import pickle
import multiprocessing
import traceback

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [None]:
class MultipleWorker(object):
    """Handles multiple workers identified by IDs."""
    
    def __init__(self):
        """Initialize with zero workers."""
        self.workers = set()
        self.kwargs_queues = {}
        self.return_queues = {}
        self.worker_processes = {}
        assert hasattr(self, '_process_fcn'), "Please implement _process_fcn"
        self._process_init()
        
    def target_process(self, worker_id):
        """Process to run in each worker."""
        #print("Starting process", worker_id)
        while True:
            kwargs = pickle.loads(self.kwargs_queues[worker_id].get())
            try:
                result = self._process_fcn(**kwargs, worker_id=worker_id)
            except Exception as e:
                result = ("Exception", traceback.format_exc())
                pass
            self.return_queues[worker_id].put(pickle.dumps(result))
            
    def _process_init(self):
        """Initialize client code."""
        pass
            
    def _process_fcn(self, **kwargs):
        """Process function, to be implemented."""
        #print("Called with:", kwargs)
        return str(kwargs)
        
    def new_worker(self, worker_id):
        """Create a worker with a given ID."""
        if worker_id in self.workers:
            return
        else:
            self.workers.add(worker_id)
            self.kwargs_queues[worker_id] = queue()
            self.return_queues[worker_id] = queue()
            self.worker_processes[worker_id] = multiprocessing.Process(target=self.target_process,
                                                                       kwargs=dict(worker_id=worker_id))
            self.worker_processes[worker_id].start()
            
    def process(self, worker_id, kwargs):
        """Process a request."""
        if worker_id not in self.workers:
            print("Creating worker", worker_id)
            self.new_worker(worker_id)
        self.kwargs_queues[worker_id].put(pickle.dumps(kwargs))
        res = self.return_queues[worker_id].get()
        return pickle.loads(res)
    
    def __del__(self):
        """Close all processes."""
        for p in self.worker_processes.values():
            p.kill()
            
class MultiStepTrainer(object):
    """Train with stable baselines on external data, supporting multiple trainers."""
    def __init__(self):
        self.trainers = {}
    def create(self, uid, config):
        if uid in self.trainers:
            print("Trainer %s already exists, doing nothing" % uid)
        else:
            self.trainers[uid] = SBPPORemoteData(config=config)
    def process(self, uid, rollouts, weights):
        if uid not in self.trainers:
            print("Error: trainer %s does not exist" % uid)
            return None
        
        self.trainers[uid].set_weights(weights)
        info = self.trainers[uid].learn(rollouts)
        new_weights = self.trainers[uid].get_weights()
        return {'info': info, 'weights': new_weights}
            
class MultipleWorkerTrainer(MultipleWorker):
    """Train with multiple workers."""
    
    def _process_init(self):
        self.trainer = None
    
    def _process_fcn(self, uid, config, data_path, answer_path, worker_id):
        if self.trainer is None:
            self.trainer = MultiStepTrainer()
        self.trainer.create(uid, config)
        data = pickle.load(open(data_path, 'rb'))
        rollouts = data['rollouts']
        weights = data['weights']
        
        result = self.trainer.process(uid, rollouts, weights)
        
        pickle.dump(result, open(answer_path, 'wb'))
        return True

In [5]:
from asgiref.sync import async_to_sync
from tornado import ioloop, web
from jsonrpcserver import method, dispatch as dispatch, serve
import argparse

parser = argparse.ArgumentParser(description='Launch the multiprocess stable baselines server.')
parser.add_argument('--port', metavar='N', default=1234,
                    help='port to listen on')


# Server for DatabasePreferenceLearner
class MainHandler(web.RequestHandler):
    def post(self):
        request = self.request.body.decode()
        print(request)
        response = dispatch(request)
        print(response)
        if response.wanted:
            self.write(str(response))

app = web.Application([(r"/", MainHandler)])
trainer = None
            
def run_server(port=50001):
    """Run server."""

    print("Listening on port %d" % port)

    global trainer
    trainer = MultipleWorkerTrainer()

    @method
    def process(*args, **kwargs):
        global trainer
        return trainer.process(*args, **kwargs)
    serve(port=port)

if __name__ == "__main__":
    args = parser.parse_args()
    run_server(port=8899)

usage: ipykernel_launcher.py [-h] [--port N]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/sergei/.local/share/jupyter/runtime/kernel-4c365b2d-38a4-465f-9ade-8ae6738116b6.json


SystemExit: 2

In [6]:
run_server()

Listening on port 50001
Creating worker aba_policy_player_1
Creating agent humanoid_blocker
Reading agent XML from: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/humanoid_body.xml
Creating agent humanoid
Reading agent XML from: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/humanoid_body.xml
Scene XML path: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/world_body.humanoid_body.humanoid_body.xml
Created Scene with agents




































Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




















-----------------------------------
| approxkl           | 0.06915925 |
| clipfrac           | 0.52264327 |
| explained_variance | -6.38      |
| fps                | 1372       |
| n_updates          | 1          |
| policy_entropy     | 24.122137  |
| policy_loss        | -0.1785963 |
| serial_timesteps   | 628        |
| time_elapsed       | 2.62e-05   |
| total_timesteps    | 628        |
| value_loss         | 28006.676  |
-----------------------------------
Creating worker aba_policy_player_2


127.0.0.1 - - [07/Sep/2020 00:24:37] "POST / HTTP/1.1" 200 -


Creating agent humanoid_blocker
Reading agent XML from: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/humanoid_body.xml
Creating agent humanoid
Reading agent XML from: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/humanoid_body.xml
Scene XML path: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/world_body.humanoid_body.humanoid_body.xml
Created Scene with agents




































Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




















------------------------------------
| approxkl           | 0.13145663  |
| clipfrac           | 0.6714968   |
| explained_variance | -3.05       |
| fps                | 1319        |
| n_updates          | 1           |
| policy_entropy     | 2.6566029   |
| policy_loss        | -0.18494165 |
| serial_timesteps   | 628         |
| time_elapsed       | 2.79e-05    |
| total_timesteps    | 628         |
| value_loss         | 143010.02   |
------------------------------------


127.0.0.1 - - [07/Sep/2020 00:24:39] "POST / HTTP/1.1" 200 -


Trainer 0 already exists, doing nothing
-----------------------------------
| approxkl           | 0.08314504 |
| clipfrac           | 0.52       |
| explained_variance | -6.45      |
| fps                | 2261       |
| n_updates          | 1          |
| policy_entropy     | 24.11175   |
| policy_loss        | -0.1782296 |
| serial_timesteps   | 651        |
| time_elapsed       | 1.41e-05   |
| total_timesteps    | 651        |
| value_loss         | 17387.547  |
-----------------------------------
Trainer 0 already exists, doing nothing


127.0.0.1 - - [07/Sep/2020 00:26:19] "POST / HTTP/1.1" 200 -


------------------------------------
| approxkl           | 0.1687577   |
| clipfrac           | 0.65757287  |
| explained_variance | -4.34       |
| fps                | 2316        |
| n_updates          | 1           |
| policy_entropy     | 2.6631653   |
| policy_loss        | -0.19491127 |
| serial_timesteps   | 651         |
| time_elapsed       | 1.53e-05    |
| total_timesteps    | 651         |
| value_loss         | 177388.5    |
------------------------------------


127.0.0.1 - - [07/Sep/2020 00:26:19] "POST / HTTP/1.1" 200 -


Trainer 0 already exists, doing nothing
------------------------------------
| approxkl           | 0.09739331  |
| clipfrac           | 0.5525522   |
| explained_variance | -6.02       |
| fps                | 2475        |
| n_updates          | 1           |
| policy_entropy     | 24.11979    |
| policy_loss        | -0.17488614 |
| serial_timesteps   | 623         |
| time_elapsed       | 1.57e-05    |
| total_timesteps    | 623         |
| value_loss         | 12023.959   |
------------------------------------
Trainer 0 already exists, doing nothing


127.0.0.1 - - [07/Sep/2020 00:27:09] "POST / HTTP/1.1" 200 -


------------------------------------
| approxkl           | 0.21780328  |
| clipfrac           | 0.72028893  |
| explained_variance | -3.41       |
| fps                | 2232        |
| n_updates          | 1           |
| policy_entropy     | 2.6520212   |
| policy_loss        | -0.21072704 |
| serial_timesteps   | 623         |
| time_elapsed       | 1.5e-05     |
| total_timesteps    | 623         |
| value_loss         | 189153.66   |
------------------------------------


127.0.0.1 - - [07/Sep/2020 00:27:09] "POST / HTTP/1.1" 200 -


Trainer 0 already exists, doing nothing
-----------------------------------
| approxkl           | 0.1215145  |
| clipfrac           | 0.616332   |
| explained_variance | -6.91      |
| fps                | 2260       |
| n_updates          | 1          |
| policy_entropy     | 24.116104  |
| policy_loss        | -0.1870755 |
| serial_timesteps   | 518        |
| time_elapsed       | 1.55e-05   |
| total_timesteps    | 518        |
| value_loss         | 17239.387  |
-----------------------------------
Trainer 0 already exists, doing nothing


127.0.0.1 - - [07/Sep/2020 00:27:52] "POST / HTTP/1.1" 200 -


------------------------------------
| approxkl           | 0.28200954  |
| clipfrac           | 0.73664093  |
| explained_variance | -3.84       |
| fps                | 2104        |
| n_updates          | 1           |
| policy_entropy     | 2.6567543   |
| policy_loss        | -0.21154171 |
| serial_timesteps   | 518         |
| time_elapsed       | 1.88e-05    |
| total_timesteps    | 518         |
| value_loss         | 179554.92   |
------------------------------------


127.0.0.1 - - [07/Sep/2020 00:27:52] "POST / HTTP/1.1" 200 -
Process Process-3:
Process Process-2:
Traceback (most recent call last):


KeyboardInterrupt: 

Traceback (most recent call last):
  File "/home/sergei/miniconda3/envs/tf1/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/sergei/miniconda3/envs/tf1/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/sergei/miniconda3/envs/tf1/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sergei/miniconda3/envs/tf1/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-2-d8eddcf039a3>", line 17, in target_process
    kwargs = pickle.loads(self.kwargs_queues[worker_id].get())
  File "<ipython-input-2-d8eddcf039a3>", line 17, in target_process
    kwargs = pickle.loads(self.kwargs_queues[worker_id].get())
  File "/home/sergei/miniconda3/envs/tf1/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/home/sergei/miniconda3/envs/

In [7]:
data = pickle.load(open('rollout.pkl', 'rb'))

In [8]:
mst = MultiStepTrainer()
config = data['config']

In [9]:
new_weights = {}
for player in data['policies'].keys():
    uid = str(data['uid']) + '/' + player
    weights = data['weights_dict'][player]
    rollout = data['samples'][player]
    print(len(rollout['t']), config['train_batch_size'])
    mst.create(uid=uid, config=config)
    new_weights[player] = mst.process(uid=uid, rollouts=rollout, weights=weights)

65071 50000
Creating agent humanoid_blocker
Reading agent XML from: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/humanoid_body.xml
Creating agent humanoid
Reading agent XML from: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/humanoid_body.xml
Scene XML path: /home/sergei/git/chai/multiagent-competition/gym_compete/new_envs/assets/world_body.humanoid_body.humanoid_body.xml
Created Scene with agents
-------------------------------------
| approxkl           | 0.022438252  |
| clipfrac           | 0.14191274   |
| explained_variance | -5.93        |
| fps                | 4508         |
| n_updates          | 1            |
| policy_entropy     | 24.084276    |
| policy_loss        | -0.054289613 |
| serial_timesteps   | 65070        |
| time_elapsed       | 3.12e-05     |
| total_timesteps    | 65070        |
| value_loss         | 4455.2646    |
-------------------------------------
65071 50000
Creating agent humanoid_blocker
Readin

In [13]:
!du -s --si rollout.pkl

459M	rollout.pkl
