In [1]:
import ray
from ray import rllib, tune
import pandas as pd
import numpy as np
from datetime import datetime
import trading_gym
from trading_gym.registry.gaia.v9.env import GAIAPredictorsContinuousV9
from trading_gym.ray.walkforward import WalkForwardRunner, WalkForwardResults
%matplotlib inline
# Report package name and version of the two core libraries so the recorded
# outputs below can be tied to a specific environment.
for library in (trading_gym, ray):
    print(library.__package__, library.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


trading-gym 0.8.1
ray 0.7.3


In [2]:
from trading_gym.registry.gaia.v11.env import GAIAPredictorsContinuousV11
# from trading_gym.registry.gaia.v12.env import GAIAPredictorsContinuousV12

In [3]:
# Start the local Ray runtime used by Tune/RLlib in the cells below.
# ignore_reinit_error=True keeps this cell safe to re-run in a live kernel:
# a second plain ray.init() call raises because Ray is already initialised.
# On a fresh kernel the behaviour (and the returned address info) is unchanged.
ray.init(ignore_reinit_error=True)

2019-08-24 23:30:52,776	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-24_23-30-52_773639_101236/logs.
2019-08-24 23:30:52,964	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:54747 to respond...
2019-08-24 23:30:53,110	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:21000 to respond...
2019-08-24 23:30:53,120	INFO services.py:809 -- Starting Redis shard with 10.0 GB max memory.
2019-08-24 23:30:53,195	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-24_23-30-52_773639_101236/logs.
2019-08-24 23:30:53,206	INFO services.py:1475 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


{'node_ip_address': '10.0.5.4',
 'redis_address': '10.0.5.4:54747',
 'object_store_address': '/tmp/ray/session_2019-08-24_23-30-52_773639_101236/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-08-24_23-30-52_773639_101236/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2019-08-24_23-30-52_773639_101236'}

## Create the walk-forward partitions
It is the user's responsibility to create the training/test (and, optionally, validation) partitions used to run the walk-forward training. Note that a 2-fold split is a particular case of walk-forward training, so you are still able to run a simple 2-fold split.


In [4]:
# One partition per cut-off year 2007..2017: the training window runs from
# datetime.min to 31 Dec of `year`, and the test window spans the following
# calendar year (1 Jan - 31 Dec of `year + 1`).
partitions = [
    {
        'training-set': [datetime.min, datetime(year, 12, 31)],
        'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
    }
    for year in range(2007, 2018)
]
partitions


[{'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2007, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2008, 1, 1, 0, 0),
   datetime.datetime(2008, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2008, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2009, 1, 1, 0, 0),
   datetime.datetime(2009, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2009, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2010, 1, 1, 0, 0),
   datetime.datetime(2010, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2010, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2011, 1, 1, 0, 0),
   datetime.datetime(2011, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2011, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2012, 1, 1, 0, 0),
   datetime.datetime(2012, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 

In [5]:
# Create the config dict: start from RLlib's stock PPO settings and override
# only the entries this experiment cares about.
# NOTE(review): dict.copy() is shallow, so any nested dicts (presumably
# config['model']) are still shared with DEFAULT_CONFIG -- replace nested
# dicts rather than mutating them in place.
config = ray.rllib.agents.ppo.DEFAULT_CONFIG.copy()
config.update({
    'env': GAIAPredictorsContinuousV11,
    'env_config': {
        # Single-value grid search: expands to exactly one variant.
        'cost_of_commissions': tune.grid_search([0.00005]),
        'cost_of_spread': 0.0001,
    },
    # Original author's note on this value: "2 weeks".
    'gamma': tune.grid_search([0.95]),
})

In [6]:
# PPO hyper-parameter overrides, applied in one update. Every grid search
# below lists a single value, so none of them multiplies the number of trials.
# NOTE(review): the value-function coefficient, its clip range and the GAE
# lambda are all zero while use_gae stays True -- presumably intentional;
# confirm against the RLlib PPO configuration reference.
config.update({
    # Value-function / advantage estimation.
    'vf_clip_param': 0,
    'vf_loss_coeff': 0,
    'lambda': 0,
    'use_gae': True,
    # Sampling and SGD schedule.
    'batch_mode': 'complete_episodes',
    'train_batch_size': 4000,
    'sgd_minibatch_size': 128,
    'num_sgd_iter': tune.grid_search([8]),
    # Entropy bonus, KL penalty and surrogate clipping.
    'entropy_coeff': 1e-5,
    'kl_coeff': 0.2,
    'kl_target': tune.grid_search([0.01]),
    'clip_param': tune.grid_search([0.8]),
    # Learning rate.
    'lr': tune.grid_search([1e-5]),
})

# Optional model settings, currently disabled (the value function needs to
# share layers with the policy if an LSTM is used):
# config['vf_share_layers'] = True
# config['model']['use_lstm'] = True
# config['model']['custom_model'] = CNN.__name__


In [7]:
# env = GAIAPredictorsContinuousV9()
# length = env.episode_length()

In [8]:
# Smoke test: build the V11 environment with its default arguments in the
# notebook process and draw one random action from its action space. The bare
# last expression makes the notebook display the sample (a 2-element float
# array in the recorded run).
env = GAIAPredictorsContinuousV11()
env.action_space.sample()


The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'



array([-0.06164829,  0.04413464])

In [9]:
# Run your walk-forward experiment: hand the date partitions, the PPO trainer
# class and the config dict to the WalkForwardRunner.
# `stop` and `checkpoint_freq` are passed through as given -- presumably a
# 500000-timestep budget per trial and a checkpoint after every training
# iteration (confirm against the runner / Tune documentation).
walk_forward = WalkForwardRunner(
    trainable=ray.rllib.agents.ppo.PPOTrainer,
    config=config,
    env_partitions=partitions,
    checkpoint_freq=1,
    stop=dict(timesteps_total=500000),
)

Note that WalkForwardRunner has constructed the implied ray Experiment(s) from your walk forward settings.

walk_forward.experiments

Note that trials are associated with a `RestoreID`. This `ID` is all you need to restore an agent. Here every grid search (including `cost_of_commissions`) lists a single value and there are 11 partitions (training cut-offs 2007 to 2017), so we have a total of 11 experiments.

In [10]:
# Launch the walk-forward experiments and keep the returned trials.
# verbose=0 is forwarded to the runner (presumably to reduce Tune's progress
# reporting -- confirm); worker logs are still streamed, as seen below.
# Long-running cell: in the recorded run, consecutive "Starting a new
# experiment" log lines are roughly 40-50 minutes apart.
trials = walk_forward.run(verbose=0)
# Bare expression so the notebook displays the trials.
trials

2019-08-24 23:30:58,015	INFO trial_runner.py:176 -- Starting a new experiment.
2019-08-24 23:30:58,085	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


[2m[36m(pid=101362)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101362)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101362)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101362)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101362)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101362)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101362)[0m 
[2m[36m(pid=101362)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=101362)[0m 
[2m[36m(pid=101362)[0m 2019-08-24 23:31:06,518	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=101362)[0m 2019-08-24 23:31:06.520130: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary 

[2m[36m(pid=101368)[0m 
[2m[36m(pid=101368)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=101368)[0m 
[2m[36m(pid=101365)[0m 
[2m[36m(pid=101365)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=101365)[0m 
[2m[36m(pid=101368)[0m 2019-08-24 23:31:16,530	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=101368)[0m 2019-08-24 23:31:16.558755: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=101365)[0m 2019-08-24 23:31:16,684	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=101365)[0m 2019-08-24 23:31:16.711909: I tensor

[2m[36m(pid=101367)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101367)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101367)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101367)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101367)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101367)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101368)[0m 2019-08-24 23:31:22,791	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=101368)[0m 
[2m[36m(pid=101368)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.158, mean=0.08),
[2m[36m(pid=101368)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.711, max=3.086, mean=-0.049),
[2m[36m(pid=101368)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.009, max=0.007, mean=-0.0),
[2m[36m(pid=101368)[0m             'age

[2m[36m(pid=101364)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101364)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101364)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101364)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101364)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101364)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101362)[0m 2019-08-24 23:31:40,203	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/kernel:0' shape=(5, 256) dtype=float32_ref>
[2m[36m(pid=101362)[0m 2019-08-24 23:31:40,205	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/bias:0' shape=(256,) dtype=float32_ref>
[2m[36m(pid=101362)[0m 2019-08-24 23:31:40,205	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/kernel:0' shape=(256, 256

























































































































































































































































2019-08-25 00:18:36,517	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=101364)[0m 
[2m[36m(pid=101364)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=101364)[0m 
[2m[36m(pid=101364)[0m 2019-08-25 00:18:39,742	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=101364)[0m 2019-08-25 00:18:39.744443: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=101364)[0m 2019-08-25 00:18:40,062	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=101364)[0m 
[2m[36m(pid=101364)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=101364)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=101364)[0m   'advantages': <tf.Tensor 'defa

[2m[36m(pid=101367)[0m 2019-08-25 00:18:51,023	INFO sample_batch_builder.py:161 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=101367)[0m 
[2m[36m(pid=101367)[0m { 'agent0': { 'data': { 'action_prob': np.ndarray((41,), dtype=float32, min=0.006, max=0.159, mean=0.087),
[2m[36m(pid=101367)[0m                         'actions': np.ndarray((41, 2), dtype=float32, min=-2.408, max=2.096, mean=0.043),
[2m[36m(pid=101367)[0m                         'advantages': np.ndarray((41,), dtype=float32, min=-0.014, max=0.009, mean=-0.0),
[2m[36m(pid=101367)[0m                         'agent_index': np.ndarray((41,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=101367)[0m                         'behaviour_logits': np.ndarray((41, 4), dtype=float32, min=-0.012, max=0.009, mean=-0.004),
[2m[36m(pid=101367)[0m                         'dones': np.ndarray((41,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=101367)[0m                         '

[2m[36m(pid=101364)[0m 2019-08-25 00:19:08,580	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


































































































































































































2019-08-25 01:00:09,203	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=101361)[0m 
[2m[36m(pid=101361)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=101361)[0m 
[2m[36m(pid=101361)[0m 2019-08-25 01:00:13,005	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=101361)[0m 2019-08-25 01:00:13.006327: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=101361)[0m 2019-08-25 01:00:13,374	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=101361)[0m 
[2m[36m(pid=101361)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=101361)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=101361)[0m   'advantages': <tf.Tensor 'defa

[2m[36m(pid=14047)[0m 
[2m[36m(pid=14047)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=14047)[0m 
[2m[36m(pid=101361)[0m 2019-08-25 01:00:23,956	INFO trainable.py:105 -- _setup took 12.642 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=14047)[0m 2019-08-25 01:00:24,827	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=14047)[0m 2019-08-25 01:00:24.871145: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=101363)[0m 2019-08-25 01:00:25,531	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=101363)[0m 2019-08-25 01:00:25,576	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0

[2m[36m(pid=101361)[0m 2019-08-25 01:00:40,117	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/kernel:0' shape=(5, 256) dtype=float32_ref>
[2m[36m(pid=101361)[0m 2019-08-25 01:00:40,117	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/bias:0' shape=(256,) dtype=float32_ref>
[2m[36m(pid=101361)[0m 2019-08-25 01:00:40,117	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/kernel:0' shape=(256, 256) dtype=float32_ref>
[2m[36m(pid=101361)[0m 2019-08-25 01:00:40,117	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/bias:0' shape=(256,) dtype=float32_ref>
[2m[36m(pid=101361)[0m 2019-08-25 01:00:40,118	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc_out/kernel:0' shape=(256, 4) dtype=float32_ref>
[2m[36m(pid=101361)[0m 2019-08-25 01:00:40,118	INFO tf_policy.py:355 -







































































































































2019-08-25 01:40:33,972	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=33498)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=33498)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=33498)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=33498)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=33498)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=33498)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=33498)[0m 
[2m[36m(pid=33498)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=33498)[0m 
[2m[36m(pid=33498)[0m 2019-08-25 01:40:41,650	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=33498)[0m 2019-08-25 01:40:41.651410: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not com

[2m[36m(pid=33699)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=33699)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=33699)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=33699)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=33699)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=33699)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=33680)[0m 
[2m[36m(pid=33680)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=33680)[0m 
[2m[36m(pid=33679)[0m 
[2m[36m(pid=33679)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=33679)[0m 
[2m[36m(pid=33498)[0m 2019-08-25 01:40:52,979	INFO trainable.py:105 -- _setup took 12.979 seconds. If your trainable is slow t

[2m[36m(pid=33679)[0m 2019-08-25 01:40:57,348	INFO sample_batch_builder.py:161 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=33679)[0m 
[2m[36m(pid=33679)[0m { 'agent0': { 'data': { 'action_prob': np.ndarray((41,), dtype=float32, min=0.001, max=0.143, mean=0.076),
[2m[36m(pid=33679)[0m                         'actions': np.ndarray((41, 2), dtype=float32, min=-2.168, max=2.363, mean=0.175),
[2m[36m(pid=33679)[0m                         'advantages': np.ndarray((41,), dtype=float32, min=-0.005, max=0.0, mean=-0.0),
[2m[36m(pid=33679)[0m                         'agent_index': np.ndarray((41,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=33679)[0m                         'behaviour_logits': np.ndarray((41, 4), dtype=float32, min=-0.001, max=0.008, mean=0.002),
[2m[36m(pid=33679)[0m                         'dones': np.ndarray((41,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=33679)[0m                         'eps_id': np.

[2m[36m(pid=33498)[0m 2019-08-25 01:41:14,830	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.




































































































































2019-08-25 02:21:38,437	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=33699)[0m 
[2m[36m(pid=33699)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=33699)[0m 
[2m[36m(pid=33699)[0m 2019-08-25 02:21:41,841	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=33699)[0m 2019-08-25 02:21:41.842107: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=33699)[0m 2019-08-25 02:21:42,163	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=33699)[0m 
[2m[36m(pid=33699)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=33699)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=33699)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=53250)[0m 
[2m[36m(pid=53250)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=53250)[0m 
[2m[36m(pid=53251)[0m 
[2m[36m(pid=53251)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=53251)[0m 
[2m[36m(pid=33699)[0m 2019-08-25 02:21:53,583	INFO trainable.py:105 -- _setup took 13.235 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=53250)[0m 2019-08-25 02:21:54,166	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=53250)[0m 2019-08-25 02:21:54.193978: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=53

[2m[36m(pid=53250)[0m 2019-08-25 02:21:58,383	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=53250)[0m 
[2m[36m(pid=53250)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.159, mean=0.084),
[2m[36m(pid=53250)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.653, max=2.772, mean=-0.013),
[2m[36m(pid=53250)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.021, max=0.021, mean=-0.0),
[2m[36m(pid=53250)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=53250)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.005, max=0.011, mean=0.001),
[2m[36m(pid=53250)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=53250)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=466166070.0, max=1989929065.0, mean=885806066.8),
[2m[36m(pid=53250)[0m  









































































































































































2019-08-25 03:02:12,260	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=53270)[0m 
[2m[36m(pid=53270)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=53270)[0m 
[2m[36m(pid=53270)[0m 2019-08-25 03:02:15,680	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=53270)[0m 2019-08-25 03:02:15.681557: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=53270)[0m 2019-08-25 03:02:15,985	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=53270)[0m 
[2m[36m(pid=53270)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=53270)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=53270)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=72481)[0m 
[2m[36m(pid=72481)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=72481)[0m 
[2m[36m(pid=72482)[0m 
[2m[36m(pid=72482)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=72482)[0m 
[2m[36m(pid=53270)[0m 2019-08-25 03:02:26,904	INFO trainable.py:105 -- _setup took 12.739 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=72482)[0m 2019-08-25 03:02:27,899	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=72482)[0m 2019-08-25 03:02:27.920303: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=72

[2m[36m(pid=72481)[0m 2019-08-25 03:02:32,386	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=72481)[0m 
[2m[36m(pid=72481)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.002, max=0.156, mean=0.082),
[2m[36m(pid=72481)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.663, max=2.57, mean=-0.012),
[2m[36m(pid=72481)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.019, max=0.017, mean=-0.0),
[2m[36m(pid=72481)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=72481)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.01, max=0.012, mean=0.002),
[2m[36m(pid=72481)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=72481)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=74894453.0, max=1924520715.0, mean=1154872316.0),
[2m[36m(pid=72481)[0m    





















































































































































































2019-08-25 03:44:12,299	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=72503)[0m 
[2m[36m(pid=72503)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=72503)[0m 
[2m[36m(pid=72503)[0m 2019-08-25 03:44:15,913	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=72503)[0m 2019-08-25 03:44:15.915036: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=72503)[0m 2019-08-25 03:44:16,224	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=72503)[0m 
[2m[36m(pid=72503)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=72503)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=72503)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=92894)[0m 
[2m[36m(pid=92894)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=92894)[0m 
[2m[36m(pid=92893)[0m 
[2m[36m(pid=92893)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=92893)[0m 
[2m[36m(pid=92894)[0m 2019-08-25 03:44:27,873	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=92894)[0m 2019-08-25 03:44:27.895757: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=72503)[0m 2019-08-25 03:44:28,090	INFO trainable.py:105 -- _setup took 13.767 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=92

[2m[36m(pid=92894)[0m 2019-08-25 03:44:32,273	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=92894)[0m 
[2m[36m(pid=92894)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.002, max=0.159, mean=0.087),
[2m[36m(pid=92894)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.689, max=2.578, mean=-0.058),
[2m[36m(pid=92894)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.017, max=0.014, mean=-0.001),
[2m[36m(pid=92894)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=92894)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.008, max=0.012, mean=0.001),
[2m[36m(pid=92894)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=92894)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=1064255322.0, max=1866129808.0, mean=1453727054.8),
[2m[36m(pid=92894)[





















































































































































2019-08-25 04:24:38,925	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=92944)[0m 
[2m[36m(pid=92944)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=92944)[0m 
[2m[36m(pid=92944)[0m 2019-08-25 04:24:43,140	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=92944)[0m 2019-08-25 04:24:43.143747: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=92944)[0m 2019-08-25 04:24:43,488	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=92944)[0m 
[2m[36m(pid=92944)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=92944)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=92944)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=112684)[0m 
[2m[36m(pid=112684)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=112684)[0m 
[2m[36m(pid=112683)[0m 
[2m[36m(pid=112683)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=112683)[0m 
[2m[36m(pid=92944)[0m 2019-08-25 04:24:55,385	INFO trainable.py:105 -- _setup took 14.077 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=112683)[0m 2019-08-25 04:24:56,062	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=112683)[0m 2019-08-25 04:24:56.092296: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36

[2m[36m(pid=112683)[0m 2019-08-25 04:25:00,970	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=112683)[0m 
[2m[36m(pid=112683)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.003, max=0.159, mean=0.082),
[2m[36m(pid=112683)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.759, max=2.807, mean=-0.011),
[2m[36m(pid=112683)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.03, max=0.01, mean=-0.001),
[2m[36m(pid=112683)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=112683)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.01, max=0.013, mean=0.002),
[2m[36m(pid=112683)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=112683)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=501851931.0, max=1390962300.0, mean=944428730.6),
[2m[36m(pid=1126





































































































































































































2019-08-25 05:06:55,022	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=112711)[0m 
[2m[36m(pid=112711)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=112711)[0m 
[2m[36m(pid=112711)[0m 2019-08-25 05:06:58,388	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=112711)[0m 2019-08-25 05:06:58.390506: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=112711)[0m 2019-08-25 05:06:58,667	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=112711)[0m 
[2m[36m(pid=112711)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=112711)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=112711)[0m   'advantages': <tf.Tensor 'defa

[2m[36m(pid=1856)[0m 
[2m[36m(pid=1856)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=1856)[0m 
[2m[36m(pid=1857)[0m 
[2m[36m(pid=1857)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=1857)[0m 
[2m[36m(pid=112711)[0m 2019-08-25 05:07:10,971	INFO trainable.py:105 -- _setup took 13.988 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=1857)[0m 2019-08-25 05:07:11,276	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=1857)[0m 2019-08-25 05:07:11.307876: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=1856)[0m

[2m[36m(pid=1856)[0m 2019-08-25 05:07:16,249	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=1856)[0m 
[2m[36m(pid=1856)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.157, mean=0.078),
[2m[36m(pid=1856)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.461, max=2.748, mean=-0.037),
[2m[36m(pid=1856)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.013, max=0.006, mean=-0.0),
[2m[36m(pid=1856)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=1856)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.006, max=0.009, mean=0.001),
[2m[36m(pid=1856)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=1856)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=44331415.0, max=1998239391.0, mean=881664507.8),
[2m[36m(pid=1856)[0m             'i

















































































































2019-08-25 05:47:57,972	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=1877)[0m 
[2m[36m(pid=1877)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=1877)[0m 
[2m[36m(pid=1877)[0m 2019-08-25 05:48:01,651	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=1877)[0m 2019-08-25 05:48:01.653494: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=1877)[0m 2019-08-25 05:48:02,009	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=1877)[0m 
[2m[36m(pid=1877)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=1877)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=1877)[0m   'advantages': <tf.Tensor 'default_policy/advantage

[2m[36m(pid=21718)[0m 
[2m[36m(pid=21718)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=21718)[0m 
[2m[36m(pid=21719)[0m 
[2m[36m(pid=21719)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=21719)[0m 
[2m[36m(pid=1877)[0m 2019-08-25 05:48:14,080	INFO trainable.py:105 -- _setup took 13.956 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=21718)[0m 2019-08-25 05:48:14,760	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=21718)[0m 2019-08-25 05:48:14.803699: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=217

[2m[36m(pid=21719)[0m 2019-08-25 05:48:20,097	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=21719)[0m 
[2m[36m(pid=21719)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.002, max=0.159, mean=0.085),
[2m[36m(pid=21719)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.305, max=2.784, mean=-0.026),
[2m[36m(pid=21719)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.016, max=0.011, mean=-0.0),
[2m[36m(pid=21719)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=21719)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.009, max=0.008, mean=0.001),
[2m[36m(pid=21719)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=21719)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=201786834.0, max=1802301329.0, mean=775253419.8),
[2m[36m(pid=21719)[0m  













































































































































































2019-08-25 06:28:02,439	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=21755)[0m 
[2m[36m(pid=21755)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=21755)[0m 
[2m[36m(pid=21755)[0m 2019-08-25 06:28:06,351	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=21755)[0m 2019-08-25 06:28:06.352663: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=21755)[0m 2019-08-25 06:28:06,695	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=21755)[0m 
[2m[36m(pid=21755)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=21755)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=21755)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=41383)[0m 
[2m[36m(pid=41383)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=41383)[0m 
[2m[36m(pid=41384)[0m 
[2m[36m(pid=41384)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=41384)[0m 
[2m[36m(pid=21755)[0m 2019-08-25 06:28:18,121	INFO trainable.py:105 -- _setup took 13.448 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=41383)[0m 2019-08-25 06:28:19,307	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=41383)[0m 2019-08-25 06:28:19.383208: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=41

[2m[36m(pid=41383)[0m 2019-08-25 06:28:23,047	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=41383)[0m 
[2m[36m(pid=41383)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.003, max=0.159, mean=0.085),
[2m[36m(pid=41383)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.607, max=2.834, mean=0.082),
[2m[36m(pid=41383)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.012, max=0.005, mean=-0.001),
[2m[36m(pid=41383)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=41383)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.006, max=0.009, mean=-0.0),
[2m[36m(pid=41383)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=41383)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=126191526.0, max=1795253015.0, mean=992793469.6),
[2m[36m(pid=41383)[0m  























































































































[PPO_GAIAPredictorsContinuousV11_restoreID=5710329639900672484_runID=TFAyTCuz_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV11_restoreID=5710329639900672484_runID=TFAyTCuz_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV11_restoreID=5710329639900672484_runID=TFAyTCuz_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV11_restoreID=5710329639900672484_runID=TFAyTCuz_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV11_restoreID=5710329639900672484_runID=TFAyTCuz_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV11_restoreID=5710329639900672484_runID=TFAyTCuz_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_targe

In [None]:
# stop

# Temp

In [None]:
# Build a PPO trainer for the V11 environment class.
# NOTE(review): `config` is not defined in this part of the notebook — it must
# already exist in the kernel from an earlier cell.
agent = ray.rllib.agents.ppo.PPOTrainer(config, GAIAPredictorsContinuousV11)

In [None]:
# Create an environment and grab its initial observation.
# NOTE(review): this instantiates GAIAPredictorsContinuousV9, while `agent` was
# built with GAIAPredictorsContinuousV11 — confirm the two versions share the
# same observation/action spaces, or switch this to V11.
env = GAIAPredictorsContinuousV9()
state = env.reset()
state

In [None]:
# Ask the trainer for an action given the observation returned by env.reset().
agent.compute_action(state)

# Restore

In [16]:
# Directory holding the walk-forward training logs to load.
# NOTE(review): absolute, user-specific path — this cell only works on the
# machine where these logs live; consider a path relative to the notebook.
logs_dir = '/home/Nicholas/trading-gym_0.8.1/trading-gym/notebooks/registry/gaia/v9/logs'
results = WalkForwardResults(logs_dir)
results

WalkForwardResults(['GAIAPredictorsContinuousV11', 'GAIAPredictorsContinuousV12'])

In [17]:
# Select the results recorded for the V11 environment (one of the keys shown
# in the WalkForwardResults repr).
env_results = results['GAIAPredictorsContinuousV11']
env_results

EnvResults(GAIAPredictorsContinuousV11)

In [18]:
# Step 1.
env = env_results.make_env(
    env_config={
        'cost_of_commissions': 0.00005,
        'cost_of_spread': 0.0001,
        'folds': {
            'training-set': [datetime.min, datetime(2008, 3, 18)],
            'test-set': [datetime(2008, 3, 19), datetime.max],
        }
    },
)

In [19]:
# Show the available restore IDs; the output maps each ID to a list of
# AgentResults entries.
env_results.restore_ids

{-6695192150303784323: [AgentResults(GAIAPredictorsContinuousV11_1-01-01_to_2011-12-31/PPO_GAIAPredictorsContinuousV11_restoreID=-6695192150303784323_runID=bNl30CUT_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-11_01-48-221_87dum0),
  AgentResults(GAIAPredictorsContinuousV11_1-01-01_to_2017-12-31/PPO_GAIAPredictorsContinuousV11_restoreID=-6695192150303784323_runID=bNl30CUT_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-11_06-39-165dnrmr38),
  AgentResults(GAIAPredictorsContinuousV11_1-01-01_to_2008-12-31/PPO_GAIAPredictorsContinuousV11_restoreID=-6695192150303784323_runID=bNl30CUT_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-10_23-22-37dumw8eu0),
  AgentResults(GAIAPredictorsContinuousV11_1-01-01_to_2016-12-31/PPO_GAIAPredictorsContinuousV11_restoreID=-6695192150303784323_runID=bNl30CUT_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-11_05-51-10jxzlovyl),
  AgentResults(GAIAPredictorsContinuousV11_1-01-01_to_2012-12-31/PPO_GAIAP

In [15]:
# Print the last key of env_results.restore_ids.
# (A stray bare integer literal that used to precede this line was a no-op
# expression statement — apparently pasted output — and has been removed.)
print(list(env_results.restore_ids)[-1])

2557338427780778455


In [25]:
# Restore ID of the run to evaluate. This value matches the `restoreID=` that
# appears in the PPO trial names printed by the training run.
# The previous `restore_id = list(env_results.restore_ids)[-1]` assignment was
# dead code (immediately overwritten) and has been dropped; use that expression
# instead if you want the last key of env_results.restore_ids.
restore_id = 5710329639900672484

In [26]:
# Build the policy for `restore_id` on top of `env`.
# checkpoint_nr=None (or leaving it out) uses the last checkpoint available.
policy = env_results.make_policy(env=env, restore_id=restore_id, checkpoint_nr=None)
policy

<trading_gym.ray.walkforward.policy.WalkForwardPolicy at 0x7f29a5e699e8>

In [27]:
# Fetch the policy's history; display is intentionally disabled below
# (uncomment `history` to inspect it).
history = policy.history()
# history

In [28]:
# Step 3.
# Sample one episode on the 'test-set' fold, acting with `policy`.
# verbose=False is passed, yet the captured output still shows RLlib INFO logs.
episode = env.sample_episode(fold='test-set', policy=policy, verbose=False)

2019-08-25 10:55:57,228	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:55:59,391	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f29a292b630>}
2019-08-25 10:55:59,392	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f29a292b208>}
2019-08-25 10:55:59,393	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f2f8ce09e10>}
2019-08-25 10:55:59,488	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']
2019-08-25 10:56:07,739	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:56:09,803	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy obj

2019-08-25 10:57:18,918	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f297bc533c8>}
2019-08-25 10:57:18,920	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f297bc7cf60>}
2019-08-25 10:57:18,921	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f297bc7cd30>}
2019-08-25 10:57:18,977	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']
2019-08-25 10:57:26,333	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:57:28,340	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f2976deea58>}
2019-08-25 10:57:28,341	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.mo

In [29]:
# Render the episode: performance, target weights, annual returns, tearsheet.
# NOTE(review): Jupyter only auto-displays the value of the LAST expression in
# a cell. If to_plotly() returns a figure instead of displaying it as a side
# effect, the first three results are silently discarded — confirm, and wrap
# them in display(...) or split into separate cells if so.
episode.renderer.cumulative_performance.to_plotly()
episode.renderer.target_weights.to_plotly()
episode.renderer.annual_returns.to_plotly()
episode.renderer.tearsheet()

Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),ETF(Russell 1000),ETF(7-10Y T-Bills)
Context,From,2008-03-19,2008-03-19,2008-03-19,2008-03-19,2008-03-19,2008-03-19
Context,To,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28
Context,Years,10.4493,10.4493,10.4493,10.4493,10.4493,10.4493
Context,Observations,2725,2725,2725,2725,2725,2725
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.00681294,0.00681294,0.00681294,0.00681294,0.00681294,0.00681294
Return,CAGR,0.0789021,0.158586,0.00681294,0,0.104507,0.0339243
Return,CAGR over cash,0.0720891,0.151773,0,-0.00681294,0.0976941,0.0271113
Return,Overall return,1.21126,3.65592,0.0735266,0,1.82541,0.417089
Risk,Volatility,0.0905925,0.0970738,0.000598812,0,0.197859,0.0766871


In [30]:
# DISABLED: mapping from cost_of_commissions to the restore ID trained with it.
# The nr2episode cells further down need this mapping; re-enable it before
# running them.
# cost2restore_id = {
#     0.00001: -3264367374635941251,
# }

In [21]:
# DISABLED: builds `nr2episode`, keyed by cost_of_commissions, by calling
# env_results.get_nr2episode for checkpoints 1..125 on the 'test-set' fold.
# The plotting cells at the end of the notebook read `nr2episode`, so they
# fail with NameError while this cell stays commented out.
# nr2episode = dict()
# for cost_of_commissions, restore_id in cost2restore_id.items():
#     nr2episode[cost_of_commissions] = env_results.get_nr2episode(
#         restore_id=restore_id,
#         checkpoint_nrs=np.arange(1, 126, 1),
#         fold='test-set',
#         env_config={
#             'folds': {
#                 'training-set': [datetime.min, datetime(2008, 3, 18)],
#                 'test-set': [datetime(2008, 3, 19), datetime.max],
#             }
#         }
#     )

In [None]:
# Plot the weights for the run trained with cost_of_commissions = 1e-05.
# NOTE(review): `nr2episode` is only assigned in a commented-out cell, so on a
# fresh kernel this raises NameError — re-enable that cell (and the
# cost2restore_id cell) first.
cost_of_commissions = 0.00001
nr2episode[cost_of_commissions].plot_weights()

In [None]:
# Plot the levels for the same run.
# NOTE(review): requires `nr2episode` and `cost_of_commissions` to exist in the
# kernel; `nr2episode` is only assigned in a commented-out cell.
nr2episode[cost_of_commissions].plot_levels()

In [None]:
# Plot the metrics across training for the same run.
# NOTE(review): requires `nr2episode` and `cost_of_commissions` to exist in the
# kernel; `nr2episode` is only assigned in a commented-out cell.
nr2episode[cost_of_commissions].plot_metrics_as_we_train()