In [1]:
import ray
from ray import rllib, tune
import pandas as pd
import numpy as np
from datetime import datetime
import trading_gym
from trading_gym.registry.gaia.v9.env import GAIAPredictorsContinuousV9
from trading_gym.ray.walkforward import WalkForwardRunner, WalkForwardResults
%matplotlib inline
print(trading_gym.__package__, trading_gym.__version__)
print(ray.__package__, ray.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


trading-gym 0.8.1
ray 0.7.3


In [2]:
# from trading_gym.registry.gaia.v11.env import GAIAPredictorsContinuousV11
from trading_gym.registry.gaia.v12.env import GAIAPredictorsContinuousV12

In [3]:
ray.init()

2019-08-24 23:27:59,360	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-24_23-27-59_359363_99481/logs.
2019-08-24 23:27:59,544	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:38993 to respond...
2019-08-24 23:27:59,688	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:33160 to respond...
2019-08-24 23:27:59,693	INFO services.py:809 -- Starting Redis shard with 10.0 GB max memory.
2019-08-24 23:27:59,754	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-24_23-27-59_359363_99481/logs.
2019-08-24 23:27:59,759	INFO services.py:1475 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


{'node_ip_address': '10.0.5.4',
 'redis_address': '10.0.5.4:38993',
 'object_store_address': '/tmp/ray/session_2019-08-24_23-27-59_359363_99481/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-08-24_23-27-59_359363_99481/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2019-08-24_23-27-59_359363_99481'}

## Create the walk-forward partitions
It's responsibility of the user to create the training/test (and maybe validation) partitions to turn the walk forward training. Note that 2-fold split is a particular case of walk-forward training, so you are still able to run a simple 2-fold split.


In [4]:
partitions = list()
for year in range(2007, 2018):
    partition = {
        'training-set': [datetime.min, datetime(year, 12, 31)],
        'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
    }
    partitions.append(partition)
partitions


[{'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2007, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2008, 1, 1, 0, 0),
   datetime.datetime(2008, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2008, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2009, 1, 1, 0, 0),
   datetime.datetime(2009, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2009, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2010, 1, 1, 0, 0),
   datetime.datetime(2010, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2010, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2011, 1, 1, 0, 0),
   datetime.datetime(2011, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2011, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2012, 1, 1, 0, 0),
   datetime.datetime(2012, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 

In [5]:
## Create the config dict
config = ray.rllib.agents.ppo.DEFAULT_CONFIG.copy()
config['env'] = GAIAPredictorsContinuousV12
config['env_config'] = {
    'cost_of_commissions': tune.grid_search([0.00005]),
    'cost_of_spread': 0.0001,
}
config['gamma'] = tune.grid_search([0.95]) # 2 weeks

In [6]:
config['vf_clip_param'] = 0
config['vf_loss_coeff'] = 0
config['lambda'] = 0
config['use_gae'] = True

# need to have vf share layers if lstm is used
config['vf_share_layers'] = True
config['use_lstm']: True
config['batch_mode'] = 'complete_episodes'
config['train_batch_size'] = 4000 # tune.grid_search([4000])
config['sgd_minibatch_size'] = 128
config['num_sgd_iter'] = tune.grid_search([8])
config['entropy_coeff'] =  1e-5 # tune.grid_search([1e-5])
config['kl_coeff'] = 0.2 #tune.grid_search([0.2])
config['kl_target'] = tune.grid_search([0.01])
config['clip_param'] = tune.grid_search([0.8])

config['lr'] = tune.grid_search([1e-5])

# config['model']['custom_model'] = CNN.__name__


In [7]:
# env = GAIAPredictorsContinuousV9()
# length = env.episode_length()

In [8]:
env = GAIAPredictorsContinuousV12()
env.action_space.sample()


The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'



array([0.07634861, 0.02470711])

In [9]:
## Run your walk-forward experiment
walk_forward = WalkForwardRunner(
    env_partitions=partitions,
    trainable=ray.rllib.agents.ppo.PPOTrainer,
    config=config,
    stop={'timesteps_total': 500000},
    checkpoint_freq=1,
)

Note that WalkForwardRunner has constructed the implied ray Experiment(s) from your walk forward settings.

walk_forward.experiments

Note that trials are associated with a `RestoreID`. This `ID` is all you need to restore an agent. Here we are using a grid search of two values for `cost_of_commissions` on two partitions, so we have a total of 4 experiments.

In [10]:
trials = walk_forward.run(verbose=0)
trials

2019-08-24 23:28:03,415	INFO trial_runner.py:176 -- Starting a new experiment.
2019-08-24 23:28:03,471	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


[2m[36m(pid=99579)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=99579)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=99579)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=99579)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=99579)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=99579)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=99579)[0m 
[2m[36m(pid=99579)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=99579)[0m 
[2m[36m(pid=99579)[0m 2019-08-24 23:28:10,525	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=99579)[0m 2019-08-24 23:28:10.526151: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not com

[2m[36m(pid=99574)[0m 
[2m[36m(pid=99574)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=99574)[0m 
[2m[36m(pid=99580)[0m 
[2m[36m(pid=99580)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=99580)[0m 
[2m[36m(pid=99580)[0m 2019-08-24 23:28:18,868	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=99580)[0m 2019-08-24 23:28:18.892170: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=99574)[0m 2019-08-24 23:28:18,895	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=99574)[0m 2019-08-24 23:28:18.935328: I tensorflow/core/

[2m[36m(pid=99580)[0m 2019-08-24 23:28:22,423	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=99580)[0m 
[2m[36m(pid=99580)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.159, mean=0.083),
[2m[36m(pid=99580)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.784, max=3.371, mean=-0.039),
[2m[36m(pid=99580)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-2.479, max=2.283, mean=-0.048),
[2m[36m(pid=99580)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=99580)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.01, max=0.009, mean=-0.002),
[2m[36m(pid=99580)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=99580)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=444579787.0, max=1742621616.0, mean=975485564.4),
[2m[36m(pid=99580)[0m  

[2m[36m(pid=99578)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=99578)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=99578)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=99578)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=99578)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=99578)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=99575)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=99575)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=99575)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=99575)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=99575)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=99575)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=99579)[0m 2019-08-24 23:28:35,033	INFO tf_policy.py:355 -- Optimizi

[2m[36m(pid=99579)[0m 2019-08-24 23:28:39,165	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


















































































































































































































































2019-08-25 00:15:35,331	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=99576)[0m 
[2m[36m(pid=99576)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=99576)[0m 
[2m[36m(pid=99576)[0m 2019-08-25 00:15:39,015	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=99576)[0m 2019-08-25 00:15:39.016816: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=99576)[0m 2019-08-25 00:15:39,369	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=99576)[0m 
[2m[36m(pid=99576)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=99576)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=99576)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=99578)[0m 2019-08-25 00:15:49,216	INFO sample_batch_builder.py:161 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=99578)[0m 
[2m[36m(pid=99578)[0m { 'agent0': { 'data': { 'action_prob': np.ndarray((41,), dtype=float32, min=0.013, max=0.154, mean=0.081),
[2m[36m(pid=99578)[0m                         'actions': np.ndarray((41, 2), dtype=float32, min=-1.88, max=1.985, mean=-0.03),
[2m[36m(pid=99578)[0m                         'advantages': np.ndarray((41,), dtype=float32, min=-0.444, max=0.345, mean=-0.03),
[2m[36m(pid=99578)[0m                         'agent_index': np.ndarray((41,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=99578)[0m                         'behaviour_logits': np.ndarray((41, 4), dtype=float32, min=-0.006, max=0.013, mean=0.001),
[2m[36m(pid=99578)[0m                         'dones': np.ndarray((41,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=99578)[0m                         'eps_id': n

[2m[36m(pid=99576)[0m 2019-08-25 00:16:05,602	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.






























































































































































2019-08-25 00:56:31,399	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=99581)[0m 
[2m[36m(pid=99581)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=99581)[0m 
[2m[36m(pid=99581)[0m 2019-08-25 00:56:35,843	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=99581)[0m 2019-08-25 00:56:35.844194: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=99581)[0m 2019-08-25 00:56:36,096	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=99581)[0m 
[2m[36m(pid=99581)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=99581)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=99581)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=99577)[0m 2019-08-25 00:56:46,672	INFO rollout_worker.py:451 -- Generating sample batch of size 200
[2m[36m(pid=99577)[0m 2019-08-25 00:56:46,699	INFO sampler.py:304 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=0.339, mean=-0.1)}}
[2m[36m(pid=99577)[0m 2019-08-25 00:56:46,699	INFO sampler.py:305 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=99577)[0m 2019-08-25 00:56:46,700	INFO sampler.py:403 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=0.339, mean=-0.1)
[2m[36m(pid=99577)[0m 2019-08-25 00:56:46,700	INFO sampler.py:407 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=0.339, mean=-0.1)
[2m[36m(pid=99577)[0m 2019-08-25 00:56:46,701	INFO sampler.py:521 -- Inputs to compute_actions():
[2m[36m(pid=99577)[0m 
[2m[36m(pid=99577)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=99577)[0m                                   'env_id': 0,
[2m[36m(pi

[2m[36m(pid=99581)[0m 2019-08-25 00:57:01,566	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/kernel:0' shape=(5, 256) dtype=float32_ref>
[2m[36m(pid=99581)[0m 2019-08-25 00:57:01,566	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc1/bias:0' shape=(256,) dtype=float32_ref>
[2m[36m(pid=99581)[0m 2019-08-25 00:57:01,566	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/kernel:0' shape=(256, 256) dtype=float32_ref>
[2m[36m(pid=99581)[0m 2019-08-25 00:57:01,566	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc2/bias:0' shape=(256,) dtype=float32_ref>
[2m[36m(pid=99581)[0m 2019-08-25 00:57:01,567	INFO tf_policy.py:355 -- Optimizing variable <tf.Variable 'default_policy/default_model/fc_out/kernel:0' shape=(256, 4) dtype=float32_ref>
[2m[36m(pid=99581)[0m 2019-08-25 00:57:01,567	INFO tf_policy.py:355 -- Opti





















































































































































2019-08-25 01:36:57,827	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=31500)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=31500)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=31500)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=31500)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=31500)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=31500)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=31500)[0m 
[2m[36m(pid=31500)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=31500)[0m 
[2m[36m(pid=31500)[0m 2019-08-25 01:37:05,344	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=31500)[0m 2019-08-25 01:37:05.345943: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not com

[2m[36m(pid=31682)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=31682)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=31682)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=31682)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=31682)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=31682)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=31662)[0m 
[2m[36m(pid=31662)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=31662)[0m 
[2m[36m(pid=31500)[0m 2019-08-25 01:37:16,227	INFO trainable.py:105 -- _setup took 12.670 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=31663)[0m 
[2m[36m(pid=31663)[0m The 'contains' method is deprecated and will be removed in a future ve

[2m[36m(pid=31662)[0m 2019-08-25 01:37:20,330	INFO sample_batch_builder.py:161 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=31662)[0m 
[2m[36m(pid=31662)[0m { 'agent0': { 'data': { 'action_prob': np.ndarray((41,), dtype=float32, min=0.0, max=0.156, mean=0.089),
[2m[36m(pid=31662)[0m                         'actions': np.ndarray((41, 2), dtype=float32, min=-2.023, max=3.081, mean=0.001),
[2m[36m(pid=31662)[0m                         'advantages': np.ndarray((41,), dtype=float32, min=-0.214, max=-0.011, mean=-0.02),
[2m[36m(pid=31662)[0m                         'agent_index': np.ndarray((41,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=31662)[0m                         'behaviour_logits': np.ndarray((41, 4), dtype=float32, min=-0.004, max=0.001, mean=-0.001),
[2m[36m(pid=31662)[0m                         'dones': np.ndarray((41,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=31662)[0m                         'eps_id': 

[2m[36m(pid=31500)[0m 2019-08-25 01:37:38,202	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.






































































































































2019-08-25 02:17:41,241	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=31682)[0m 
[2m[36m(pid=31682)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=31682)[0m 
[2m[36m(pid=31682)[0m 2019-08-25 02:17:44,997	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=31682)[0m 2019-08-25 02:17:44.998755: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=31682)[0m 2019-08-25 02:17:45,254	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=31682)[0m 
[2m[36m(pid=31682)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=31682)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=31682)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=51103)[0m 
[2m[36m(pid=51103)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=51103)[0m 
[2m[36m(pid=51102)[0m 
[2m[36m(pid=51102)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=51102)[0m 
[2m[36m(pid=31682)[0m 2019-08-25 02:17:56,696	INFO trainable.py:105 -- _setup took 13.166 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=51102)[0m 2019-08-25 02:17:57,221	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=51102)[0m 2019-08-25 02:17:57.248854: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=51

[2m[36m(pid=51103)[0m 2019-08-25 02:18:01,341	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=51103)[0m 
[2m[36m(pid=51103)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.158, mean=0.081),
[2m[36m(pid=51103)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.571, max=3.022, mean=0.046),
[2m[36m(pid=51103)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-1.358, max=0.895, mean=0.015),
[2m[36m(pid=51103)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=51103)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.009, max=0.011, mean=0.001),
[2m[36m(pid=51103)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=51103)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=510545075.0, max=1457008344.0, mean=886872937.8),
[2m[36m(pid=51103)[0m  









































































































































2019-08-25 02:57:54,290	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=51130)[0m 
[2m[36m(pid=51130)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=51130)[0m 
[2m[36m(pid=51130)[0m 2019-08-25 02:57:58,063	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=51130)[0m 2019-08-25 02:57:58.064510: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=51130)[0m 2019-08-25 02:57:58,310	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=51130)[0m 
[2m[36m(pid=51130)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=51130)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=51130)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=51130)[0m 2019-08-25 02:58:07,210	INFO trainable.py:105 -- _setup took 10.937 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=70239)[0m 
[2m[36m(pid=70239)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=70239)[0m 
[2m[36m(pid=70238)[0m 
[2m[36m(pid=70238)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=70238)[0m 
[2m[36m(pid=70239)[0m 2019-08-25 02:58:09,524	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=70239)[0m 2019-08-25 02:58:09.547835: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=70

[2m[36m(pid=70239)[0m 2019-08-25 02:58:13,327	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=70239)[0m 
[2m[36m(pid=70239)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.157, mean=0.074),
[2m[36m(pid=70239)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.554, max=3.091, mean=-0.006),
[2m[36m(pid=70239)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.487, max=0.532, mean=-0.006),
[2m[36m(pid=70239)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=70239)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.008, max=0.009, mean=0.001),
[2m[36m(pid=70239)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=70239)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=111958108.0, max=1992504149.0, mean=1188961659.2),
[2m[36m(pid=70239)[0

































































































































































2019-08-25 03:39:30,895	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=70262)[0m 
[2m[36m(pid=70262)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=70262)[0m 
[2m[36m(pid=70262)[0m 2019-08-25 03:39:34,400	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=70262)[0m 2019-08-25 03:39:34.401320: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=70262)[0m 2019-08-25 03:39:34,636	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=70262)[0m 
[2m[36m(pid=70262)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=70262)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=70262)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=70262)[0m 2019-08-25 03:39:44,734	INFO trainable.py:105 -- _setup took 11.889 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=90402)[0m 
[2m[36m(pid=90402)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=90402)[0m 
[2m[36m(pid=90401)[0m 
[2m[36m(pid=90401)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=90401)[0m 
[2m[36m(pid=90401)[0m 2019-08-25 03:39:46,517	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=90401)[0m 2019-08-25 03:39:46.540402: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=90

[2m[36m(pid=90401)[0m 2019-08-25 03:39:50,491	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=90401)[0m 
[2m[36m(pid=90401)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.159, mean=0.078),
[2m[36m(pid=90401)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.137, max=3.208, mean=-0.027),
[2m[36m(pid=90401)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-1.967, max=2.394, mean=0.012),
[2m[36m(pid=90401)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=90401)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.011, max=0.014, mean=0.002),
[2m[36m(pid=90401)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=90401)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=55588389.0, max=1699073319.0, mean=1065949486.4),
[2m[36m(pid=90401)[0m   





























































































































2019-08-25 04:18:59,423	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=90421)[0m 
[2m[36m(pid=90421)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=90421)[0m 
[2m[36m(pid=90421)[0m 2019-08-25 04:19:03,302	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=90421)[0m 2019-08-25 04:19:03.303425: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=90421)[0m 2019-08-25 04:19:03,591	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=90421)[0m 
[2m[36m(pid=90421)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=90421)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=90421)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=109842)[0m 
[2m[36m(pid=109842)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=109842)[0m 
[2m[36m(pid=109841)[0m 
[2m[36m(pid=109841)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=109841)[0m 
[2m[36m(pid=90421)[0m 2019-08-25 04:19:14,556	INFO trainable.py:105 -- _setup took 12.893 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=109842)[0m 2019-08-25 04:19:15,702	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=109842)[0m 2019-08-25 04:19:15.723789: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36

[2m[36m(pid=109841)[0m 2019-08-25 04:19:19,711	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=109841)[0m 
[2m[36m(pid=109841)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.159, mean=0.078),
[2m[36m(pid=109841)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.537, max=3.656, mean=-0.102),
[2m[36m(pid=109841)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.375, max=0.368, mean=0.007),
[2m[36m(pid=109841)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=109841)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.004, max=0.008, mean=0.002),
[2m[36m(pid=109841)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=109841)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=524820693.0, max=1753876311.0, mean=942114793.6),
[2m[36m(pid=1098





























































































































































2019-08-25 05:00:59,329	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=109878)[0m 
[2m[36m(pid=109878)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=109878)[0m 
[2m[36m(pid=109878)[0m 2019-08-25 05:01:03,720	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=109878)[0m 2019-08-25 05:01:03.721622: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=109878)[0m 2019-08-25 05:01:04,125	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=109878)[0m 
[2m[36m(pid=109878)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=109878)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=109878)[0m   'advantages': <tf.Tensor 'defa

[2m[36m(pid=129620)[0m 
[2m[36m(pid=129620)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=129620)[0m 
[2m[36m(pid=129619)[0m 
[2m[36m(pid=129619)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=129619)[0m 
[2m[36m(pid=129620)[0m 2019-08-25 05:01:15,698	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=129620)[0m 2019-08-25 05:01:15.723637: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=109878)[0m 2019-08-25 05:01:16,124	INFO trainable.py:105 -- _setup took 14.409 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[3

[2m[36m(pid=129620)[0m 2019-08-25 05:01:20,184	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=129620)[0m 
[2m[36m(pid=129620)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.159, mean=0.075),
[2m[36m(pid=129620)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.012, max=3.154, mean=0.063),
[2m[36m(pid=129620)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.317, max=0.537, mean=-0.008),
[2m[36m(pid=129620)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=129620)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.008, max=0.009, mean=0.001),
[2m[36m(pid=129620)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=129620)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=180443125.0, max=1991185530.0, mean=1254891209.4),
[2m[36m(pid=1

















































































































































































2019-08-25 05:42:07,900	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=129639)[0m 
[2m[36m(pid=129639)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=129639)[0m 
[2m[36m(pid=129639)[0m 2019-08-25 05:42:11,605	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=129639)[0m 2019-08-25 05:42:11.606818: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=129639)[0m 2019-08-25 05:42:11,861	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=129639)[0m 
[2m[36m(pid=129639)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=129639)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=129639)[0m   'advantages': <tf.Tensor 'defa

[2m[36m(pid=129639)[0m 2019-08-25 05:42:22,208	INFO trainable.py:105 -- _setup took 12.421 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=18698)[0m 
[2m[36m(pid=18698)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=18698)[0m 
[2m[36m(pid=18699)[0m 
[2m[36m(pid=18699)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=18699)[0m 
[2m[36m(pid=18699)[0m 2019-08-25 05:42:24,363	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=18699)[0m 2019-08-25 05:42:24.402132: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=1

[2m[36m(pid=18698)[0m 2019-08-25 05:42:28,439	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=18698)[0m 
[2m[36m(pid=18698)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.159, mean=0.079),
[2m[36m(pid=18698)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.885, max=3.559, mean=0.066),
[2m[36m(pid=18698)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.976, max=0.937, mean=0.012),
[2m[36m(pid=18698)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=18698)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.009, max=0.008, mean=-0.001),
[2m[36m(pid=18698)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=18698)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=179140945.0, max=892594450.0, mean=632721064.8),
[2m[36m(pid=18698)[0m    

































































































































2019-08-25 06:22:36,468	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=18723)[0m 
[2m[36m(pid=18723)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=18723)[0m 
[2m[36m(pid=18723)[0m 2019-08-25 06:22:40,050	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=18723)[0m 2019-08-25 06:22:40.051632: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=18723)[0m 2019-08-25 06:22:40,419	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=18723)[0m 
[2m[36m(pid=18723)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=18723)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=18723)[0m   'advantages': <tf.Tensor 'default_policy

[2m[36m(pid=38364)[0m 
[2m[36m(pid=38364)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=38364)[0m 
[2m[36m(pid=18723)[0m 2019-08-25 06:22:50,361	INFO trainable.py:105 -- _setup took 11.864 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=38365)[0m 
[2m[36m(pid=38365)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=38365)[0m 
[2m[36m(pid=38364)[0m 2019-08-25 06:22:51,770	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=38364)[0m 2019-08-25 06:22:51.795447: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=38

[2m[36m(pid=38364)[0m 2019-08-25 06:22:55,913	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=38364)[0m 
[2m[36m(pid=38364)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.159, mean=0.079),
[2m[36m(pid=38364)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.328, max=2.693, mean=-0.045),
[2m[36m(pid=38364)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-1.436, max=1.16, mean=-0.02),
[2m[36m(pid=38364)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=38364)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.012, max=0.011, mean=0.001),
[2m[36m(pid=38364)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=38364)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=281378600.0, max=1912418448.0, mean=1105239718.0),
[2m[36m(pid=38364)[0m   





















































































































[PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.95,kl

In [None]:
# stop

# Temp

In [None]:
agent = ray.rllib.agents.ppo.PPOTrainer(config, GAIAPredictorsContinuousV11)

In [None]:
env = GAIAPredictorsContinuousV9()
state = env.reset()
state

In [None]:
agent.compute_action(state)

# Restore

In [11]:
results = WalkForwardResults('/home/Nicholas/trading-gym_0.8.1/trading-gym/notebooks/registry/gaia/v9/logs')
results

WalkForwardResults(['GAIAPredictorsContinuousV11', 'GAIAPredictorsContinuousV12'])

In [12]:
env_results = results['GAIAPredictorsContinuousV12']
env_results

EnvResults(GAIAPredictorsContinuousV12)

In [13]:
# Step 1.
env = env_results.make_env(
    env_config={
        'cost_of_commissions': 0.00005,
        'cost_of_spread': 0.0001,
        'folds': {
            'training-set': [datetime.min, datetime(2008, 3, 18)],
            'test-set': [datetime(2008, 3, 19), datetime.max],
        }
    },
)

In [14]:
env_results.restore_ids

{-5130043545937450704: [AgentResults(GAIAPredictorsContinuousV12_1-01-01_to_2013-12-31/PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-25_03-39-30ra635rbb),
  AgentResults(GAIAPredictorsContinuousV12_1-01-01_to_2009-12-31/PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-25_00-56-31q62e48jc),
  AgentResults(GAIAPredictorsContinuousV12_1-01-01_to_2007-12-31/PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-24_23-28-033mm0loo1),
  AgentResults(GAIAPredictorsContinuousV12_1-01-01_to_2008-12-31/PPO_GAIAPredictorsContinuousV12_restoreID=-5130043545937450704_runID=9fAi6FX2_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.9_2019-08-25_00-15-356hob7ou4),
  AgentResults(GAIAPredictorsContinuousV12_1-01-01_to_2012-12-31/PPO_GAIAP

In [16]:
2557338427780778455
print(list(env_results.restore_ids)[-1])

-8151400312683044895


In [15]:
restore_id = list(env_results.restore_ids)[-1]
restore_id= -5130043545937450704

In [16]:
policy = env_results.make_policy(
    env=env,
    restore_id=restore_id,
    checkpoint_nr=None,  # use None (or don't specify) to use last checkpoint available
)
policy

<trading_gym.ray.walkforward.policy.WalkForwardPolicy at 0x7f3fbdd4f048>

In [17]:
history = policy.history()
# history

In [18]:
# Step 3.
episode = env.sample_episode(fold='test-set', policy=policy, verbose=False)

2019-08-25 10:52:49,911	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:52:50,148	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:

{ 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
  'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
  'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
  'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
  'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
  'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 5) dtype=float32>,
  'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 5) dtype=float32>,
  'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 2) dtype=float32>,
  'prev_rewards': <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>,
  'rewards': 

[2m[36m(pid=38401)[0m 
[2m[36m(pid=38401)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=38401)[0m 
[2m[36m(pid=38541)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=38541)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=38541)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=38541)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=38541)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=38541)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=38401)[0m 2019-08-25 10:52:56,748	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=38401)[0m 2019-08-25 10:52:56.793812: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not com

2019-08-25 10:53:01,137	INFO trainable.py:105 -- _setup took 12.787 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-08-25 10:53:01,216	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


[2m[36m(pid=38541)[0m 2019-08-25 10:53:01,305	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=38541)[0m 2019-08-25 10:53:01.322587: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=38541)[0m 
[2m[36m(pid=38541)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=38541)[0m 


2019-08-25 10:53:04,725	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:53:06,920	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f3a6c745c50>}
2019-08-25 10:53:06,921	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f3a6c745828>}
2019-08-25 10:53:06,926	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f3a6c7455f8>}
2019-08-25 10:53:06,988	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=38808)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=38808)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=38808)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=38808)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=38808)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=38808)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=38809)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=38809)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=38809)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=38809)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=38809)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=38809)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=38852)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m

2019-08-25 10:53:15,687	INFO trainable.py:105 -- _setup took 13.093 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=38808)[0m 2019-08-25 10:53:15,530	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=38808)[0m 2019-08-25 10:53:15.553513: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=38808)[0m 2019-08-25 10:53:15,845	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=38808)[0m 
[2m[36m(pid=38808)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=38808)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=38808)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=38808)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=38808)[0m   'dones': <tf.T

2019-08-25 10:53:19,265	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:53:21,768	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f3a48245438>}
2019-08-25 10:53:21,770	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f3a482eafd0>}
2019-08-25 10:53:21,773	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f3a482eada0>}
2019-08-25 10:53:21,845	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=38852)[0m 
[2m[36m(pid=38852)[0m The 'contains' method is deprecated and will be removed in a future version. Use 'key in index' instead of 'index.contains(key)'
[2m[36m(pid=38852)[0m 
[2m[36m(pid=39114)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=39114)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=39114)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=39114)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=39114)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=39114)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=38852)[0m 2019-08-25 10:53:24,755	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=38852)[0m 2019-08-25 10:53:24.775466: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not com

2019-08-25 10:53:28,307	INFO trainable.py:105 -- _setup took 10.596 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=39114)[0m 2019-08-25 10:53:28,337	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=39114)[0m 2019-08-25 10:53:28.358760: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=39114)[0m 
[2m[36m(pid=39114)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=39114)[0m 


2019-08-25 10:53:30,798	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:53:32,765	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f3a26dd2b38>}
2019-08-25 10:53:32,766	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f3a26dd2710>}
2019-08-25 10:53:32,770	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f3a26dd2550>}
2019-08-25 10:53:32,818	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=39335)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=39335)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=39335)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=39335)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=39335)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=39335)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=39336)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=39336)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=39336)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=39336)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=39336)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=39336)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=39359)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m

2019-08-25 10:53:41,015	INFO trainable.py:105 -- _setup took 11.345 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=39335)[0m 2019-08-25 10:53:40,889	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=39335)[0m 2019-08-25 10:53:40.912704: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=39336)[0m 2019-08-25 10:53:41,062	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=39336)[0m 2019-08-25 10:53:41.085046: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=39335)[0m 2019-08-25 10:53:41,224	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=39335)[0m 
[2m[36m(pid=39335)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=39335)

2019-08-25 10:53:43,816	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:53:45,678	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f3a120cf278>}
2019-08-25 10:53:45,679	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f3a12175e10>}
2019-08-25 10:53:45,682	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f3a27b632e8>}
2019-08-25 10:53:45,723	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']
2019-08-25 10:53:53,514	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-25 10:53:55,315	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy obj

In [19]:
episode.renderer.cumulative_performance.to_plotly()
episode.renderer.target_weights.to_plotly()
episode.renderer.annual_returns.to_plotly()
episode.renderer.tearsheet()

Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),ETF(Russell 1000),ETF(7-10Y T-Bills)
Context,From,2008-03-19,2008-03-19,2008-03-19,2008-03-19,2008-03-19,2008-03-19
Context,To,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28
Context,Years,10.4493,10.4493,10.4493,10.4493,10.4493,10.4493
Context,Observations,2725,2725,2725,2725,2725,2725
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.00681294,0.00681294,0.00681294,0.00681294,0.00681294,0.00681294
Return,CAGR,0.0902611,0.158586,0.00681294,0,0.104507,0.0339243
Return,CAGR over cash,0.0834481,0.151773,0,-0.00681294,0.0976941,0.0271113
Return,Overall return,1.46699,3.65592,0.0735266,0,1.82541,0.417089
Risk,Volatility,0.103097,0.0970738,0.000598812,0,0.197859,0.0766871


In [20]:
# cost2restore_id = {
#     0.00001: -3264367374635941251,    
# }

In [21]:
# nr2episode = dict()
# for cost_of_commissions, restore_id in cost2restore_id.items():
#     nr2episode[cost_of_commissions] = env_results.get_nr2episode(
#         restore_id=restore_id,
#         checkpoint_nrs=np.arange(1, 126, 1),
#         fold='test-set',
#         env_config={
#             'folds': {
#                 'training-set': [datetime.min, datetime(2008, 3, 18)],
#                 'test-set': [datetime(2008, 3, 19), datetime.max],
#             }
#         }
#     )

In [None]:
cost_of_commissions = 0.00001
nr2episode[cost_of_commissions].plot_weights()

In [None]:
nr2episode[cost_of_commissions].plot_levels()

In [None]:
nr2episode[cost_of_commissions].plot_metrics_as_we_train()