In [4]:
import ray
from ray import rllib, tune
import pandas as pd
import numpy as np
from datetime import datetime
import trading_gym
from trading_gym.registry.gaia.v13.env import GAIAPredictorsContinuousV13
from trading_gym.ray.walkforward import WalkForwardRunner, WalkForwardResults
%matplotlib inline
# Record the library versions this notebook was run with.
for module in (trading_gym, ray):
    print(module.__package__, module.__version__)

trading-gym 0.8.1
ray 0.7.3


In [2]:
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models import ModelCatalog

In [3]:
# Start Ray for this notebook session. `ignore_reinit_error=True` keeps the
# cell safe to re-run: without it, a second call on a live kernel raises
# because Ray is already initialized.
ray.init(ignore_reinit_error=True)

2019-08-24 23:31:21,723	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-24_23-31-21_722734_101587/logs.
2019-08-24 23:31:22,043	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:16309 to respond...
2019-08-24 23:31:22,277	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:32833 to respond...
2019-08-24 23:31:22,285	INFO services.py:809 -- Starting Redis shard with 10.0 GB max memory.
2019-08-24 23:31:22,423	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-24_23-31-21_722734_101587/logs.
2019-08-24 23:31:22,432	INFO services.py:1475 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


{'node_ip_address': '10.0.5.4',
 'redis_address': '10.0.5.4:16309',
 'object_store_address': '/tmp/ray/session_2019-08-24_23-31-21_722734_101587/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-08-24_23-31-21_722734_101587/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2019-08-24_23-31-21_722734_101587'}

# Create a custom RNN policy

In [4]:
import ray
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils import try_import_tf
import tensorflow as tf

In [5]:
class MyKerasRNN(RecurrentTFModelV2):
    """Recurrent model assembled with the Keras functional API.

    Observations go through a ReLU dense layer and then an LSTM. Two dense
    heads on the LSTM output produce ``num_outputs`` linear outputs (layer
    "logits") and a single value output (layer "values").
    """

    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 hiddens_size=256,
                 cell_size=64):
        """Build the Keras model and register its variables.

        Args:
            obs_space: Observation space; only ``obs_space.shape[0]`` is read
                here, as the per-step feature size.
            action_space: Forwarded to the base class.
            num_outputs: Number of units of the "logits" head.
            model_config: Forwarded to the base class.
            name: Forwarded to the base class.
            hiddens_size: Units of the dense layer in front of the LSTM.
            cell_size: Number of LSTM units; also the length of each of the
                two recurrent state vectors.
        """
        super(MyKerasRNN, self).__init__(obs_space, action_space, num_outputs,
                                         model_config, name)
        self.cell_size = cell_size
        layers = tf.keras.layers

        # Symbolic inputs: observation sequence, LSTM state (h, c) and the
        # per-sample sequence lengths used to build the mask.
        obs_in = layers.Input(shape=(None, obs_space.shape[0]), name="inputs")
        h_in = layers.Input(shape=(cell_size, ), name="h")
        c_in = layers.Input(shape=(cell_size, ), name="c")
        seq_lens_in = layers.Input(shape=(), name="seq_in")

        # Dense pre-processing followed by the LSTM, which returns the full
        # output sequence together with its final h and c states.
        features = layers.Dense(
            hiddens_size, activation=tf.nn.relu, name="dense1")(obs_in)
        lstm = layers.LSTM(
            cell_size, return_sequences=True, return_state=True, name="lstm")
        seq_out, h_out, c_out = lstm(
            inputs=features,
            mask=tf.sequence_mask(seq_lens_in),
            initial_state=[h_in, c_in])

        # Output heads, both fed by the LSTM output sequence.
        logits_out = layers.Dense(
            self.num_outputs,
            activation=tf.keras.activations.linear,
            name="logits")(seq_out)
        values_out = layers.Dense(1, activation=None, name="values")(seq_out)

        # Wrap everything in one Keras model, register its variables with the
        # base class and print the layer summary.
        self.rnn_model = tf.keras.Model(
            inputs=[obs_in, seq_lens_in, h_in, c_in],
            outputs=[logits_out, values_out, h_out, c_out])
        self.register_variables(self.rnn_model.variables)
        self.rnn_model.summary()

    @override(RecurrentTFModelV2)
    def forward_rnn(self, inputs, state, seq_lens):
        """Run the Keras model on a batch of sequences.

        ``state`` must be a list ``[h, c]``; it is appended to
        ``[inputs, seq_lens]`` to form the model inputs. The value-head output
        is stored on ``self._value_out`` for ``value_function``.

        Returns:
            The "logits" head output and the new state list ``[h, c]``.
        """
        outputs = self.rnn_model([inputs, seq_lens] + state)
        model_out, self._value_out, h, c = outputs
        return model_out, [h, c]

    @override(ModelV2)
    def get_initial_state(self):
        """Return two zero float32 vectors of length ``cell_size``."""
        return [np.zeros(self.cell_size, np.float32) for _ in range(2)]

    @override(ModelV2)
    def value_function(self):
        """Return the value output of the latest ``forward_rnn`` call, flattened to 1-D.

        ``self._value_out`` is only assigned inside ``forward_rnn``, so that
        method has to run first.
        """
        flat_shape = [-1]
        return tf.reshape(self._value_out, flat_shape)

In [6]:
# Register the model class with RLlib's ModelCatalog under the name "rnn".
# NOTE(review): the config line that would select it
# (config['model']['custom_model'] = 'rnn') is commented out further down, so
# the runs in this notebook do not appear to use this model — confirm.
ModelCatalog.register_custom_model("rnn", MyKerasRNN)

## Create the walk-forward partitions
It is the user's responsibility to create the training/test (and optionally validation) partitions that drive the walk-forward training. Note that a 2-fold split is a particular case of walk-forward training, so you can still run a simple 2-fold split.


In [7]:
# Expanding-window walk-forward folds: each fold trains on everything up to
# 31 December of `year` and tests on the following calendar year.
partitions = [
    {
        'training-set': [datetime.min, datetime(year, 12, 31)],
        'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
    }
    for year in range(2007, 2018)
]
partitions


[{'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2007, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2008, 1, 1, 0, 0),
   datetime.datetime(2008, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2008, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2009, 1, 1, 0, 0),
   datetime.datetime(2009, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2009, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2010, 1, 1, 0, 0),
   datetime.datetime(2010, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2010, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2011, 1, 1, 0, 0),
   datetime.datetime(2011, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 1, 1, 0, 0),
   datetime.datetime(2011, 12, 31, 0, 0)],
  'test-set': [datetime.datetime(2012, 1, 1, 0, 0),
   datetime.datetime(2012, 12, 31, 0, 0)]},
 {'training-set': [datetime.datetime(1, 

In [8]:
# Create the config dict
import copy

# Deep-copy the PPO defaults. dict.copy() is shallow, so nested dicts such as
# config['model'] would still be the very objects held by DEFAULT_CONFIG, and
# editing them (e.g. config['model']['use_lstm'] = True) would silently change
# the library-wide defaults for the rest of the kernel session.
config = copy.deepcopy(ray.rllib.agents.ppo.DEFAULT_CONFIG)
config['env'] = GAIAPredictorsContinuousV13
config['env_config'] = {
    'cost_of_commissions': tune.grid_search([0.00005]),
    'cost_of_spread': 0.0001,
}
config['gamma'] = tune.grid_search([0.82])

In [9]:
# PPO hyper-parameter overrides applied to the existing `config` dict.
# NOTE(review): vf_clip_param=0 and lambda=0 are unusual settings — confirm
# they are intentional.
config.update({
    # Value-function / advantage estimation
    'vf_clip_param': 0,
    'vf_loss_coeff': 1e-05,
    'lambda': 0,
    'use_gae': True,
    # Sampling and SGD
    'batch_mode': 'complete_episodes',
    'train_batch_size': 4000,
    'sgd_minibatch_size': 128,
    'num_sgd_iter': tune.grid_search([8]),
    # Loss coefficients and clipping
    'entropy_coeff': 1e-5,
    'kl_coeff': 0.2,
    'kl_target': tune.grid_search([0.01]),
    'clip_param': tune.grid_search([0.8]),
    # Optimizer
    'lr': tune.grid_search([1e-5]),
})

# Recurrent / custom-model options are intentionally left disabled here.
# To use an LSTM, the value function needs to share layers:
#   config['vf_share_layers'] = True
#   config['model']['use_lstm'] = True
# To use the custom model registered as "rnn":
#   config['model']['custom_model'] = 'rnn'
# NOTE(review): a max_seq_len of 20 was also considered; it presumably belongs
# directly under config['model'] — verify against the RLlib model config.


In [10]:
# print(config)

In [11]:
# Linear regression
# config['model']['fcnet_hiddens'] = []

In [12]:
# TODO: Use a linear regression.

In [13]:
# Instantiate the environment with its default arguments and draw one random
# action as a quick look at the action space.
env = GAIAPredictorsContinuousV13()
env.action_space.sample()

array([-0.05575708, -0.07833881])

In [14]:
# Set up the walk-forward experiment: PPO is trained with `config` on the date
# partitions. `stop` and `checkpoint_freq` are passed through as given
# (presumably Tune's stopping criteria and checkpoint interval — confirm
# against WalkForwardRunner).
walk_forward = WalkForwardRunner(
    trainable=ray.rllib.agents.ppo.PPOTrainer,
    config=config,
    env_partitions=partitions,
    stop=dict(timesteps_total=500000),
    checkpoint_freq=1,
)

Note that WalkForwardRunner has constructed the implied ray Experiment(s) from your walk forward settings.

walk_forward.experiments

Note that trials are associated with a `RestoreID`. This `ID` is all you need to restore an agent. Here every `tune.grid_search` holds a single value and there are 11 partitions, so we have a total of 11 experiments.

In [15]:
# Run the walk-forward experiments and keep the returned trials.
# Long-running: in the captured log below, successive "Starting a new
# experiment" entries are more than an hour apart.
trials = walk_forward.run(verbose=0)
trials

2019-08-24 00:05:20,180	INFO trial_runner.py:176 -- Starting a new experiment.
2019-08-24 00:05:20,261	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


[2m[36m(pid=81041)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=81041)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=81041)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=81041)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=81041)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=81041)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=81041)[0m 2019-08-24 00:05:33,885	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=81041)[0m 2019-08-24 00:05:33.890599: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=81041)[0m 2019-08-24 00:05:34,588	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=81041)[0m 
[2m[36m(pid=81041)[0m { 'ac

[2m[36m(pid=81038)[0m 2019-08-24 00:05:52,120	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=81038)[0m 2019-08-24 00:05:52.176183: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=81038)[0m 
[2m[36m(pid=81038)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=81038)[0m 
[2m[36m(pid=81034)[0m 2019-08-24 00:05:55,490	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=81034)[0m 2019-08-24 00:05:55.536508: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=81041)[0m 2019-08-24 00:05:56,527	INFO trainable.py:105 -- _setup took 24.782 sec

[2m[36m(pid=81040)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=81040)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=81040)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=81040)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=81040)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=81040)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=81037)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=81037)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=81037)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=81037)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=81037)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=81037)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=81035)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m

[2m[36m(pid=81034)[0m 2019-08-24 00:06:03,699	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=81034)[0m 
[2m[36m(pid=81034)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.157, mean=0.081),
[2m[36m(pid=81034)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.541, max=2.845, mean=-0.003),
[2m[36m(pid=81034)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.009, max=0.014, mean=0.0),
[2m[36m(pid=81034)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=81034)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.009, max=0.013, mean=0.0),
[2m[36m(pid=81034)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=81034)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=500771995.0, max=1269115279.0, mean=831006784.0),
[2m[36m(pid=81034)[0m       





















































































































































































































































































































































































































































2019-08-24 01:17:35,854	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=81036)[0m 2019-08-24 01:17:42,938	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=81036)[0m 2019-08-24 01:17:42.939883: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=81036)[0m 2019-08-24 01:17:43,737	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=81036)[0m 
[2m[36m(pid=81036)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=81036)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=81036)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=81036)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=81036)[0m   'dones': <tf.T

[2m[36m(pid=81035)[0m 2019-08-24 01:18:06,830	INFO sample_batch_builder.py:161 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=81035)[0m 
[2m[36m(pid=81035)[0m { 'agent0': { 'data': { 'action_prob': np.ndarray((41,), dtype=float32, min=0.0, max=0.157, mean=0.071),
[2m[36m(pid=81035)[0m                         'actions': np.ndarray((41, 2), dtype=float32, min=-3.539, max=4.274, mean=-0.109),
[2m[36m(pid=81035)[0m                         'advantages': np.ndarray((41,), dtype=float32, min=-0.01, max=0.006, mean=-0.001),
[2m[36m(pid=81035)[0m                         'agent_index': np.ndarray((41,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=81035)[0m                         'behaviour_logits': np.ndarray((41, 4), dtype=float32, min=-0.009, max=0.008, mean=-0.0),
[2m[36m(pid=81035)[0m                         'dones': np.ndarray((41,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=81035)[0m                         'eps_id': np

[2m[36m(pid=81036)[0m 2019-08-24 01:18:36,355	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.










































































































































































































































































































































































































































2019-08-24 02:24:36,801	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=81040)[0m 2019-08-24 02:24:41,818	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=81040)[0m 2019-08-24 02:24:41.823373: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=81040)[0m 2019-08-24 02:24:42,452	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=81040)[0m 
[2m[36m(pid=81040)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=81040)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=81040)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=81040)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=81040)[0m   'dones': <tf.T

[2m[36m(pid=81039)[0m 2019-08-24 02:25:04,849	INFO sampler.py:548 -- Outputs of compute_actions():
[2m[36m(pid=81039)[0m 
[2m[36m(pid=81039)[0m { 'default_policy': ( np.ndarray((1, 2), dtype=float32, min=-0.518, max=0.269, mean=-0.124),
[2m[36m(pid=81039)[0m                       [],
[2m[36m(pid=81039)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=0.133, max=0.133, mean=0.133),
[2m[36m(pid=81039)[0m                         'behaviour_logits': np.ndarray((1, 4), dtype=float32, min=-0.005, max=0.006, mean=0.0),
[2m[36m(pid=81039)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=-0.003, max=-0.003, mean=-0.003)})}
[2m[36m(pid=81039)[0m 
[2m[36m(pid=17338)[0m 
[2m[36m(pid=17338)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=17338)[0m 
[2m[36m(pid=81039)[0m 2019-08-24 02:25:05,151	INFO sample_batch_builder.py:161 -- Tra

[2m[36m(pid=81040)[0m 2019-08-24 02:25:36,437	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.






























































































































































































































































































































































































































































2019-08-24 03:31:23,038	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=49396)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=49396)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=49396)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=49396)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=49396)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=49396)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=49396)[0m 2019-08-24 03:31:36,621	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=49396)[0m 2019-08-24 03:31:36.622704: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=49396)[0m 2019-08-24 03:31:37,294	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=49396)[0m 
[2m[36m(pid=49396)[0m { 'ac

[2m[36m(pid=49618)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=49618)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=49618)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=49618)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=49618)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=49618)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=49590)[0m 2019-08-24 03:31:56,805	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=49590)[0m 2019-08-24 03:31:56.845120: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=49396)[0m 2019-08-24 03:31:57,965	INFO trainable.py:105 -- _setup took 22.623 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to re

[2m[36m(pid=49591)[0m 2019-08-24 03:32:07,787	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=49591)[0m 
[2m[36m(pid=49591)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.158, mean=0.076),
[2m[36m(pid=49591)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.217, max=3.003, mean=-0.093),
[2m[36m(pid=49591)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.015, max=0.014, mean=0.0),
[2m[36m(pid=49591)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=49591)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.009, max=0.011, mean=0.0),
[2m[36m(pid=49591)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=49591)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=1070105450.0, max=1903314156.0, mean=1588707810.6),
[2m[36m(pid=49591)[0m     













































































































































































































































































































































































































































































2019-08-24 04:41:10,046	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=49618)[0m 2019-08-24 04:41:15,236	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=49618)[0m 2019-08-24 04:41:15.238196: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=49618)[0m 2019-08-24 04:41:15,700	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=49618)[0m 
[2m[36m(pid=49618)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=49618)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=49618)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=49618)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=49618)[0m   'dones': <tf.T

[2m[36m(pid=49618)[0m 2019-08-24 04:41:33,968	INFO trainable.py:105 -- _setup took 20.241 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=83190)[0m 2019-08-24 04:41:35,717	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=83190)[0m 2019-08-24 04:41:35.743301: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=83190)[0m 
[2m[36m(pid=83190)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=83190)[0m 
[2m[36m(pid=83189)[0m 2019-08-24 04:41:39,146	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=83189)[0m 2019-08-24 04:41:39.206360: I tensorflow/core/platform/c

[2m[36m(pid=83189)[0m 2019-08-24 04:41:48,513	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=83189)[0m 
[2m[36m(pid=83189)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.159, mean=0.075),
[2m[36m(pid=83189)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.269, max=3.371, mean=-0.011),
[2m[36m(pid=83189)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.022, max=0.016, mean=-0.0),
[2m[36m(pid=83189)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=83189)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.009, max=0.012, mean=0.001),
[2m[36m(pid=83189)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=83189)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=564014879.0, max=1915954164.0, mean=1293352891.2),
[2m[36m(pid=83189)[0m   





































































































































































































































































































































































































































































2019-08-24 05:51:09,579	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=83227)[0m 2019-08-24 05:51:15,548	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=83227)[0m 2019-08-24 05:51:15.549063: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=83227)[0m 2019-08-24 05:51:16,824	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=83227)[0m 
[2m[36m(pid=83227)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=83227)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=83227)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=83227)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=83227)[0m   'dones': <tf.T

[2m[36m(pid=116633)[0m 2019-08-24 05:51:40,517	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=116633)[0m 2019-08-24 05:51:40.612340: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=83227)[0m 2019-08-24 05:51:40,980	INFO trainable.py:105 -- _setup took 28.075 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=116633)[0m 2019-08-24 05:51:41,350	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=116633)[0m 
[2m[36m(pid=116633)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=116633)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=116633)[0m   'advantages': <tf.Tensor 

[2m[36m(pid=116633)[0m 2019-08-24 05:51:49,088	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=116633)[0m 
[2m[36m(pid=116633)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.002, max=0.159, mean=0.081),
[2m[36m(pid=116633)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.867, max=2.967, mean=0.022),
[2m[36m(pid=116633)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.022, max=0.034, mean=0.0),
[2m[36m(pid=116633)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=116633)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.016, max=0.012, mean=-0.002),
[2m[36m(pid=116633)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=116633)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=55740156.0, max=1731648705.0, mean=852195180.4),
[2m[36m(pid=11663

















































































































































































































































































































































































































































































2019-08-24 07:02:45,508	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=116663)[0m 2019-08-24 07:02:51,144	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=116663)[0m 2019-08-24 07:02:51.146007: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=116663)[0m 2019-08-24 07:02:51,782	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=116663)[0m 
[2m[36m(pid=116663)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=116663)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=116663)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=116663)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=116663)[0m   'done

[2m[36m(pid=116663)[0m 2019-08-24 07:03:11,346	INFO trainable.py:105 -- _setup took 22.546 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=20151)[0m 2019-08-24 07:03:11,965	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=20151)[0m 2019-08-24 07:03:12.022528: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=20147)[0m 2019-08-24 07:03:12,267	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=20147)[0m 2019-08-24 07:03:12.332696: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=20151)[0m 2019-08-24 07:03:12,970	INFO dynamic_tf_poli

[2m[36m(pid=20151)[0m 2019-08-24 07:03:20,214	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=20151)[0m 
[2m[36m(pid=20151)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.002, max=0.158, mean=0.079),
[2m[36m(pid=20151)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.818, max=2.541, mean=0.036),
[2m[36m(pid=20151)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.013, max=0.013, mean=0.0),
[2m[36m(pid=20151)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=20151)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.018, max=0.012, mean=-0.001),
[2m[36m(pid=20151)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=20151)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=61062175.0, max=1693829817.0, mean=922023147.2),
[2m[36m(pid=20151)[0m    









































































































































































































































































































































































































































































2019-08-24 08:10:31,624	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=20182)[0m 2019-08-24 08:10:37,094	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=20182)[0m 2019-08-24 08:10:37.095081: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=20182)[0m 2019-08-24 08:10:37,603	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=20182)[0m 
[2m[36m(pid=20182)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=20182)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=20182)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=20182)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=20182)[0m   'dones': <tf.T

[2m[36m(pid=52705)[0m 2019-08-24 08:10:55,411	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=52705)[0m 2019-08-24 08:10:55.452291: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=20182)[0m 2019-08-24 08:10:55,786	INFO trainable.py:105 -- _setup took 20.444 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=52705)[0m 2019-08-24 08:10:56,154	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=52705)[0m 
[2m[36m(pid=52705)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=52705)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=52705)[0m   'advantages': <tf.Tensor 'defaul

[2m[36m(pid=52705)[0m 2019-08-24 08:11:02,494	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=52705)[0m 
[2m[36m(pid=52705)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.159, mean=0.084),
[2m[36m(pid=52705)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.993, max=3.095, mean=0.031),
[2m[36m(pid=52705)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.013, max=0.011, mean=0.0),
[2m[36m(pid=52705)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=52705)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.011, max=0.01, mean=-0.0),
[2m[36m(pid=52705)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=52705)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=389012728.0, max=1404150874.0, mean=913974857.2),
[2m[36m(pid=52705)[0m      





























































































































































































































































































































































































































































2019-08-24 09:13:46,127	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=52743)[0m 2019-08-24 09:13:52,202	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=52743)[0m 2019-08-24 09:13:52.203862: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=52743)[0m 2019-08-24 09:13:53,135	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=52743)[0m 
[2m[36m(pid=52743)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=52743)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=52743)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=52743)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=52743)[0m   'dones': <tf.T

[2m[36m(pid=52743)[0m 2019-08-24 09:14:10,716	INFO trainable.py:105 -- _setup took 20.358 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=83173)[0m 2019-08-24 09:14:11,860	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=83173)[0m 2019-08-24 09:14:11.890731: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=83173)[0m 2019-08-24 09:14:12,622	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=83173)[0m 
[2m[36m(pid=83173)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=83173)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=83173)[0m   'advantages': <tf.Tensor 'defaul

[2m[36m(pid=83173)[0m 2019-08-24 09:14:19,448	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=83173)[0m 
[2m[36m(pid=83173)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.0, max=0.157, mean=0.076),
[2m[36m(pid=83173)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.628, max=3.722, mean=0.07),
[2m[36m(pid=83173)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.071, max=0.048, mean=0.001),
[2m[36m(pid=83173)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=83173)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.01, max=0.01, mean=-0.0),
[2m[36m(pid=83173)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=83173)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=61060247.0, max=1313736832.0, mean=600366869.0),
[2m[36m(pid=83173)[0m         

























































































































































































































































































































































































































































2019-08-24 10:17:57,959	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=83201)[0m 2019-08-24 10:18:04,397	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=83201)[0m 2019-08-24 10:18:04.400935: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=83201)[0m 2019-08-24 10:18:05,025	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=83201)[0m 
[2m[36m(pid=83201)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=83201)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=83201)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=83201)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=83201)[0m   'dones': <tf.T

[2m[36m(pid=114081)[0m 2019-08-24 10:18:26,114	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=114081)[0m 2019-08-24 10:18:26.146173: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=83201)[0m 2019-08-24 10:18:26,359	INFO trainable.py:105 -- _setup took 23.983 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=114080)[0m 2019-08-24 10:18:26,900	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=114080)[0m 2019-08-24 10:18:26.938824: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=114080)[0m 2019-08-24 10:18:27,749	INFO dynamic_tf_

[2m[36m(pid=114080)[0m 2019-08-24 10:18:35,019	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=114080)[0m 
[2m[36m(pid=114080)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.159, mean=0.077),
[2m[36m(pid=114080)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-3.053, max=2.816, mean=0.002),
[2m[36m(pid=114080)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.022, max=0.02, mean=0.0),
[2m[36m(pid=114080)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=114080)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.011, max=0.011, mean=0.0),
[2m[36m(pid=114080)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=114080)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=560878442.0, max=1918595599.0, mean=1208263823.6),
[2m[36m(pid=114080)













































































































































































































































































































































































































































2019-08-24 11:30:26,596	INFO trial_runner.py:176 -- Starting a new experiment.


[2m[36m(pid=114117)[0m 2019-08-24 11:30:33,453	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=114117)[0m 2019-08-24 11:30:33.461019: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=114117)[0m 2019-08-24 11:30:34,344	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:
[2m[36m(pid=114117)[0m 
[2m[36m(pid=114117)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=114117)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=114117)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=114117)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
[2m[36m(pid=114117)[0m   'done

[2m[36m(pid=114117)[0m 2019-08-24 11:30:57,168	INFO trainable.py:105 -- _setup took 26.879 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=18800)[0m 2019-08-24 11:30:57,884	INFO rollout_worker.py:319 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=18801)[0m 2019-08-24 11:30:57,895	INFO rollout_worker.py:319 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=18800)[0m 2019-08-24 11:30:57.952554: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=18801)[0m 2019-08-24 11:30:57.997295: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=18800)[0m 2019-08-24 11:30:58,906	INFO dynamic_tf_poli

[2m[36m(pid=18800)[0m 2019-08-24 11:31:07,360	INFO rollout_worker.py:485 -- Completed sample batch:
[2m[36m(pid=18800)[0m 
[2m[36m(pid=18800)[0m { 'data': { 'action_prob': np.ndarray((205,), dtype=float32, min=0.001, max=0.158, mean=0.078),
[2m[36m(pid=18800)[0m             'actions': np.ndarray((205, 2), dtype=float32, min=-2.898, max=2.916, mean=-0.055),
[2m[36m(pid=18800)[0m             'advantages': np.ndarray((205,), dtype=float32, min=-0.009, max=0.01, mean=0.0),
[2m[36m(pid=18800)[0m             'agent_index': np.ndarray((205,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=18800)[0m             'behaviour_logits': np.ndarray((205, 4), dtype=float32, min=-0.013, max=0.012, mean=-0.001),
[2m[36m(pid=18800)[0m             'dones': np.ndarray((205,), dtype=bool, min=0.0, max=1.0, mean=0.024),
[2m[36m(pid=18800)[0m             'eps_id': np.ndarray((205,), dtype=int64, min=172434708.0, max=1689575537.0, mean=994322250.6),
[2m[36m(pid=18800)[0m   



























































































































































































































































































































































































































































[PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82,kl_target=0.01,lr=1e-05,num_sgd_iter=8,
 PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82,kl_targe

# Restore

In [None]:
# We need (and want) to restore the most recent one that has gamma = 0.82

In [5]:
# Directory that WalkForwardResults is loaded from.
# NOTE(review): absolute, user-specific path -- adjust before running on another machine.
LOGS_DIR = '/home/Nicholas/trading-gym_0.8.1/trading-gym/notebooks/registry/gaia/v13/logs'
results = WalkForwardResults(LOGS_DIR)
results

WalkForwardResults(['GAIAPredictorsContinuousV13'])

In [6]:
# Select the results stored under the 'GAIAPredictorsContinuousV13' key
# (the only key listed in the WalkForwardResults repr above).
env_results = results['GAIAPredictorsContinuousV13']
env_results

EnvResults(GAIAPredictorsContinuousV13)

In [7]:
# Step 1. Build the environment used for evaluation.
# Data up to 2008-03-18 forms the 'training-set' fold; everything from
# 2008-03-19 onwards forms the 'test-set' fold.
env = env_results.make_env(
    env_config=dict(
        cost_of_commissions=5e-5,
        cost_of_spread=1e-4,
        folds={
            'training-set': [datetime.min, datetime(year=2008, month=3, day=18)],
            'test-set': [datetime(year=2008, month=3, day=19), datetime.max],
        },
    ),
)

**NOTE:**<br>
3892154612095477129 is for linear regression<br>
-7538808210974229658 is for non-linear, basic MLP<br>
-330274535200891787 is for non-linear, basic MLP with LSTM

In [8]:
# Show the available restore IDs; each ID maps to a list of AgentResults.
env_results.restore_ids

{4420155622729557137: [AgentResults(GAIAPredictorsContinuousV13_1-01-01_to_2012-12-31/PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82_2019-08-24_05-51-09hb9oeay0),
  AgentResults(GAIAPredictorsContinuousV13_1-01-01_to_2007-12-31/PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82_2019-08-24_00-05-2082flg21m),
  AgentResults(GAIAPredictorsContinuousV13_1-01-01_to_2014-12-31/PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82_2019-08-24_08-10-31nfxzgs19),
  AgentResults(GAIAPredictorsContinuousV13_1-01-01_to_2008-12-31/PPO_GAIAPredictorsContinuousV13_restoreID=4420155622729557137_runID=Rb4JG32f_0_clip_param=0.8,cost_of_commissions=5e-05,gamma=0.82_2019-08-24_01-17-35bj94l_1z),
  AgentResults(GAIAPredictorsContinuousV13_1-01-01_to_2015-12-31/PPO_GAIAPr

In [9]:
# Request the 'test-set' results for checkpoint numbers 1..99 of the run with
# restore ID 4420155622729557137, using the same fold boundaries as the
# make_env cell (split at 2008-03-18 / 2008-03-19).
# NOTE(review): unlike the make_env cell, no cost_of_commissions / cost_of_spread
# is passed in env_config here -- confirm that this is intended.
env_results_nonlinear = env_results.get_nr2episode(
    restore_id=4420155622729557137,
    checkpoint_nrs=np.arange(1, 100),
    fold='test-set',
    env_config=dict(
        folds={
            'training-set': [datetime.min, datetime(year=2008, month=3, day=18)],
            'test-set': [datetime(year=2008, month=3, day=19), datetime.max],
        },
    ),
)
# Linear-regression counterpart (restore ID 3892154612095477129), currently disabled:
# env_results_linear = env_results.get_nr2episode(
#     restore_id=3892154612095477129,
#     checkpoint_nrs=np.arange(1, 126, 1),
#     fold='test-set',
#     env_config={
#         'folds': {
#             'training-set': [datetime.min, datetime(2008, 3, 18)],
#             'test-set': [datetime(2008, 3, 19), datetime.max],
#         }
#     }
# )

2019-08-24 23:31:37,223	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.
2019-08-24 23:31:39,249	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-24 23:31:39,903	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:

{ 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
  'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
  'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
  'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 4) dtype=float32>,
  'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
  'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 23) dtype=float32>,
  'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 23) dtype=float32>,
  'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 2) dtype=float32

[2m[36m(pid=101984)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101984)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101984)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101984)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101984)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101984)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101986)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101986)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101986)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101986)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101986)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101986)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101984)[0m 2019-08-24 23:31:57,137	INFO rollout_worker.

2019-08-24 23:31:59,525	INFO trainable.py:105 -- _setup took 22.298 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=101986)[0m 
[2m[36m(pid=101986)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=101986)[0m 


2019-08-24 23:31:59,795	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2019-08-24 23:32:04,627	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-24 23:32:08,596	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7fcc74ddefd0>}
2019-08-24 23:32:08,599	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7fcc74ddeba8>}
2019-08-24 23:32:08,600	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7fcc75ab9b00>}
2019-08-24 23:32:08,727	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=101990)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101990)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101990)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101990)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101990)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101990)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101987)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101987)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101987)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101987)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101987)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101987)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101987)[0m 2019-08-24 23:32:17,785	INFO rollout_worker.

2019-08-24 23:32:20,955	INFO trainable.py:105 -- _setup took 18.753 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-08-24 23:32:24,405	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-24 23:32:28,505	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7fc787daaf28>}
2019-08-24 23:32:28,508	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7fc787daab00>}
2019-08-24 23:32:28,509	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7fc79cb7dc50>}
2019-08-24 23:32:28,625	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=101991)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101991)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101991)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101991)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101991)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101991)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101985)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101985)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101985)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101985)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101985)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101985)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101985)[0m 2019-08-24 23:32:37,211	INFO rollout_worker.

2019-08-24 23:32:40,199	INFO trainable.py:105 -- _setup took 16.884 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-08-24 23:32:44,344	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-24 23:32:47,805	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7fc776fa9be0>}
2019-08-24 23:32:47,808	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7fc776fa97b8>}
2019-08-24 23:32:47,815	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7fc777c868d0>}
2019-08-24 23:32:47,921	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=101988)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101988)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101988)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101988)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101988)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101988)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101989)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=101989)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=101989)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=101989)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=101989)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=101989)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=101989)[0m 2019-08-24 23:32:55,992	INFO rollout_worker.

2019-08-24 23:32:59,132	INFO trainable.py:105 -- _setup took 16.741 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-08-24 23:33:04,131	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-08-24 23:33:08,283	INFO rollout_worker.py:742 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7fc7661699b0>}
2019-08-24 23:33:08,285	INFO rollout_worker.py:743 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7fc766169588>}
2019-08-24 23:33:08,286	INFO rollout_worker.py:356 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7fc766602a58>}
2019-08-24 23:33:08,365	INFO multi_gpu_optimizer.py:93 -- LocalMultiGPUOptimizer devices ['/cpu:0']
2019-08-24 23:33:17,227	INFO trainable.py:105 -- _setup took 15.405 seconds. If your tra



## Weights

### Non-linear

In [10]:
# Interactive weights plot for the non-linear run; the output exposes an `nr` slider.
env_results_nonlinear.plot_weights()

interactive(children=(IntSlider(value=50, description='nr', max=150, min=-50), Output()), _dom_classes=('widge…

FigureWidget({
    'data': [{'name': 'Cash(USD)',
              'type': 'scatter',
              'uid': '4f409…

### Linear

In [11]:
# Disabled: env_results_linear is not computed above (its get_nr2episode call is commented out).
# env_results_linear.plot_weights()

## Levels

### Non-linear

In [12]:
# Interactive levels plot for the non-linear run; the output exposes an `nr` slider.
env_results_nonlinear.plot_levels() 

interactive(children=(IntSlider(value=50, description='nr', max=150, min=-50), Output()), _dom_classes=('widge…

FigureWidget({
    'data': [{'name': 'Strategy',
              'type': 'scatter',
              'uid': 'a68c77…

### Linear

In [13]:
# Disabled: env_results_linear is not computed above (its get_nr2episode call is commented out).
# env_results_linear.plot_levels()

## Metrics

### Non-linear

In [14]:
# Metrics plot for the non-linear run.
# NOTE(review): the name suggests metrics per training checkpoint -- TODO confirm.
env_results_nonlinear.plot_metrics_as_we_train()

### Linear

In [15]:
# Disabled: env_results_linear is not computed above (its get_nr2episode call is commented out).
# env_results_linear.plot_metrics_as_we_train()

In [16]:
# Restore ID of the run to load; same value as the literal passed to
# get_nr2episode earlier in this notebook.
restore_id = 4420155622729557137

In [17]:
# Step 2. Create the policy for `env` from the run identified by `restore_id`.
policy = env_results.make_policy(
    env=env,
    restore_id=restore_id,
    checkpoint_nr=None,  # use None (or don't specify) to use last checkpoint available
)
policy

<trading_gym.ray.walkforward.policy.WalkForwardPolicy at 0x7fc6e1958400>

In [18]:
# Step 3. Sample one episode on the 'test-set' fold with the restored policy
# (verbose output disabled).
episode = env.sample_episode(fold='test-set', policy=policy, verbose=False)

In [19]:
# Render the episode's performance views and the summary tearsheet.
# NOTE(review): a notebook cell only auto-displays the value of its LAST
# expression (the tearsheet). Unless to_plotly() renders as a side effect,
# the three figures below are built and then discarded -- confirm, and wrap
# them in display(...) or move each to its own cell if they should be shown.
episode.renderer.cumulative_performance.to_plotly()
episode.renderer.target_weights.to_plotly()
episode.renderer.annual_returns.to_plotly()
episode.renderer.tearsheet()

Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),ETF(Russell 1000),ETF(7-10Y T-Bills)
Context,From,2008-03-19,2008-03-19,2008-03-19,2008-03-19,2008-03-19,2008-03-19
Context,To,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28
Context,Years,10.4493,10.4493,10.4493,10.4493,10.4493,10.4493
Context,Observations,2725,2725,2725,2725,2725,2725
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.00681294,0.00681294,0.00681294,0.00681294,0.00681294,0.00681294
Return,CAGR,0.104083,0.158586,0.00681294,0,0.104507,0.0339243
Return,CAGR over cash,0.0972701,0.151773,0,-0.00681294,0.0976941,0.0271113
Return,Overall return,1.81409,3.65592,0.0735266,0,1.82541,0.417089
Risk,Volatility,0.105432,0.0970738,0.000598812,0,0.197859,0.0766871
