In [1]:
import trading_gym
import numpy as np
from trading_gym.registry.gaia.v8.env import GAIAPredictorsContinuousV8

from datetime import datetime
from collections import namedtuple
import json
import os
import pandas as pd
import ray
from ray import rllib, tune
from copy import deepcopy
from trading_gym.ray.logger import calculate_tearsheet, CustomLogger

# Provenance: record when the notebook ran and which library versions were used.
print(datetime.now())
print(trading_gym.__name__, trading_gym.__version__)
print(ray.__name__, ray.__version__)

# Start the local Ray runtime. Resource limits (num_cpus, object_store_memory)
# are deliberately not passed, so Ray's defaults apply. ignore_reinit_error=True
# lets this cell be re-run without raising when Ray is already initialised.
# Kept as the last expression so the returned address info is displayed.
ray.init(ignore_reinit_error=True)

2019-08-21 13:40:17,415	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-21_13-40-17_415079_76025/logs.
2019-08-21 13:40:17,530	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:15363 to respond...


2019-08-21 13:40:17.412197
trading_gym 0.8.1
ray 0.7.2


2019-08-21 13:40:17,650	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:17137 to respond...
2019-08-21 13:40:17,653	INFO services.py:806 -- Starting Redis shard with 10.0 GB max memory.
2019-08-21 13:40:17,678	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-21_13-40-17_415079_76025/logs.
2019-08-21 13:40:17,681	INFO services.py:1446 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


{'node_ip_address': '10.0.5.4',
 'redis_address': '10.0.5.4:15363',
 'object_store_address': '/tmp/ray/session_2019-08-21_13-40-17_415079_76025/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-08-21_13-40-17_415079_76025/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2019-08-21_13-40-17_415079_76025'}

In [9]:
import tensorflow as tf
import tensorflow.contrib.slim as slim

In [2]:
# Instantiate the environment once with an empty configuration; the bare
# `env` expression at the end displays the object's repr as the cell output.
env_config = {}
env = GAIAPredictorsContinuousV8(env_config)
env

<trading_gym.registry.gaia.v8.env.GAIAPredictorsContinuousV8 at 0x7f9ddbf9d438>

In [3]:
# Build the PPO trainer configuration.
# RLlib's DEFAULT_CONFIG combines the 'common config' (framework-level
# parameters) with the PPO-specific defaults.  A deep copy is required here:
# dict.copy() is shallow, so writing into nested dicts such as
# config['callbacks'] would otherwise modify rllib.agents.ppo.DEFAULT_CONFIG
# itself for the rest of the session.
config = deepcopy(rllib.agents.ppo.DEFAULT_CONFIG)

# Environment class the trainer instantiates.
config['env'] = GAIAPredictorsContinuousV8

# NOTE: these two values have no effect on the walk-forward runs, because the
# training loop replaces config['env_config'] with a fresh dict for each year.
env_config['cost_of_commissions'] = 0.00005  # i.e. 0.005% of traded value in dollars (realistic)
env_config['cost_of_spread'] = 0.0001  # i.e. bid-ask spread is 0.01% (realistic)

config['env_config'] = env_config

# Compute the tearsheet after every training iteration.
config['callbacks']['on_train_result'] = tune.function(calculate_tearsheet)
config['num_workers'] = 6

# Discounting and the value function are switched off entirely: with gamma = 0
# only the immediate reward counts, and a zero vf_loss_coeff removes the
# critic's loss from the objective.
config['gamma'] = 0  # tune.grid_search([0])
config['vf_clip_param'] = 0  # tune.grid_search([0.])
config['vf_loss_coeff'] = 0  # tune.grid_search([0.])
config['lambda'] = 0  # tune.grid_search([0])

config['use_gae'] = False  # tune.grid_search([False])
config['vf_share_layers'] = False  # tune.grid_search([False])

# If an LSTM is enabled, set vf_share_layers to True (the loss function then
# combines the policy and value losses).
# 'use_lstm' is a model option, so it lives under config['model'].  The
# previous statement `config['use_lstm']: False` was only an annotation and
# never assigned anything.
config['model']['use_lstm'] = False
# Roll out complete episodes instead of truncating them.
config['batch_mode'] = 'complete_episodes'


# Literature suggests different learning rates for actor and critic
# (around 1e-3 and 1e-2).
config['lr'] = tune.grid_search([1e-5])

# Size of batches collected from each worker (number of experiences used for one iteration of SGD).
# Intentionally left at its default:
# config['sample_batch_size'] = tune.grid_search([256])

# Increase this to gather more experience (transition tuples) before each
# policy update.
config['train_batch_size'] = tune.grid_search([4000])
# Total SGD batch size across all devices.
config['sgd_minibatch_size'] = 128
# Number of SGD iterations in each outer loop.
config['num_sgd_iter'] = tune.grid_search([8])


# Coefficient of the entropy regularizer (i.e. how much exploration is encouraged).
config['entropy_coeff'] = tune.grid_search([1e-5])

# Initial coefficient for the KL divergence.
config['kl_coeff'] = tune.grid_search([0.2])
# Target value for the KL divergence.
config['kl_target'] = tune.grid_search([0.01])

# PPO clip parameter.
config['clip_param'] = tune.grid_search([0.8])
# config['ignore_worker_failures'] = True

In [4]:
# print(config)

In [7]:
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.misc import normc_initializer, get_activation_fn
import tensorflow as tf
import tensorflow.contrib.slim as slim


class MLP(Model):
    """Plain multi-layer perceptron exposed to RLlib as a custom model.

    Layer widths come from the model config key ``fcnet_hiddens`` and the
    hidden activation from ``fcnet_activation``.
    """

    def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
        """Stack the fully connected layers on top of the observation tensor.

        Args:
            input_dict: model inputs; only ``input_dict['obs']`` is read.
            num_outputs: width of the final, linear output layer.
            config: model config; ``fcnet_hiddens`` and ``fcnet_activation``
                are read from it.

        Returns:
            A ``(output, last_hidden)`` pair: the output-layer tensor and the
            tensor that feeds it (the observation itself when there are no
            hidden layers).
        """
        import tensorflow.contrib.slim as slim

        with tf.name_scope("fc_net"):
            hidden = input_dict['obs']
            hidden_activation = get_activation_fn(config.get("fcnet_activation"))

            # Hidden layers are scoped fc1, fc2, ... in the order they are listed.
            for layer_idx, width in enumerate(config.get("fcnet_hiddens"), start=1):
                hidden = slim.fully_connected(
                    inputs=hidden,
                    num_outputs=width,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=hidden_activation,
                    scope="fc{}".format(layer_idx),
                )

            # Linear head with a much smaller weight initialisation (0.01 vs 1.0).
            head = slim.fully_connected(
                inputs=hidden,
                num_outputs=num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out",
            )

        return head, hidden

# Register the network with RLlib's model catalog under its class name.
ModelCatalog.register_custom_model(MLP.__name__, MLP)

# Select the custom model by rebinding config['model'] to a fresh dict instead
# of mutating the existing one in place.  This way a nested dict that is still
# shared with RLlib's DEFAULT_CONFIG (e.g. after a shallow copy) is never
# modified.
config['model'] = dict(config['model'], custom_model=MLP.__name__)

In [8]:

# Walk-forward training: for each year, train on everything up to the end of
# that year and hold out the following calendar year as the test set.  Every
# fold is run as its own Tune experiment.
#
# The trials of every fold are kept in `trials_by_year`; previously `trials`
# was rebound on each iteration, so only the last fold's results survived.
# `trials` still refers to the last fold after the loop.
trials_by_year = {}

for year in range(2007, 2018):
    print('_______________________________________{}____________________________________________'.format(year))

    # Fold boundaries plus transaction costs set to 10x the values noted as
    # defaults in the inline comments.
    config['env_config'] = {
        'folds': {
            'training-set': [datetime.min, datetime(year, 12, 31)],
            'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
        },
        'cost_of_commissions': 0.00050,   #0.00005 default
        'cost_of_spread': 0.0010, #0.0001
    }
    experiment = tune.Experiment(
        name='clip_0.8-tc-WalkForward-750k{}'.format(year),
        run=rllib.agents.ppo.PPOTrainer,
        stop={"timesteps_total": 750000},
        # Deep copy so the experiment keeps its own snapshot of this fold's
        # settings while `config` is reused for the next year.
        config=deepcopy(config),
        num_samples=1,
        local_dir='logs/tran_cost_x10',
        # Periodic checkpointing is disabled.  The expression below needs a
        # plain integer batch size, whereas config['train_batch_size'] is a
        # grid_search spec in this notebook.
        #checkpoint_freq=int(1e4 / config['train_batch_size']),
        checkpoint_at_end=True,
        max_failures=0,
        loggers=[CustomLogger],
    )
    trials = tune.run_experiments(
        experiments=experiment,
        search_alg=tune.suggest.BasicVariantGenerator(),
        scheduler=tune.schedulers.FIFOScheduler(),
        verbose=0,
        reuse_actors=False,
        resume=False,
    )
    trials_by_year[year] = trials

2019-07-12 10:49:21,662	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2007.
2019-07-12 10:49:21,664	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2007____________________________________________
[2m[36m(pid=44241)[0m 2019-07-12 10:49:27,573	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=44241)[0m 2019-07-12 10:49:27.574178: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=44241)[0m 2019-07-12 10:49:34,883	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=44241)[0m 
[2m[36m(pid=44241)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=44241)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=44241)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=44241)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=44244)[0m 
[2m[36m(pid=44244)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=44244)[0m 
[2m[36m(pid=44240)[0m 2019-07-12 10:49:59,909	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=44240)[0m 2019-07-12 10:49:59,932	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=3.353, mean=0.789)}}
[2m[36m(pid=44240)[0m 2019-07-12 10:49:59,932	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=44240)[0m 2019-07-12 10:49:59,933	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=3.353, mean=0.789)
[2m[36m(pid=44240)[0m 2019-07-12 10:49:59,933	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=3.353, mean=0.789)
[2m[36m(pid=44240)[0m 2019-07-12 10:49:59,934	INFO sampler.py:525 -- Inputs to compute_actions():


[2m[36m(pid=44241)[0m 2019-07-12 10:50:06,152	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=44241)[0m 
[2m[36m(pid=44241)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=44241)[0m               np.ndarray((4000,), dtype=float32, min=-0.033, max=0.027, mean=0.0),
[2m[36m(pid=44241)[0m               np.ndarray((4000, 5), dtype=float32, min=-14.063, max=12.847, mean=0.233),
[2m[36m(pid=44241)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=44241)[0m               np.ndarray((4000,), dtype=float32, min=-7.827, max=6.436, mean=0.0),
[2m[36m(pid=44241)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.009, max=0.012, mean=0.001),
[2m[36m(pid=44241)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=44241)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=















2019-07-12 11:21:14,752	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 11:21:14,802	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2008.
2019-07-12 11:21:14,804	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2008____________________________________________
[2m[36m(pid=44239)[0m 2019-07-12 11:21:18,030	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=44239)[0m 2019-07-12 11:21:18.031442: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=44239)[0m 2019-07-12 11:21:25,446	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=44239)[0m 
[2m[36m(pid=44239)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=44239)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=44239)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=44239)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=61557)[0m 
[2m[36m(pid=61557)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=61557)[0m 
[2m[36m(pid=61557)[0m 2019-07-12 11:22:01,617	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=61557)[0m 2019-07-12 11:22:01,652	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=0.695, mean=0.005)}}
[2m[36m(pid=61557)[0m 2019-07-12 11:22:01,653	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=61557)[0m 2019-07-12 11:22:01,653	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=0.695, mean=0.005)
[2m[36m(pid=61557)[0m 2019-07-12 11:22:01,654	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=0.695, mean=0.005)
[2m[36m(pid=61557)[0m 2019-07-12 11:22:01,655	INFO sampler.py:525 -- Inputs to compute_actions():


[2m[36m(pid=44239)[0m 2019-07-12 11:22:06,903	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=44239)[0m 
[2m[36m(pid=44239)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=44239)[0m               np.ndarray((4000,), dtype=float32, min=-0.068, max=0.047, mean=-0.0),
[2m[36m(pid=44239)[0m               np.ndarray((4000, 5), dtype=float32, min=-14.063, max=12.847, mean=0.24),
[2m[36m(pid=44239)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=44239)[0m               np.ndarray((4000,), dtype=float32, min=-12.782, max=8.931, mean=0.0),
[2m[36m(pid=44239)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.019, max=0.021, mean=-0.001),
[2m[36m(pid=44239)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=44239)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mea



















2019-07-12 11:53:52,801	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 11:53:52,859	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2009.
2019-07-12 11:53:52,865	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2009____________________________________________
[2m[36m(pid=61661)[0m 2019-07-12 11:53:56,873	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=61661)[0m 2019-07-12 11:53:56.874156: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=61661)[0m 2019-07-12 11:54:04,245	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=61661)[0m 
[2m[36m(pid=61661)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=61661)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=61661)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=61661)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=61616)[0m 
[2m[36m(pid=61616)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=61616)[0m 
[2m[36m(pid=61712)[0m 2019-07-12 11:54:26,779	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=61712)[0m 2019-07-12 11:54:26,813	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=1.548, mean=0.256)}}
[2m[36m(pid=61712)[0m 2019-07-12 11:54:26,813	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=61712)[0m 2019-07-12 11:54:26,814	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=1.548, mean=0.256)
[2m[36m(pid=61712)[0m 2019-07-12 11:54:26,814	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=1.548, mean=0.256)
[2m[36m(pid=61712)[0m 2019-07-12 11:54:26,815	INFO sampler.py:525 -- Inputs to compute_actions():


[2m[36m(pid=61661)[0m 2019-07-12 11:54:33,330	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=61661)[0m 
[2m[36m(pid=61661)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=61661)[0m               np.ndarray((4000,), dtype=float32, min=-0.075, max=0.047, mean=0.0),
[2m[36m(pid=61661)[0m               np.ndarray((4000, 5), dtype=float32, min=-8.926, max=11.969, mean=0.268),
[2m[36m(pid=61661)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=61661)[0m               np.ndarray((4000,), dtype=float32, min=-15.709, max=9.749, mean=0.0),
[2m[36m(pid=61661)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.009, max=0.014, mean=0.002),
[2m[36m(pid=61661)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=61661)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=















2019-07-12 12:26:00,561	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 12:26:00,618	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2010.
2019-07-12 12:26:00,619	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2010____________________________________________
[2m[36m(pid=61617)[0m 2019-07-12 12:26:03,600	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=61617)[0m 2019-07-12 12:26:03.601567: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=61617)[0m 2019-07-12 12:26:11,909	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=61617)[0m 
[2m[36m(pid=61617)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=61617)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=61617)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=61617)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=94360)[0m 
[2m[36m(pid=94360)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=94360)[0m 
[2m[36m(pid=94358)[0m 2019-07-12 12:26:47,673	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=94358)[0m 2019-07-12 12:26:47,710	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=3.782, mean=0.915)}}
[2m[36m(pid=94358)[0m 2019-07-12 12:26:47,710	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=94358)[0m 2019-07-12 12:26:47,711	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=3.782, mean=0.915)
[2m[36m(pid=94358)[0m 2019-07-12 12:26:47,711	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=3.782, mean=0.915)
[2m[36m(pid=94358)[0m 2019-07-12 12:26:47,712	INFO sampler.py:525 -- Inputs to compute_actions():


[2m[36m(pid=61617)[0m 2019-07-12 12:26:54,576	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=61617)[0m 
[2m[36m(pid=61617)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=0.999, mean=0.475),
[2m[36m(pid=61617)[0m               np.ndarray((4000,), dtype=float32, min=-0.045, max=0.043, mean=0.0),
[2m[36m(pid=61617)[0m               np.ndarray((4000, 5), dtype=float32, min=-8.926, max=12.677, mean=0.252),
[2m[36m(pid=61617)[0m               np.ndarray((4000, 2), dtype=float32, min=0.001, max=0.999, mean=0.5),
[2m[36m(pid=61617)[0m               np.ndarray((4000,), dtype=float32, min=-8.39, max=7.973, mean=0.0),
[2m[36m(pid=61617)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.014, max=0.014, mean=0.002),
[2m[36m(pid=61617)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=61617)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, m















2019-07-12 12:58:06,023	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 12:58:06,070	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2011.
2019-07-12 12:58:06,071	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2011____________________________________________
[2m[36m(pid=94492)[0m 2019-07-12 12:58:09,581	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=94492)[0m 2019-07-12 12:58:09.582887: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=94492)[0m 2019-07-12 12:58:16,341	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=94492)[0m 
[2m[36m(pid=94492)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=94492)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=94492)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=94492)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=94440)[0m 
[2m[36m(pid=94440)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=94440)[0m 
[2m[36m(pid=94472)[0m 2019-07-12 12:58:41,789	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=94472)[0m 2019-07-12 12:58:41,808	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)}}
[2m[36m(pid=94472)[0m 2019-07-12 12:58:41,808	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=94472)[0m 2019-07-12 12:58:41,809	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)
[2m[36m(pid=94472)[0m 2019-07-12 12:58:41,809	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)
[2m[36m(pid=94472)[0m 2019-07-12 12:58:41,810	INFO sampler.py:525 -- Inputs to compute_actions():
[2m[36m(pid=9

[2m[36m(pid=94492)[0m 2019-07-12 12:58:48,160	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=94492)[0m 
[2m[36m(pid=94492)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=94492)[0m               np.ndarray((4000,), dtype=float32, min=-0.046, max=0.039, mean=0.0),
[2m[36m(pid=94492)[0m               np.ndarray((4000, 5), dtype=float32, min=-8.926, max=11.08, mean=0.254),
[2m[36m(pid=94492)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=94492)[0m               np.ndarray((4000,), dtype=float32, min=-9.763, max=9.958, mean=-0.0),
[2m[36m(pid=94492)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.015, max=0.011, mean=-0.001),
[2m[36m(pid=94492)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=94492)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=















2019-07-12 13:29:24,375	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 13:29:24,474	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2012.
2019-07-12 13:29:24,475	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2012____________________________________________
[2m[36m(pid=94422)[0m 2019-07-12 13:29:27,997	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=94422)[0m 2019-07-12 13:29:27.997660: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=94422)[0m 2019-07-12 13:29:35,513	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=94422)[0m 
[2m[36m(pid=94422)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=94422)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=94422)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=94422)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=125787)[0m 
[2m[36m(pid=125787)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=125787)[0m 
[2m[36m(pid=125786)[0m 2019-07-12 13:30:09,326	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=125786)[0m 2019-07-12 13:30:09,358	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)}}
[2m[36m(pid=125786)[0m 2019-07-12 13:30:09,360	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=125786)[0m 2019-07-12 13:30:09,360	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)
[2m[36m(pid=125786)[0m 2019-07-12 13:30:09,360	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)
[2m[36m(pid=125786)[0m 2019-07-12 13:30:09,362	INFO sampler.py:525 -- Inputs to compute_actions():
[2m[

[2m[36m(pid=94422)[0m 2019-07-12 13:30:15,759	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=94422)[0m 
[2m[36m(pid=94422)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=94422)[0m               np.ndarray((4000,), dtype=float32, min=-0.033, max=0.031, mean=0.0),
[2m[36m(pid=94422)[0m               np.ndarray((4000, 5), dtype=float32, min=-14.063, max=13.391, mean=0.239),
[2m[36m(pid=94422)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=94422)[0m               np.ndarray((4000,), dtype=float32, min=-7.193, max=6.724, mean=-0.0),
[2m[36m(pid=94422)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.012, max=0.011, mean=0.003),
[2m[36m(pid=94422)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=94422)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean















2019-07-12 14:01:50,242	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 14:01:50,291	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2013.
2019-07-12 14:01:50,292	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2013____________________________________________
[2m[36m(pid=125889)[0m 2019-07-12 14:01:53,583	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=125889)[0m 2019-07-12 14:01:53.584318: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=125889)[0m 2019-07-12 14:02:01,363	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=125889)[0m 
[2m[36m(pid=125889)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=125889)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=125889)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=125889)[0m   'behaviour_logits': <tf.Tensor 'default_p

[2m[36m(pid=125896)[0m 
[2m[36m(pid=125896)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=125896)[0m 
[2m[36m(pid=125870)[0m 2019-07-12 14:02:24,897	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=125870)[0m 2019-07-12 14:02:24,949	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=1.573, mean=0.264)}}
[2m[36m(pid=125870)[0m 2019-07-12 14:02:24,949	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=125870)[0m 2019-07-12 14:02:24,949	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=1.573, mean=0.264)
[2m[36m(pid=125870)[0m 2019-07-12 14:02:24,950	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=1.573, mean=0.264)
[2m[36m(pid=125870)[0m 2019-07-12 14:02:24,951	INFO sampler.py:525 -- Inputs to compute_ac

[2m[36m(pid=125889)[0m 2019-07-12 14:02:30,318	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=125889)[0m 
[2m[36m(pid=125889)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=125889)[0m               np.ndarray((4000,), dtype=float32, min=-0.051, max=0.042, mean=0.0),
[2m[36m(pid=125889)[0m               np.ndarray((4000, 5), dtype=float32, min=-14.063, max=10.774, mean=0.227),
[2m[36m(pid=125889)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=125889)[0m               np.ndarray((4000,), dtype=float32, min=-10.018, max=8.171, mean=0.0),
[2m[36m(pid=125889)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.013, max=0.013, mean=-0.0),
[2m[36m(pid=125889)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=125889)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=















2019-07-12 14:35:12,722	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 14:35:12,777	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2014.
2019-07-12 14:35:12,779	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2014____________________________________________
[2m[36m(pid=125848)[0m 2019-07-12 14:35:16,287	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=125848)[0m 2019-07-12 14:35:16.288268: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=125848)[0m 2019-07-12 14:35:23,767	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=125848)[0m 
[2m[36m(pid=125848)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=125848)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=125848)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=125848)[0m   'behaviour_logits': <tf.Tensor 'default_p

[2m[36m(pid=27804)[0m 2019-07-12 14:36:01,195	INFO sampler.py:552 -- Outputs of compute_actions():
[2m[36m(pid=27804)[0m 
[2m[36m(pid=27804)[0m { 'default_policy': ( np.ndarray((1, 2), dtype=float32, min=0.081, max=0.919, mean=0.5),
[2m[36m(pid=27804)[0m                       [],
[2m[36m(pid=27804)[0m                       { 'action_prob': np.ndarray((1,), dtype=float32, min=1.003, max=1.003, mean=1.003),
[2m[36m(pid=27804)[0m                         'behaviour_logits': np.ndarray((1, 2), dtype=float32, min=-0.002, max=-0.0, mean=-0.001),
[2m[36m(pid=27804)[0m                         'vf_preds': np.ndarray((1,), dtype=float32, min=0.0, max=0.0, mean=0.0)})}
[2m[36m(pid=27804)[0m 
[2m[36m(pid=27804)[0m 2019-07-12 14:36:01,400	INFO sample_batch_builder.py:161 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=27804)[0m 
[2m[36m(pid=27804)[0m { 'agent0': { 'data': { 'action_prob': np.ndarray((20,), dtype=float32, min=0.992, max=1.03, mean=1

[2m[36m(pid=125848)[0m 2019-07-12 14:36:12,118	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
















2019-07-12 15:08:44,227	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 15:08:44,290	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2015.
2019-07-12 15:08:44,292	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2015____________________________________________
[2m[36m(pid=27946)[0m 2019-07-12 15:08:48,301	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=27946)[0m 2019-07-12 15:08:48.302875: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=27946)[0m 2019-07-12 15:08:56,635	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=27946)[0m 
[2m[36m(pid=27946)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=27946)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=27946)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=27946)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=27882)[0m 2019-07-12 15:09:24,796	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=27882)[0m 2019-07-12 15:09:24,821	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=3.35, mean=0.788)}}
[2m[36m(pid=27882)[0m 2019-07-12 15:09:24,821	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=27882)[0m 2019-07-12 15:09:24,821	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=3.35, mean=0.788)
[2m[36m(pid=27882)[0m 2019-07-12 15:09:24,822	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=3.35, mean=0.788)
[2m[36m(pid=27882)[0m 2019-07-12 15:09:24,824	INFO sampler.py:525 -- Inputs to compute_actions():
[2m[36m(pid=27882)[0m 
[2m[36m(pid=27882)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=27882)[0m                                   'env_id': 0,
[2m[36m(pi

[2m[36m(pid=27946)[0m 2019-07-12 15:09:30,359	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=27946)[0m 
[2m[36m(pid=27946)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=27946)[0m               np.ndarray((4000,), dtype=float32, min=-0.064, max=0.042, mean=0.0),
[2m[36m(pid=27946)[0m               np.ndarray((4000, 5), dtype=float32, min=-13.352, max=10.882, mean=0.263),
[2m[36m(pid=27946)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=27946)[0m               np.ndarray((4000,), dtype=float32, min=-13.42, max=8.77, mean=0.0),
[2m[36m(pid=27946)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.009, max=0.01, mean=0.002),
[2m[36m(pid=27946)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=27946)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.















2019-07-12 15:41:23,734	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 15:41:23,869	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2016.
2019-07-12 15:41:23,876	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2016____________________________________________
[2m[36m(pid=27868)[0m 2019-07-12 15:41:27,093	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=27868)[0m 2019-07-12 15:41:27.094217: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=27868)[0m 2019-07-12 15:41:34,430	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=27868)[0m 
[2m[36m(pid=27868)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=27868)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=27868)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=27868)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=61325)[0m 
[2m[36m(pid=61325)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=61325)[0m 
[2m[36m(pid=61324)[0m 2019-07-12 15:42:09,665	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=61324)[0m 2019-07-12 15:42:09,690	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=-1.0, max=4.971, mean=1.266)}}
[2m[36m(pid=61324)[0m 2019-07-12 15:42:09,691	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=61324)[0m 2019-07-12 15:42:09,691	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=-1.0, max=4.971, mean=1.266)
[2m[36m(pid=61324)[0m 2019-07-12 15:42:09,692	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=-1.0, max=4.971, mean=1.266)
[2m[36m(pid=61324)[0m 2019-07-12 15:42:09,693	INFO sampler.py:525 -- Inputs to compute_actions():


[2m[36m(pid=27868)[0m 2019-07-12 15:42:16,467	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=27868)[0m 
[2m[36m(pid=27868)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=27868)[0m               np.ndarray((4000,), dtype=float32, min=-0.061, max=0.025, mean=0.0),
[2m[36m(pid=27868)[0m               np.ndarray((4000, 5), dtype=float32, min=-15.841, max=15.356, mean=0.254),
[2m[36m(pid=27868)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=27868)[0m               np.ndarray((4000,), dtype=float32, min=-12.915, max=5.212, mean=0.0),
[2m[36m(pid=27868)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.007, max=0.007, mean=0.0),
[2m[36m(pid=27868)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=27868)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0















2019-07-12 16:13:56,314	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-12 16:13:56,381	INFO tune.py:65 -- Did not find checkpoint file in logs/tran_cost_x10/clip_0.8-tc-WalkForward-750k2017.
2019-07-12 16:13:56,382	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2017____________________________________________
[2m[36m(pid=61454)[0m 2019-07-12 16:13:59,979	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=61454)[0m 2019-07-12 16:13:59.980205: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=61454)[0m 2019-07-12 16:14:07,920	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=61454)[0m 
[2m[36m(pid=61454)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=61454)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=61454)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=61454)[0m   'behaviour_logits': <tf.Tensor 'default_policy/be

[2m[36m(pid=61381)[0m 
[2m[36m(pid=61381)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=61381)[0m 
[2m[36m(pid=61437)[0m 2019-07-12 16:14:32,071	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=61437)[0m 2019-07-12 16:14:32,153	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)}}
[2m[36m(pid=61437)[0m 2019-07-12 16:14:32,153	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=61437)[0m 2019-07-12 16:14:32,154	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)
[2m[36m(pid=61437)[0m 2019-07-12 16:14:32,154	INFO sampler.py:411 -- Filtered obs: np.ndarray((5,), dtype=float64, min=0.0, max=1.0, mean=0.2)
[2m[36m(pid=61437)[0m 2019-07-12 16:14:32,155	INFO sampler.py:525 -- Inputs to compute_actions():
[2m[36m(pid=6

[2m[36m(pid=61454)[0m 2019-07-12 16:14:38,744	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=61454)[0m 
[2m[36m(pid=61454)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=61454)[0m               np.ndarray((4000,), dtype=float32, min=-0.041, max=0.038, mean=0.0),
[2m[36m(pid=61454)[0m               np.ndarray((4000, 5), dtype=float32, min=-14.063, max=10.831, mean=0.234),
[2m[36m(pid=61454)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=61454)[0m               np.ndarray((4000,), dtype=float32, min=-8.453, max=7.603, mean=0.0),
[2m[36m(pid=61454)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.013, max=0.014, mean=-0.0),
[2m[36m(pid=61454)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=61454)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0















2019-07-12 16:44:33,359	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPO_GAIAPredictorsContinuousV8_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [10]:

# for year in range(2007, 2018):
#     print('_______________________________________{}____________________________________________'.format(year))
    
    
#     config['env_config'] = {
#         'folds': {
#             'training-set': [datetime.min, datetime(year, 12, 31)],
#             'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
#         }, 
#         'cost_of_commissions': 0.00025,   #0.00005 default
#         'cost_of_spread': 0.0050, #0.0001
#     }
#     experiment = tune.Experiment(
#         name='clip_0.8-tc-WalkForward-750k{}'.format(year),
#         run=rllib.agents.ppo.PPOTrainer,
#         stop={"timesteps_total": 750000},
#         config=deepcopy(config),
#         num_samples=1,
#         local_dir='logs/tran_cost_x50',
#         #checkpoint_freq=int(1e4 / config['train_batch_size']),  # checkpoint every 100k iters
#         checkpoint_at_end=True,
#         max_failures=0,
#         loggers=[CustomLogger],
#     )
#     trials = tune.run_experiments(
#         experiments=experiment,
#         search_alg=tune.suggest.BasicVariantGenerator(),
#         scheduler=tune.schedulers.FIFOScheduler(),
#         verbose=0,
#         reuse_actors=False,
#         resume=False,
#     )

In [11]:
from ray import cloudpickle
from ray.utils import binary_to_hex, hex_to_binary


def cloudpickleloads(obj):
    """Recursively decode hex-encoded cloudpickle payloads found inside ``obj``.

    If ``obj`` is a dict whose ``"value"`` entry can be decoded with
    ``hex_to_binary`` followed by ``cloudpickle.loads``, the decoded object is
    returned. Otherwise the dict is walked and updated in place:

    * dict values whose keys are exactly ``'_type'`` and ``'value'`` are
      replaced by their decoded payload,
    * other dict values are processed recursively,
    * list values have each item processed recursively.

    Anything that is not a dict is returned unchanged.

    NOTE(security): unpickling executes arbitrary code, so only call this on
    experiment-state files that come from a trusted source.
    """
    if not isinstance(obj, dict):
        return obj

    try:
        return cloudpickle.loads(hex_to_binary(obj["value"]))
    except Exception:
        # Deliberate best-effort: ``obj`` is not itself an encoded payload
        # (no "value" key, or it does not decode), so fall through and look
        # for payloads nested inside it. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate.
        pass

    for key, value in obj.items():
        if isinstance(value, dict):
            if sorted(value) == ['_type', 'value']:
                obj[key] = cloudpickle.loads(hex_to_binary(value["value"]))
            else:
                obj[key] = cloudpickleloads(value)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                value[i] = cloudpickleloads(item)
    return obj

In [15]:
# no transaction costs with clip 0.8
# NOTE: `paths` is assigned again further down in this cell, so this mapping
# is replaced before any later cell reads it.
# Maps each walk-forward year to its experiment-state JSON; only the year and
# the time-of-day part of the file name differ between entries.
paths = {
    year: (
        '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/'
        'PPOclip_0.8-WalkForward-750k{}/experiment_state-2019-07-02_{}.json'
    ).format(year, stamp)
    for year, stamp in (
        (2007, '12-08-42'),
        (2008, '12-39-37'),
        (2009, '13-11-44'),
        (2010, '13-44-01'),
        (2011, '14-15-31'),
        (2012, '14-37-48'),
        (2013, '15-03-59'),
        (2014, '15-25-46'),
        (2015, '15-48-10'),
        (2016, '16-11-44'),
        (2017, '16-42-02'),
    )
}

# No transaction costs as well, with clip 1.0 (per the clip_1.0 directory names below)
# paths = {2007: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2007/experiment_state-2019-07-02_17-16-37.json',
#         2008: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2008/experiment_state-2019-07-02_17-49-56.json',
#         2009: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2009/experiment_state-2019-07-02_18-24-50.json',
#         2010: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2010/experiment_state-2019-07-02_18-59-46.json',
#         2011: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2011/experiment_state-2019-07-02_19-34-34.json',
#         2012: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2012/experiment_state-2019-07-02_20-08-38.json',
#         2013: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2013/experiment_state-2019-07-02_20-32-51.json',
#         2014: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2014/experiment_state-2019-07-02_20-57-24.json',
#         2015: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2015/experiment_state-2019-07-02_21-21-59.json',
#         2016: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2016/experiment_state-2019-07-02_21-46-11.json',
#         2017: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_1.0-tc-WalkForward-750k2017/experiment_state-2019-07-02_22-10-54.json'
#         }

# no transaction costs with clip 0.9
# paths = {2007: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2007/experiment_state-2019-07-03_00-01-00.json',
#         2008: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2008/experiment_state-2019-07-03_00-48-41.json',
#         2009: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2009/experiment_state-2019-07-03_01-26-09.json',
#         2010: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2010/experiment_state-2019-07-03_01-51-35.json',
#         2011: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2011/experiment_state-2019-07-03_02-17-30.json',
#         2012: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2012/experiment_state-2019-07-03_02-41-49.json',
#         2013: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2013/experiment_state-2019-07-03_03-07-25.json',
#         2014: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2014/experiment_state-2019-07-03_03-32-32.json',
#         2015: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2015/experiment_state-2019-07-03_03-58-08.json',
#         2016: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2016/experiment_state-2019-07-03_04-23-49.json',
#         2017: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.9-tc-WalkForward-750k2017/experiment_state-2019-07-03_04-49-35.json'
#         }



#  still Transaction cost, clip of 0.8  -- to add to results 
# paths = {2007: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2007/experiment_state-2019-07-03_10-43-30.json',
#         2008: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2008/experiment_state-2019-07-03_11-26-04.json',
#         2009: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2009/experiment_state-2019-07-03_12-04-55.json',
#         2010: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2010/experiment_state-2019-07-03_12-40-43.json',
#         2011: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2011/experiment_state-2019-07-03_13-21-05.json',
#         2012: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2012/experiment_state-2019-07-03_13-58-51.json',
#         2013: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2013/experiment_state-2019-07-03_14-36-10.json',
#         2014: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2014/experiment_state-2019-07-03_15-22-57.json',
#         2015: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2015/experiment_state-2019-07-03_16-10-41.json',
#         2016: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2016/experiment_state-2019-07-03_16-57-02.json',
#         2017: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/clip_0.8-tc-WalkForward-750k2017/experiment_state-2019-07-03_17-44-47.json'
#         }

# Actually with transaction cost now (at the default level) and 0.8 clip param
# paths = {2007: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2007/experiment_state-2019-07-04_09-43-03.json',
#         2008: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2008/experiment_state-2019-07-04_10-15-25.json',
#         2009: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2009/experiment_state-2019-07-04_10-51-51.json',
#         2010: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2010/experiment_state-2019-07-04_11-28-12.json',
#         2011: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2011/experiment_state-2019-07-04_11-57-09.json',
#         2012: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2012/experiment_state-2019-07-04_12-23-50.json',
#         2013: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2013/experiment_state-2019-07-04_12-50-33.json',
#         2014: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2014/experiment_state-2019-07-04_13-16-57.json',
#         2015: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2015/experiment_state-2019-07-04_13-44-00.json',
#         2016: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2016/experiment_state-2019-07-04_14-14-27.json',
#         2017: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost/clip_0.8-tc-WalkForward-750k2017/experiment_state-2019-07-04_14-49-49.json'
#         }

# With transaction cost now (at double the level) and 0.8 clip param
# paths = {2007: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2007/experiment_state-2019-07-08_09-08-56.json',
#         2008: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2008/experiment_state-2019-07-08_09-34-49.json',
#         2009: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2009/experiment_state-2019-07-08_10-02-15.json',
#         2010: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2010/experiment_state-2019-07-08_10-29-33.json',
#         2011: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2011/experiment_state-2019-07-08_10-58-06.json',
#         2012: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2012/experiment_state-2019-07-08_11-24-29.json',
#         2013: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2013/experiment_state-2019-07-08_11-50-59.json',
#         2014: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2014/experiment_state-2019-07-08_12-18-35.json',
#         2015: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2015/experiment_state-2019-07-08_12-47-25.json',
#         2016: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2016/experiment_state-2019-07-08_13-14-43.json',
#         2017: '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_double/clip_0.8-tc-WalkForward-750k2017/experiment_state-2019-07-08_13-41-22.json'
#         }

# With transaction cost now (at triple the level) and 0.8 clip param
# Maps each walk-forward year to its experiment-state JSON; only the year and
# the time-of-day part of the file name differ between entries.
paths = {
    year: (
        '/home/Nicholas/trading-gym/notebooks/registry/gaia/v8/logs/tran_cost_triple/'
        'clip_0.8-tc-WalkForward-750k{}/experiment_state-2019-07-08_{}.json'
    ).format(year, stamp)
    for year, stamp in (
        (2007, '14-26-15'),
        (2008, '14-52-38'),
        (2009, '15-20-13'),
        (2010, '15-47-57'),
        (2011, '16-15-37'),
        (2012, '16-44-41'),
        (2013, '17-13-04'),
        (2014, '17-39-13'),
        (2015, '18-06-10'),
        (2016, '18-31-47'),
        (2017, '18-58-52'),
    )
}

# To-do: put in both the quadruple and the x5 cost levels 

# To-do: put for x6 as well
# Would be interesting to see how the turnover changes as a function of this 
# Will make it easier for us to design something around the turnover



**Check to see what the config is to be sure**

In [16]:
# Sanity check: load the experiment state for the first entry in `paths`,
# decode its last checkpoint and print the trainer config stored with it.
# The `break` at the end means only the first (year, path) pair is inspected.
for year,path in paths.items():
    with open(path) as f:
        metadata = json.load(f)

    # Read from the state file but not used further in this cell.
    runner_data = metadata['runner_data']
    stats = metadata['stats']

    # Take the last checkpoint entry and decode its pickled fields.
    checkpoint = metadata['checkpoints'][-1]
    checkpoint = cloudpickleloads(checkpoint)
    # The '_checkpoint' field is itself hex-encoded; its `.value` is kept as
    # the checkpoint path (not used further in this cell).
    checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value

    config = checkpoint['config']
    print(config)
    break

{'monitor': False, 'log_level': 'INFO', 'callbacks': {'on_episode_start': None, 'on_episode_step': None, 'on_episode_end': None, 'on_sample_end': None, 'on_train_result': tune.function(<function calculate_tearsheet at 0x7f9ddbf89158>), 'on_postprocess_traj': None}, 'ignore_worker_failures': False, 'model': {'conv_filters': None, 'conv_activation': 'relu', 'fcnet_activation': 'tanh', 'fcnet_hiddens': [256, 256], 'free_log_std': False, 'squash_to_range': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'custom_model': 'MLP', 'custom_options': {}}, 'optimizer': {}, 'gamma': 0, 'horizon': None, 'soft_horizon': False, 'env_config': {'folds': {'training-set': [datetime.datetime(1, 1, 1, 0, 0), datetime.datetime(2007, 12, 31, 0, 0)], 'test-set': [datetime.datetime(2008, 1, 1, 0, 0), datetime.datetime(2008, 12, 31, 0, 0)]}, 'cost_of_commiss

In [17]:
# For every walk-forward year: restore the PPO trainer from the last
# checkpoint recorded in the experiment state, sample an episode on the
# 'test-set' fold and keep both the episode and the agent, keyed by year.
# NOTE(review): the recorded output of this cell ends with
# "Exception: Unknown config parameter `local_evaluator_tf_session_args`";
# presumably raised when building the trainer from the stored config, which
# would point at a ray version mismatch -- TODO confirm.
episodes = dict()
agents = dict()
for year, path in paths.items():
    # RESTORE part (a): read the experiment state written by tune.
    with open(path) as f:
        metadata = json.load(f)

    # Read from the state file but not used further in this cell.
    runner_data = metadata['runner_data']
    stats = metadata['stats']

    # Take the last checkpoint entry and decode its pickled fields.
    checkpoint = metadata['checkpoints'][-1]
    checkpoint = cloudpickleloads(checkpoint)
    # The '_checkpoint' field is itself hex-encoded; its `.value` is joined
    # onto the trial's logdir below to form the restore path.
    checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value

    config = checkpoint['config']
    # Don't actually need to redefine the env_cls as it's always the same
    env_cls = config['env']
    env_config = config['env_config']
    print(env_config)
    
    # Optional manual override of the trading costs stored in the checkpoint
    # (left disabled -- original author was unsure whether this is right).
#     env_config['cost_of_commissions'] = 0.00005  
#     env_config['cost_of_spread'] = 0.0001  
    
    path_restore = os.path.join(checkpoint['logdir'], checkpoint_path)
    
    # Rebuild the trainer from the stored config, then load the saved state.
    agent = rllib.agents.ppo.PPOTrainer(config, env_cls)
    agent.restore(path_restore)

    # Fresh environment built from the same env_config the trial used.
    env = env_cls(env_config)

    
    # Run the restored agent over the 'test-set' fold.
    episode = env.sample_episode(
        fold='test-set',
        policy=agent,
        episode_length=None,
        benchmark=env._load_benchmark().squeeze(),
        risk_free=env._load_risk_free().squeeze(),
        burn=1,
    )
    
    # The return values of these to_plotly() calls are discarded here.
    renderer = env.render()
    renderer.level.to_plotly()
    renderer.cost_of_commissions.to_plotly()
    renderer.cost_of_spread.to_plotly()
    
    episodes[year] = episode
    agents[year] = agent

{'folds': {'training-set': [datetime.datetime(1, 1, 1, 0, 0), datetime.datetime(2007, 12, 31, 0, 0)], 'test-set': [datetime.datetime(2008, 1, 1, 0, 0), datetime.datetime(2008, 12, 31, 0, 0)]}, 'cost_of_commissions': 0.00015, 'cost_of_spread': 0.0003}


Exception: Unknown config parameter `local_evaluator_tf_session_args` 

In [None]:
# Render whatever `env` is currently bound to (after the restore loop above
# this is the environment created in its last iteration). Only the final
# expression's value is shown as the cell output.
renderer = env.render()
renderer.level.to_plotly()
renderer.cost_of_commissions.to_plotly()
renderer.cost_of_spread.to_plotly()

In [None]:
# For each walk-forward year, pair the GAIA predictor (first state column)
# with the agent's target weight (first action column), plot both views and
# collect:
#   levels            -- list of per-year pct-change frames of the rendered levels
#   mappings          -- all per-year predictor/weight frames stacked together
#   mapping_functions -- year -> target weight indexed by predictor value
levels = list()
mapping_frames = list()  # per-year frames, concatenated once after the loop
mapping_functions = dict()
for year in paths:
    episode = episodes[year]
    agent = agents[year]  # not used further in this cell

    # Load.
    actions = episode.actions_as_frame()
    states = episode.states_as_frame()

    # Parse.
    gaia_predictor = states[0].to_frame('GAIA Predictor')

    # The first action column is assumed to be the Russell 1000 target weight
    # (it was previously selected with actions[ETF('Russell 1000')]).
    target_weight_russell_1000 = actions[actions.columns[0]]
    target_weight_russell_1000.name = 'Target weight: ' + str(target_weight_russell_1000.name)
    mapping = gaia_predictor.join(target_weight_russell_1000)
    mapping_function = mapping.set_index('GAIA Predictor')

    levels.append(episode.renderer.level.to_frame().pct_change())
    mapping_frames.append(mapping)
    mapping_functions[year] = mapping_function

    # Visualize.
    mapping.iplot(
        title="Hisorical GAIA predictor for Russell 1000 vs agent's target weights",
        secondary_y='GAIA Predictor',
        yTitle=target_weight_russell_1000.name,
        secondary_y_title='GAIA Predictor',
        legend={'orientation': 'h'},
    )
    mapping_function.iplot(
        title='Policy: mapping from GAIA predictor (state) to target weight for Russell 1000 (action)',
        xTitle='GAIA predictor for Russell 1000 (standardized)',
        yTitle='Target weight for Russell 1000',
        kind='scatter',
        mode='markers',
        size=4,
    )

# Stack the per-year frames once instead of growing a DataFrame inside the
# loop (DataFrame.append copies on every call and is gone in pandas >= 2.0).
# pd.concat raises on an empty list, so keep the empty-frame result for that case.
mappings = pd.concat(mapping_frames) if mapping_frames else pd.DataFrame()

In [None]:
# Stitch the per-year pct-change frames collected in `levels` into one
# chronologically sorted frame; missing values become 0.
daily_ret = pd.concat(levels).sort_index().fillna(0)
# Compound the returns into a cumulative return, expressed in percent.
cumulative_performance = (1 + daily_ret).cumprod() - 1
cumulative_performance *= 100

# The second column is taken as the benchmark; this relies on column order.
# NOTE(review): presumably the "Aric" benchmark series -- confirm.
aric = cumulative_performance.columns[1]
# Difference, in percentage points, between the 'Strategy' column and the benchmark.
cumulative_performance['Strategy relative to Aric-Benchmark'] = cumulative_performance['Strategy'] - cumulative_performance[aric]


# Visualizations.
cumulative_performance.iplot(
    legend={'orientation': 'h'},
    yTitle='Total returns',
)

In [None]:
# Convert the percent cumulative returns back into levels (1 + return).
# NOTE: this rebinds `levels`, which the mapping cell created as a list of
# per-year frames; re-running the `pd.concat(levels)` cell after this one
# would therefore operate on this DataFrame instead of that list.
levels = (1 + cumulative_performance / 100)
# Per calendar year: last level divided by first level, minus 1.
# NOTE(review): measured from the first observation of each year, so the move
# from the previous year's last observation is not included -- confirm intended.
annual_rets = (levels.resample('Y').last() / levels.resample('Y').first() - 1)

    
# Overwrite the relative column with the difference of the annual returns.
annual_rets['Strategy relative to Aric-Benchmark'] = annual_rets['Strategy'] - annual_rets[aric]
# Label rows by year and express the values in percent.
annual_rets.index = annual_rets.index.year
annual_rets *= 100
annual_rets.iplot(kind='bar', legend={'orientation': 'h'}, yTitle='%')

In [None]:
# Tearsheet for the level series, without the derived relative column.
# Benchmark and risk-free series are restricted to 2008 onwards.
# NOTE(review): `env` is whatever environment was bound last (the final
# iteration of the restore loop), so `weights` presumably only covers that
# environment's track record -- confirm. `.iloc[1:]` drops its first row.
levels.drop('Strategy relative to Aric-Benchmark', axis='columns').tearsheet(
    benchmark=env._load_benchmark().loc['2008':].squeeze(),
    risk_free=env._load_risk_free().loc['2008':].squeeze(),
    weights=env.broker.track_record.to_frame('weights_target').iloc[1:]
)

In [None]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import cufflinks
cufflinks.go_offline()
init_notebook_mode(connected=False)


# One scatter trace per year: x is the index of that year's mapping function
# (the 'GAIA Predictor' values), y is the target weight. The last point of
# each series is left out by the [:-1] slices.
traces = list()
for year, series in mapping_functions.items():
    trace = go.Scatter(
        x = list(series.squeeze().index[:-1]),
        y = list(series.squeeze().values[:-1]),
        mode = 'markers',
        name = year
    )
    traces.append(trace)
    
# Shared title and axis labels for the combined figure.
layout = go.Layout(
    title='GAIA vs RL mapping functions',
    xaxis=dict(
        title='GAIA Mapping'
    ),
    yaxis=dict(
        title='PPO Mapping'
        )
        
    )
# Draw all years in one figure using plotly's offline `iplot`.
fig = go.Figure(data=traces,layout=layout)
iplot(fig,filename='scatter=mode')

# iplot(traces, filename='scatter-mode')