In [2]:
import trading_gym
from trading_gym.registry.gaia.v7.env import GAIAPredictorsContinuousV7
from datetime import datetime
from collections import namedtuple
import json
import os
import pandas as pd
import ray
print(datetime.now())
print(trading_gym.__name__, trading_gym.__version__)
print(ray.__name__, ray.__version__)

2019-08-21 13:56:28.896556
trading_gym 0.8.1
ray 0.7.3


In [3]:
# ray.utils.get_system_memory_bytes()

In [4]:
import ray
from ray import rllib, tune
from trading_gym.ray.logger import calculate_tearsheet, CustomLogger
from copy import deepcopy
# ray.init(num_cpus=8,ignore_reinit_error=True,object_store_memory= 10*100 )
ray.init(ignore_reinit_error=True)
#          object_store_memory = 50000000)

ray.__version__

2019-08-21 13:56:30,106	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-21_13-56-30_105575_129448/logs.
2019-08-21 13:56:30,224	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:44130 to respond...
2019-08-21 13:56:30,344	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:47704 to respond...
2019-08-21 13:56:30,347	INFO services.py:809 -- Starting Redis shard with 10.0 GB max memory.
2019-08-21 13:56:30,377	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-08-21_13-56-30_105575_129448/logs.
2019-08-21 13:56:30,381	INFO services.py:1475 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


'0.7.3'

**Set up the environment configuration**

In [5]:
env_config = dict()
env_config['folds'] =  {
    'training-set': [datetime.min, datetime(2008, 3, 18)],
    'test-set': [datetime(2008, 3, 19), datetime.max],
}
env = GAIAPredictorsContinuousV7(env_config)
env

<trading_gym.registry.gaia.v7.env.GAIAPredictorsContinuousV7 at 0x7f598819b978>

**Seting up the PPO agent's configuration**

In [6]:
# print(config)

In [7]:
# There is a 'common config' that sets ray's params
# and then default_config, which sets the PPO config 
config = rllib.agents.ppo.DEFAULT_CONFIG.copy()
#  The env is self.explanatory 
config['env'] = GAIAPredictorsContinuousV7
config['callbacks']['on_train_result'] = tune.function(calculate_tearsheet)
config['num_workers'] = 6

config['gamma'] = 0 # tune.grid_search([0])
config['vf_clip_param'] = 0 # tune.grid_search([0.])
config['vf_loss_coeff'] = 0 # tune.grid_search([0.])
config['lambda'] = 0 # tune.grid_search([0])

config['use_gae'] = False #tune.grid_search([False])
config['vf_share_layers'] = True #tune.grid_search([False])

# If you do use this, have vf_share_layers as True (loss function then combines ) 
config['use_lstm']: True
# Whether to roll out complete epsiodes or truncate them 
config['batch_mode'] = 'complete_episodes'


# Literature suggests having different LR for actor and critic and -3 and -2 
config['lr'] = tune.grid_search([1e-5])

# Size of batches collected from each worker (number of experiences used for one iteration of SGD)
#  Don't think I actually want to use the following. 
# config['sample_batch_size'] = tune.grid_search([256])

# Increase this to maximize the amount of info(no. of experiences(think transition tuples)) we gather before making an update to policy
config['train_batch_size'] = tune.grid_search([4000])
# Total SGD batch size across all devices
config['sgd_minibatch_size'] = 100
# Number of SGD iterations in each outer loop 
config['num_sgd_iter'] = tune.grid_search([8])


# Coefficient of entropy regularizer (i.e how much we encourage explorsation)
config['entropy_coeff'] = tune.grid_search([1e-5])

# Initial coefficient for KL divergence 
config['kl_coeff'] = tune.grid_search([0.2])
# Target value for the KL divergence 
config['kl_target'] = tune.grid_search([0.01])

# PPO clip parameter
config['clip_param'] = tune.grid_search([0.8])
# config['ignore_worker_failures'] = True

The original paper had some param alpha that they multipled both the adam stepsize and the clipping param. Alpha was linearly annealed from 1 to 0 over training.

In [8]:
# print(config)

In [9]:
config['env_config'] = env_config

In [9]:
experiment = tune.Experiment(
    name='0.8_100batch',
    run=rllib.agents.ppo.PPOTrainer,
    stop={"timesteps_total": 1000000},
    config=deepcopy(config),
#     This determines the number of times the grid search is run. s
    num_samples=1,
    local_dir='logs/kl_exps',
    #checkpoint_freq=int(1e4 / config['train_batch_size']),  # checkpoint every 100k iters
    checkpoint_at_end=True,
    max_failures=0,
    loggers=[CustomLogger],
    
)

**For now, let us use the architecture that Federico has used**

In [10]:
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.misc import normc_initializer, get_activation_fn
import tensorflow as tf
import tensorflow.contrib.slim as slim


class MLP(Model):
    def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
        import tensorflow.contrib.slim as slim

        with tf.name_scope("fc_net"):
            last_layer = input_dict['obs']
            activation = get_activation_fn(config.get("fcnet_activation"))
            for i, size in enumerate(config.get("fcnet_hiddens"), 1):
                last_layer = slim.fully_connected(
                    inputs=last_layer,
                    num_outputs=size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope="fc{}".format(i),
                )
#                 We don't need any dropout at this stage
#                 last_layer = tf.layers.dropout(
#                     inputs=last_layer,
#                     rate=config['custom_options']["fcnet_dropout_rate"],
#                     training=input_dict['is_training'],
#                     name="dropout{}".format(i),
#                 )
            output = slim.fully_connected(
                inputs=last_layer,
                num_outputs=num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out",
            )
            return output, last_layer

ModelCatalog.register_custom_model(MLP.__name__, MLP)

In [11]:
from ray.rllib.models.misc import conv2d as rllibconv2d
# import tensorflow as tf
# from tf.nn import conv2d 

#             Find a way of implementing the activation as an argument 
#             activation = 'relu' # or tanh or softplus or sigmoid 
activation = 'relu'

class CNN(Model):
#      Check that this is the corect way to use kwargs
    def _build_layers_v2(self,input_dict:dict, num_outputs: int, config:dict):
        import tensorflow.contrib.slim as slim
        
        with tf.name_scope("cnn_net"):
            last_layer = input_dict['obs']

            conv_w1 = tf.Variable(tf.truncated_normal([1,3,int(last_layer.shape[3]),2],stddev=0.5),trainable = "True")
            layer = tf.nn.conv2d(last_layer,filter = conv_w1,padding='VALID',strides= [1,1,1,1])
            norm = tf.layers.batch_normalization(layer)
            last_layer = tf.nn.relu(norm)
            
            con_w2 = tf.Variable(tf.truncated_normal([1,int(last_layer.shape[2]),int(last_layer.shape[3]),48],stddev=0.5),trainable = "True")
            layer = tf.nn.conv2d(last_layer,filter = conv_w2,padding='VALID',strides=[1,1,1,1])
            norm = tf.layers.batch_normalization(layer)
            last_layer = tf.nn.relu(norm)
            
            con_w3 = tf.Variable(tf.truncated_normal([1,int(last_layer.shape[2]),48,1],stddev=0.5),trainable = "True")
            layer = tf.nn.conv2d(last_layer,filter = conv_w3, padding='VALID',strides=[1,1,1,1])
            norm = tf.layers.batch_normalization(layer)
            last_layer = tf.nn.relu(norm)
            
            dense_input = last_layer[:,:,0,0]
            out_dim = num_outputs
#             Dense fully connected
            dense_w = tf.Variable(tf.truncated_normal([int(dense_input.shape[1]),out_dim],stddev=0.1,trainable = "True"))
            dense_b = tf.Variable(tf.constant(0.1,shape[out_dim]), trainable= "True")
            out = tf.matmul(dense_input,dense_w) + dense_b
            
            if activation == 'relu':
                output = tf.nn.relu(out)
            elif activation == 'tanh':
                output = tf.nn.tanh(out)
            elif activation == 'softplus':
                output = tf.nn.softplus(out)
            elif activation=='sigmoid':
                output = tf.nn.sigmoid(out)
            else: 
                print("CNN Build has failed")
                
        
        return output, out
ModelCatalog.register_custom_model(CNN.__name__, CNN)

In [12]:
# config['model']['custom_options'] = {'fcnet_dropout_rate': 0.5}
config['model']['custom_model'] = MLP.__name__
# config['model']['custom_model'] = CNN.__name__


# Now run the experiment

NTN: Need to work out what tune.suggest does.
May need to .add_configurations for the different experiments (with different hyperparamaeters)

object_store_memory
redis_max_memory

In [13]:
trials = tune.run_experiments(
    experiments=experiment,
    search_alg=tune.suggest.BasicVariantGenerator(),
    scheduler=tune.schedulers.FIFOScheduler(),
    verbose=1,
    reuse_actors=False,
    resume=False,)


2019-07-04 14:02:05,549	INFO tune.py:65 -- Did not find checkpoint file in logs/kl_exps/0.8_100batch.
2019-07-04 14:02:05,550	INFO tune.py:232 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.2/67.5 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING

[2m[36m(pid=15712)[0m 2019-07-04 14:02:12,785	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=15712)[0m 2019-07-04 14:02:12.787201: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=15712)[0m 2019-07-04 14:02:13,488	INFO dynamic_tf_policy.py:265 -- Initializing loss function with dummy input:
[2m

[2m[36m(pid=15711)[0m 
[2m[36m(pid=15711)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=15711)[0m 
[2m[36m(pid=15713)[0m 
[2m[36m(pid=15713)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=15713)[0m 
[2m[36m(pid=15717)[0m 2019-07-04 14:02:32,178	INFO policy_evaluator.py:437 -- Generating sample batch of size 200
[2m[36m(pid=15717)[0m 2019-07-04 14:02:32,247	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((3,), dtype=float64, min=-1.0, max=3.257, mean=1.267)}}
[2m[36m(pid=15717)[0m 2019-07-04 14:02:32,250	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=15717)[0m 2019-07-04 14:02:32,251	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((3,), dtype=float64, min=-1.0, max=3.257, mean=1.267)
[2m[36m(pid=15717)[0m 2019-07-04 14:02:32,251	INFO sampler.

[2m[36m(pid=15712)[0m 2019-07-04 14:02:38,672	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=15712)[0m 
[2m[36m(pid=15712)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=15712)[0m               np.ndarray((4000,), dtype=float32, min=-0.034, max=0.023, mean=0.0),
[2m[36m(pid=15712)[0m               np.ndarray((4000, 3), dtype=float32, min=-14.063, max=9.568, mean=0.107),
[2m[36m(pid=15712)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=15712)[0m               np.ndarray((4000,), dtype=float32, min=-8.034, max=5.421, mean=-0.0),
[2m[36m(pid=15712)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.014, max=0.013, mean=0.003),
[2m[36m(pid=15712)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=15712)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.0/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 79 s, 9 iter, 36000 ts, 0.00253 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.0/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 87 s, 10 iter, 40000 ts, 0.0065 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage o

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.0/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 164 s, 20 iter, 80000 ts, 0.00438 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.0/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 171 s, 21 iter, 84000 ts, 0.00641 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.0/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 246 s, 31 iter, 124000 ts, 0.00779 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.1/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 253 s, 32 iter, 128000 ts, 0.00571 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.0/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 338 s, 42 iter, 168000 ts, 0.00556 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.1/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 345 s, 43 iter, 172000 ts, 0.00912 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.1/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 421 s, 53 iter, 212000 ts, 0.00683 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.1/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 429 s, 54 iter, 216000 ts, 0.00544 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.1/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 508 s, 64 iter, 256000 ts, 0.00931 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.1/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 516 s, 65 iter, 260000 ts, 0.00795 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 591 s, 75 iter, 300000 ts, 0.00796 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 598 s, 76 iter, 304000 ts, 0.00974 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 674 s, 86 iter, 344000 ts, 0.00489 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 682 s, 87 iter, 348000 ts, 0.00754 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 759 s, 97 iter, 388000 ts, 0.00451 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.2/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 767 s, 98 iter, 392000 ts, 0.00839 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory u



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.3/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 798 s, 102 iter, 408000 ts, 0.00644 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.4/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 806 s, 103 iter, 412000 ts, 0.00808 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.5/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 888 s, 113 iter, 452000 ts, 0.00974 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.5/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 896 s, 114 iter, 456000 ts, 0.00626 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.5/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 978 s, 124 iter, 496000 ts, 0.00651 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 33.5/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 985 s, 125 iter, 500000 ts, 0.00733 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.6/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1070 s, 135 iter, 540000 ts, 0.00763 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.6/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1078 s, 136 iter, 544000 ts, 0.0102 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memor

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.6/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1166 s, 146 iter, 584000 ts, 0.00836 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.7/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1174 s, 147 iter, 588000 ts, 0.00745 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.7/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1269 s, 157 iter, 628000 ts, 0.00836 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.7/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1279 s, 158 iter, 632000 ts, 0.00664 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.7/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1366 s, 168 iter, 672000 ts, 0.00843 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.7/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1377 s, 169 iter, 676000 ts, 0.00737 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1462 s, 179 iter, 716000 ts, 0.00724 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1473 s, 180 iter, 720000 ts, 0.00879 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1560 s, 190 iter, 760000 ts, 0.00586 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1571 s, 191 iter, 764000 ts, 0.00596 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1659 s, 201 iter, 804000 ts, 0.00694 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1669 s, 202 iter, 808000 ts, 0.0088 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memor

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1759 s, 212 iter, 848000 ts, 0.00563 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 32.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1767 s, 213 iter, 852000 ts, 0.00887 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1847 s, 223 iter, 892000 ts, 0.00702 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.8/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1853 s, 224 iter, 896000 ts, 0.00796 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1916 s, 234 iter, 936000 ts, 0.00856 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1922 s, 235 iter, 940000 ts, 0.00789 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1983 s, 245 iter, 980000 ts, 0.00987 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	RUNNING, [7 CPUs, 0 GPUs], [pid=15712], 1989 s, 246 iter, 984000 ts, 0.00766 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 7/8 CPUs, 0/0 GPUs
Memo

2019-07-04 15:32:14,855	INFO ray_trial_executor.py:187 -- Destroying actor for trial PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'TERMINATED': 1})
TERMINATED trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	TERMINATED, [7 CPUs, 0 GPUs], [pid=15712], 2013 s, 250 iter, 1000000 ts, 0.00945 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs
Memory usage on this node: 30.9/67.5 GB
Result logdir: logs/kl_exps/0.8_100batch
Number of trials: 1 ({'TERMINATED': 1})
TERMINATED trials:
 - PPOTrainer_GAIAPredictorsContinuousV7_0_clip_param=0.8,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000:	TERMINATED, [7 CPUs, 0 GPUs], [pid=15712], 2013 s, 250 iter, 1000000 ts, 0.00945 rew



**Restore the agent**

In [10]:
from ray import cloudpickle
from ray.utils import binary_to_hex, hex_to_binary


def cloudpickleloads(obj):
    if isinstance(obj, dict):
        try:
            return cloudpickle.loads(hex_to_binary(obj["value"]))
        except:
            for key, value in obj.items():
                if isinstance(value, dict):
                    if sorted(value) == ['_type', 'value']:
                        obj[key] = cloudpickle.loads(hex_to_binary(value["value"]))
                    else:
                        obj[key] = cloudpickleloads(value)
                elif isinstance(value, list):
                    for i, item in enumerate(value):
                        obj[key][i] = cloudpickleloads(item)
    return obj

In [11]:
# 0.8 clip param -- the best run yet. (200 minibatch size)
# path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_12-25-51.json'
# 1.5 clip param
# path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_14-13-39.json'
# # 1.0 clip params
# path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_16-20-44.json'
# # 0.9 clip param
# path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_18-04-12.json'


# # 0.8 clip param and 8k batches 
# path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/0.8clip_8kbatch/experiment_state-2019-07-02_23-46-28.json'

# 0.8 clip and 128 minibatch size 
# path = ' /home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/kl_exps/0.8_rerun/experiment_state-2019-07-04_08-37-47.json'

# 0.8 clip and 400 minibatch size 
path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/kl_exps/0.8_400batch/experiment_state-2019-07-04_10-03-58.json'

with open(path) as f:
    metadata = json.load(f)

runner_data = metadata['runner_data']
stats = metadata['stats']

checkpoint = metadata['checkpoints'][-1]
checkpoint = cloudpickleloads(checkpoint)
checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value
config = checkpoint['config']
env_cls = config['env']
env_config = config['env_config']
path_restore = os.path.join(checkpoint['logdir'], checkpoint_path)

AttributeError: 'ETF' object has no attribute '_symbol'

In [None]:
print(config)

In [None]:
agent = rllib.agents.ppo.PPOTrainer(config, env_cls)
agent.restore(path_restore)

In [None]:
episode = env.sample_episode(
    fold='test-set',
    policy=agent,
    episode_length=None,
    benchmark=env._load_benchmark().squeeze(),
    risk_free=env._load_risk_free().squeeze(),
    burn=1,
)

In [None]:
episode.renderer.plotly_report()
tearsheet = episode.renderer.tearsheet()

In [None]:
tearsheet

In [None]:
tearsheet.iloc[-1,0]

**Now let us compare the annual turnover for all the different clipping params**

In [None]:
paths = {0.5 : '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-20_15-53-36.json',
         0.7 : '/homve/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_20-20-56.json', 
         0.8 : '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_12-25-51.json', 
         0.85: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_22-00-56.json',
         0.9 : '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_18-04-12.json', 
         1.0 : '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_16-20-44.json',
         1.5 : '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-23_14-13-39.json'}

turnovers = dict()

In [None]:
for value, path in paths.items():
    
    with open(path) as f:
        metadata = json.load(f)

    runner_data = metadata['runner_data']
    stats = metadata['stats']

    checkpoint = metadata['checkpoints'][-1]
    checkpoint = cloudpickleloads(checkpoint)
    # print(checkpoint)
    checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value
#     print(checkpoint_path)
    config = checkpoint['config']
    env_cls = config['env']
    env_config = config['env_config']
    path_restore = os.path.join(checkpoint['logdir'], checkpoint_path)
    
    agent = rllib.agents.ppo.PPOTrainer(config, env_cls)
    agent.restore(path_restore)
    
    episode = env.sample_episode(
    fold='test-set',
    policy=agent,
    episode_length=None,
    benchmark=env._load_benchmark().squeeze(),
    risk_free=env._load_risk_free().squeeze(),
    burn=1,
    )
    
    tearsheet = episode.renderer.tearsheet()
    
    turnover -= tearsheet.iloc[-1,0]
    turnovers[value] = turnover