In [1]:
import trading_gym
from trading_gym.registry.gaia.v7.env import GAIAPredictorsContinuousV7
from datetime import datetime
from collections import namedtuple
import json
import os
import pandas as pd
import ray
# Stamp the run: wall-clock time first, then name and version of each key library.
print(datetime.now())
for module in (trading_gym, ray):
    print(module.__name__, module.__version__)

2019-07-12 08:38:09.123992
trading_gym 0.7.6
ray 0.7.2


In [2]:
import ray
from ray import rllib, tune
from trading_gym.ray.logger import calculate_tearsheet, CustomLogger
from copy import deepcopy
# Start Ray with its default resource settings. `ignore_reinit_error=True` is passed so
# that re-running this cell in a kernel where Ray is already up is not treated as an error.
# Disabled variant that was tried with explicit resource caps:
# ray.init(num_cpus=8,ignore_reinit_error=True,object_store_memory= 10*100 )
ray.init(ignore_reinit_error=True)
# Another disabled cap for the object store:
#          object_store_memory = 50000000)

# Bare expression: the Ray version is shown as the cell output.
ray.__version__

2019-07-12 08:38:09,138	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-12_08-38-09_136918_105476/logs.
2019-07-12 08:38:09,266	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:27935 to respond...
2019-07-12 08:38:09,396	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:23426 to respond...
2019-07-12 08:38:09,401	INFO services.py:806 -- Starting Redis shard with 10.0 GB max memory.
2019-07-12 08:38:09,466	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-12_08-38-09_136918_105476/logs.
2019-07-12 08:38:09,470	INFO services.py:1446 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


'0.7.2'

In [3]:
# Fold boundaries handed to the environment: the 'training-set' fold ends on 2008-03-18
# and the 'test-set' fold starts the day after; both are open-ended on the other side.
env_config = {
    'folds': {
        'training-set': [datetime.min, datetime(2008, 3, 18)],
        'test-set': [datetime(2008, 3, 19), datetime.max],
    },
}
# Build one environment from that config; the bare name displays its repr.
env = GAIAPredictorsContinuousV7(env_config)
env

<trading_gym.registry.gaia.v7.env.GAIAPredictorsContinuousV7 at 0x7f6c5d46c828>

In [4]:
# There is a 'common config' that sets ray's params
# and then default_config, which sets the PPO config.
# Deep-copy the defaults: a shallow .copy() shares the nested 'callbacks' and 'model'
# dicts with rllib.agents.ppo.DEFAULT_CONFIG, so the writes below (and the later
# config['model'][...] assignments) would silently modify the library defaults for
# every trainer created afterwards in this kernel.
config = deepcopy(rllib.agents.ppo.DEFAULT_CONFIG)
# The env is self-explanatory
config['env'] = GAIAPredictorsContinuousV7
config['callbacks']['on_train_result'] = tune.function(calculate_tearsheet)
config['num_workers'] = 6

config['gamma'] = 0 # tune.grid_search([0])
config['vf_clip_param'] = 0 # tune.grid_search([0.])
config['vf_loss_coeff'] = 0 # tune.grid_search([0.])
config['lambda'] = 0 # tune.grid_search([0])

config['use_gae'] = False #tune.grid_search([False])
config['vf_share_layers'] = False #tune.grid_search([False])

# If you do use this, have vf_share_layers as True (loss function then combines)
# The previous line here, `config['use_lstm']: False`, was an annotated expression rather
# than an assignment, so it never set anything. `use_lstm` is a model option, so it is
# set explicitly under config['model'].
# NOTE(review): False is believed to be RLlib's default already - confirm for this version.
config['model']['use_lstm'] = False
# Whether to roll out complete episodes or truncate them
config['batch_mode'] = 'complete_episodes'


# Literature suggests having different LR for actor and critic and -3 and -2
config['lr'] = tune.grid_search([1e-5])

# Size of batches collected from each worker (number of experiences used for one iteration of SGD)
#  Don't think I actually want to use the following.
config['sample_batch_size'] = 200 # tune.grid_search([256])

# Increase this to maximize the amount of info (no. of experiences, i.e. transition tuples) we gather before making an update to the policy
config['train_batch_size'] = tune.grid_search([4000])
# Total SGD batch size across all devices
config['sgd_minibatch_size'] = 128
# Number of SGD iterations in each outer loop
config['num_sgd_iter'] = tune.grid_search([8])


# Coefficient of entropy regularizer (i.e. how much we encourage exploration)
config['entropy_coeff'] = tune.grid_search([1e-5])

# Initial coefficient for KL divergence
config['kl_coeff'] = tune.grid_search([0.2])
# Target value for the KL divergence
config['kl_target'] = tune.grid_search([0.01])

# PPO clip parameter
config['clip_param'] = tune.grid_search([0.7])
# config['ignore_worker_failures'] = True

In [5]:
# Attach the fold definition built in the earlier cell to the trainer config.
# The walk-forward loop further down overwrites this entry once per year.
config['env_config'] = env_config

In [6]:
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.misc import normc_initializer, get_activation_fn
import tensorflow as tf
import tensorflow.contrib.slim as slim


class MLP(Model):
    """Plain fully connected network built with TF-slim for use as an RLlib custom model."""

    def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
        """Stack the hidden layers listed in the model config and add a linear head.

        Args:
            input_dict: model inputs; only the ``'obs'`` entry is read here.
            num_outputs: width of the final, activation-free layer.
            config: model config; ``fcnet_activation`` and ``fcnet_hiddens`` are read.

        Returns:
            A ``(output, last_hidden)`` tuple: the output of the final layer and the
            tensor that was fed into it.
        """
        import tensorflow.contrib.slim as slim

        with tf.name_scope("fc_net"):
            nonlinearity = get_activation_fn(config.get("fcnet_activation"))
            hidden_sizes = config.get("fcnet_hiddens")

            hidden = input_dict['obs']
            # Hidden layers are scoped fc1, fc2, ... in the order they are listed.
            for index, width in enumerate(hidden_sizes, start=1):
                hidden = slim.fully_connected(
                    hidden,
                    width,
                    activation_fn=nonlinearity,
                    weights_initializer=normc_initializer(1.0),
                    scope="fc{}".format(index),
                )
                # No dropout at this stage. A tf.layers.dropout layer driven by
                # config['custom_options']["fcnet_dropout_rate"] and
                # input_dict['is_training'] used to follow each hidden layer here.

            # Linear head; note the much smaller initializer scale (0.01 vs 1.0).
            head = slim.fully_connected(
                hidden,
                num_outputs,
                activation_fn=None,
                weights_initializer=normc_initializer(0.01),
                scope="fc_out",
            )
            return head, hidden

# Register the class under its own name so the trainer config can refer to it by that
# string (see config['model']['custom_model'] in the next cell).
ModelCatalog.register_custom_model(MLP.__name__, MLP)

In [7]:
# Dropout is currently disabled, so no custom options are passed to the model:
# config['model']['custom_options'] = {'fcnet_dropout_rate': 0.5}
# Select the custom network by the name it was registered under.
config['model']['custom_model'] = MLP.__name__
# Alternative model (no CNN class is defined in the cells shown here):
# config['model']['custom_model'] = CNN.__name__


In [8]:
# Walk-forward training: one Tune experiment per cut-off year from 2013 to 2017.
# The 'training-set' fold runs up to 31 December of `year`; the 'test-set' fold is the
# following calendar year.
for year in range(2013, 2018):
    print('_______________________________________{}____________________________________________'.format(year))
    config['env_config'] = dict(
        folds={
            'training-set': [datetime.min, datetime(year, 12, 31)],
            'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
        },
    )
    # Tune receives a deep copy, so later changes to `config` do not affect this run.
    experiment = tune.Experiment(
        name='PPOclip_0.7-WalkForward-750k{}'.format(year),
        run=rllib.agents.ppo.PPOTrainer,
        config=deepcopy(config),
        stop={"timesteps_total": 750000},
        num_samples=1,
        max_failures=0,
        checkpoint_at_end=True,
        #checkpoint_freq=int(1e4 / config['train_batch_size']),  # checkpoint every 100k iters
        loggers=[CustomLogger],
        local_dir='logs',
    )
    trials = tune.run_experiments(
        experiments=experiment,
        scheduler=tune.schedulers.FIFOScheduler(),
        search_alg=tune.suggest.BasicVariantGenerator(),
        reuse_actors=False,
        resume=False,
        verbose=0,
    )

2019-07-12 08:39:08,570	INFO tune.py:65 -- Did not find checkpoint file in logs/PPOclip_0.7-WalkForward-750k2013.
2019-07-12 08:39:08,571	INFO tune.py:233 -- Starting a new experiment.


_______________________________________2013____________________________________________
[2m[36m(pid=105657)[0m 2019-07-12 08:39:16,196	INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=105657)[0m 2019-07-12 08:39:16.197090: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=105657)[0m 2019-07-12 08:39:27,108	INFO dynamic_tf_policy.py:313 -- Initializing loss function with dummy input:
[2m[36m(pid=105657)[0m 
[2m[36m(pid=105657)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=105657)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=105657)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=105657)[0m   'behaviour_logits': <tf.Tensor 'default_p

[2m[36m(pid=105651)[0m 
[2m[36m(pid=105651)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=105651)[0m 
[2m[36m(pid=105653)[0m 2019-07-12 08:40:04,707	INFO rollout_worker.py:428 -- Generating sample batch of size 200
[2m[36m(pid=105653)[0m 2019-07-12 08:40:04,741	INFO sampler.py:308 -- Raw obs from env: { 0: { 'agent0': np.ndarray((3,), dtype=float64, min=-1.508, max=-0.715, mean=-1.074)}}
[2m[36m(pid=105653)[0m 2019-07-12 08:40:04,743	INFO sampler.py:309 -- Info return from env: {0: {'agent0': None}}
[2m[36m(pid=105653)[0m 2019-07-12 08:40:04,744	INFO sampler.py:407 -- Preprocessed obs: np.ndarray((3,), dtype=float64, min=-1.508, max=-0.715, mean=-1.074)
[2m[36m(pid=105653)[0m 2019-07-12 08:40:04,744	INFO sampler.py:411 -- Filtered obs: np.ndarray((3,), dtype=float64, min=-1.508, max=-0.715, mean=-1.074)
[2m[36m(pid=105653)[0m 2019-07-12 08:40:04,746	INFO sampler.py:525 -- Inputs t

[2m[36m(pid=105657)[0m 2019-07-12 08:40:13,489	INFO multi_gpu_impl.py:146 -- Training on concatenated sample batches:
[2m[36m(pid=105657)[0m 
[2m[36m(pid=105657)[0m { 'inputs': [ np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.475),
[2m[36m(pid=105657)[0m               np.ndarray((4000,), dtype=float32, min=-0.036, max=0.033, mean=0.0),
[2m[36m(pid=105657)[0m               np.ndarray((4000, 3), dtype=float32, min=-14.063, max=10.641, mean=0.055),
[2m[36m(pid=105657)[0m               np.ndarray((4000, 2), dtype=float32, min=0.0, max=1.0, mean=0.5),
[2m[36m(pid=105657)[0m               np.ndarray((4000,), dtype=float32, min=-7.595, max=6.921, mean=-0.0),
[2m[36m(pid=105657)[0m               np.ndarray((4000, 2), dtype=float32, min=-0.011, max=0.011, mean=0.001),
[2m[36m(pid=105657)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=105657)[0m               np.ndarray((4000,), dtype=float32, min=0.0, max











2019-07-12 09:10:04,332	ERROR trial_runner.py:487 -- Error processing event.
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 436, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 323, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/ray/worker.py", line 2195, in get
    raise value
ray.exceptions.RayTaskError: [36mray_PPO:train()[39m (pid=105657, host=Nicholas)
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/ray/memory_monitor.py", line 77, in raise_if_low_memory
    self.error_threshold))
ray.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node Nicholas is used (64.17 / 67.53 GB). The top 5 memory consumers are:

PID	MEM	COMMAND
67034	15.03GB	/home/Nicholas/anaconda3/bin/python /home/Nicholas/anaconda3/bi

TuneError: ('Trials did not complete', [PPO_GAIAPredictorsContinuousV7_0_clip_param=0.7,entropy_coeff=1e-05,kl_coeff=0.2,kl_target=0.01,lr=1e-05,num_sgd_iter=8,train_batch_size=4000])

In [None]:
from ray import cloudpickle
from ray.utils import binary_to_hex, hex_to_binary


def cloudpickleloads(obj):
    """Decode hex-encoded cloudpickle payloads found inside ``obj``.

    A dict is first treated as a payload itself: its ``"value"`` entry is hex-decoded
    and unpickled, and the resulting object is returned. If that fails for any ordinary
    reason (no ``"value"`` key, not valid hex, not a pickle), the dict is walked instead:

    * a nested dict whose keys are exactly ``_type`` and ``value`` is replaced by its
      unpickled payload;
    * any other nested dict is processed recursively;
    * dict items of a list value are processed recursively, in place.

    Anything that is not a dict is returned unchanged.

    Args:
        obj: a value loaded from JSON (dict, list, str, number, ...).

    Returns:
        The unpickled object when ``obj`` itself is a payload, otherwise ``obj``
        (dicts are modified in place).

    Warning:
        ``cloudpickle.loads`` can execute arbitrary code. Only run this on
        experiment-state files you produced yourself or otherwise trust.
    """
    if not isinstance(obj, dict):
        return obj
    try:
        # Fetch the payload first so a missing key is detected before any decoding.
        payload = obj["value"]
        return cloudpickle.loads(hex_to_binary(payload))
    except Exception:
        # `except Exception` instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit still propagate; every ordinary decoding failure falls through to
        # the recursive walk below, as before.
        for key, value in obj.items():
            if isinstance(value, dict):
                if sorted(value) == ['_type', 'value']:
                    obj[key] = cloudpickle.loads(hex_to_binary(value["value"]))
                else:
                    obj[key] = cloudpickleloads(value)
            elif isinstance(value, list):
                for i, item in enumerate(value):
                    value[i] = cloudpickleloads(item)
    return obj

In [None]:
# raise ValueError('TODO: update paths with latest runs')
#(1.0 clip)
# NOTE(review): the label above says 1.0 clip, but every folder below is named
# PPOclip_0.8 - confirm which clip value these runs really used.
# NOTE: `paths` is assigned again further down in this cell, and that later assignment
# is the one the loops use; as written this dict has no effect.
paths = {2007: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2007/experiment_state-2019-07-01_10-37-58.json',
        2008: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2008/experiment_state-2019-07-01_12-18-57.json',
        2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2009/experiment_state-2019-07-01_13-41-34.json',
        2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2010/experiment_state-2019-07-01_14-50-33.json',
        2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2011/experiment_state-2019-07-01_15-59-02.json',
        2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2012/experiment_state-2019-07-01_17-19-21.json',
        2013: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2013/experiment_state-2019-07-02_11-49-44.json',
        2014: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2014/experiment_state-2019-07-02_12-12-03.json',
        2015: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2015/experiment_state-2019-07-02_12-42-28.json',
        2016: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2016/experiment_state-2019-07-02_13-14-05.json',
        2017: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2017/experiment_state-2019-07-02_13-46-32.json'
        }

# These are for the 750k runs (0.8 clip) note they're saved in the normal logs folder (have to run it in copy1)
# These are the best results
# paths = {2007: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2007/experiment_state-2019-06-24_23-35-32.json',
#         2008: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2008/experiment_state-2019-06-25_00-28-20.json',
#         2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2009/experiment_state-2019-06-25_01-21-47.json',
#         2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2010/experiment_state-2019-06-25_02-14-52.json',
#         2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2011/experiment_state-2019-06-25_03-08-23.json',
#         2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2012/experiment_state-2019-06-25_04-00-54.json',
#         2013: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2013/experiment_state-2019-06-25_04-54-29.json',
#         2014: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2014/experiment_state-2019-06-25_05-47-18.json',
#         2015: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2015/experiment_state-2019-06-25_06-40-12.json',
#         2016: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2016/experiment_state-2019-06-25_07-33-17.json',
#         2017: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/PPOclip_0.8-WalkForward-750k2017/experiment_state-2019-06-25_08-26-33.json'
#         }

#  (0.9 clip) 
# paths = {2007: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2007/experiment_state-2019-07-02_22-13-01.json',
#         2008: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2008/experiment_state-2019-07-02_22-45-20.json',
#         2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2009/experiment_state-2019-07-02_23-09-36.json',
#         2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2010/experiment_state-2019-07-02_23-33-28.json',
#         2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2011/experiment_state-2019-07-03_00-03-19.json',
#         2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2012/experiment_state-2019-07-03_00-50-22.json',
#         2013: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2013/experiment_state-2019-07-02_17-15-12.json',
#         2014: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2014/experiment_state-2019-07-02_17-47-30.json',
#         2015: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2015/experiment_state-2019-07-02_18-22-00.json',
#         2016: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2016/experiment_state-2019-07-02_18-56-40.json',
#         2017: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.9-WalkForward-750k2017/experiment_state-2019-07-02_19-30-56.json'
#         }

#  Insert 0.7 clip paths (need to add the rest)
# paths = {2007: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.7-WalkForward-750k2007/experiment_state-2019-07-03_09-32-28.json',
#         2008: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.7-WalkForward-750k2008/experiment_state-2019-07-03_10-13-04.json',
#         2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.7-WalkForward-750k2009/experiment_state-2019-07-03_10-56-47.json',
#         2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.7-WalkForward-750k2010/experiment_state-2019-07-03_11-38-06.json',
#         2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.7-WalkForward-750k2011/experiment_state-2019-07-03_12-15-59.json',
#         2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.7-WalkForward-750k2012/experiment_state-2019-07-03_12-51-25.json',
#         2013: '',
#         2014: '',
#         2015: '',
#         2016: '',
#         2017: ''
#         }

#  These are actually 0.9 clip 
# paths = {2007: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2007/experiment_state-2019-07-03_14-23-27.json',
#         2008: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2008/experiment_state-2019-07-03_15-09-40.json',
#         2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2009/experiment_state-2019-07-03_15-58-09.json',
#         2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2010/experiment_state-2019-07-03_16-43-36.json',
#         2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2011/experiment_state-2019-07-03_17-31-51.json',
#         2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2012/experiment_state-2019-07-03_18-19-23.json',
#         2013: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2013/experiment_state-2019-07-03_18-56-29.json',
#         2014: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2014/experiment_state-2019-07-03_19-29-31.json',
#         2015: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2015/experiment_state-2019-07-03_20-04-11.json',
#         2016: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2016/experiment_state-2019-07-03_20-37-50.json',
#         2017: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2017/experiment_state-2019-07-03_21-11-29.json'
#         }

# These will now be the real 1.1 clip param  (note that they end up being found in the same folder, just have different subfolders)
# Experiment-state file per walk-forward year. '' marks a year whose run has not been
# recorded yet; fill the path in once it exists.
paths = {2007: '',
        2008: '',
        2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2009/experiment_state-2019-07-03_15-58-09.json',
        2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2010/experiment_state-2019-07-03_16-43-36.json',
        2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2011/experiment_state-2019-07-03_17-31-51.json',
        2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2012/experiment_state-2019-07-03_18-19-23.json',
        2013: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2013/experiment_state-2019-07-03_18-56-29.json',
        2014: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2014/experiment_state-2019-07-03_19-29-31.json',
        2015: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2015/experiment_state-2019-07-03_20-04-11.json',
        2016: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2016/experiment_state-2019-07-03_20-37-50.json',
        2017: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_1.1-WalkForward-750k2017/experiment_state-2019-07-03_21-11-29.json'
        }
# Drop the '' placeholders. Left in, the very first iteration of the restore loop calls
# open('') and raises FileNotFoundError; filtering here also keeps every later
# `for year in paths` loop limited to years that actually have results.
paths = {year: path for year, path in paths.items() if path}

# These will be the 0.6 clip - they'll be the third set of experiments in the 1.1 folder

# These will be the 0.5 clip - they'll be the 4th set of experiments in the 1.1 folder 



# For every year in `paths`: rebuild the trainer from the last checkpoint recorded in the
# experiment-state file, then sample one episode from that run's 'test-set' fold.
episodes = {}
agents = {}
for year, path in paths.items():
    # RESTORE part (a): read the experiment state and decode its last checkpoint entry.
    with open(path) as f:
        metadata = json.load(f)

    runner_data = metadata['runner_data']
    stats = metadata['stats']

    checkpoint = cloudpickleloads(metadata['checkpoints'][-1])
    checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value
    path_restore = os.path.join(checkpoint['logdir'], checkpoint_path)

    config = checkpoint['config']
    # The env class never changes between years, so re-reading it is not strictly needed.
    env_cls, env_config = config['env'], config['env_config']

    # RESTORE part (b): fresh trainer, then restore from the checkpoint path.
    # Use the public restore(); calling agent._restore(path_restore) here was a bug.
    agent = rllib.agents.ppo.PPOTrainer(config, env_cls)
    agent.restore(path_restore)

    # Roll the restored policy out on this year's test fold.
    env = env_cls(env_config)
    episode = env.sample_episode(
        fold='test-set',
        policy=agent,
        episode_length=None,
        benchmark=env._load_benchmark().squeeze(),
        risk_free=env._load_risk_free().squeeze(),
        burn=1,
    )

    agents[year] = agent
    episodes[year] = episode

In [None]:
# Print the trainer config stored with the last checkpoint of the first usable run.
for year, path in paths.items():
    if not path:
        # '' is a placeholder for a run that has not been recorded; open('') would raise.
        continue
    with open(path) as f:
        metadata = json.load(f)

    # Only the decoded checkpoint's config is needed here; the runner data, stats and
    # checkpoint path that this loop used to extract were never used.
    checkpoint = cloudpickleloads(metadata['checkpoints'][-1])
    config = checkpoint['config']
    print(config)
    break

In [None]:
# Per year: pair the predictor value the agent observed with the weight it chose, collect
# the episode's daily returns, and plot both views of the policy.
# NOTE(review): .iplot comes from cufflinks, which this notebook only imports in a later
# cell - confirm it is already loaded at this point on a fresh kernel.
levels = list()
mapping_frames = list()
mapping_functions = dict()
for year in paths:
    episode = episodes[year]

    # Load.
    actions = episode.actions_as_frame()
    states = episode.states_as_frame()

    # Parse.
    gaia_predictor = states[0].to_frame('GAIA Predictor')

#     The following line was here before
#     target_weight_russell_1000 = actions[ETF('Russell 1000')]
    # The first action column is taken positionally instead of by ETF label.
    target_weight_russell_1000 = actions[actions.columns[0]]
    target_weight_russell_1000.name = 'Target weight: ' + str(target_weight_russell_1000.name)
    mapping = gaia_predictor.join(target_weight_russell_1000)
    mapping_function = mapping.set_index('GAIA Predictor')

    levels.append(episode.renderer.level.to_frame().pct_change())
    # Collect the frames and concatenate once after the loop: DataFrame.append copied the
    # accumulated frame on every iteration and no longer exists in pandas 2.x.
    mapping_frames.append(mapping)
    mapping_functions[year] = mapping_function

    # Visualize.
    mapping.iplot(
        title="Hisorical GAIA predictor for Russell 1000 vs agent's target weights",
        secondary_y='GAIA Predictor',
        yTitle=target_weight_russell_1000.name,
        secondary_y_title='GAIA Predictor',
        legend={'orientation': 'h'},
    )
    mapping_function.iplot(
        title='Policy: mapping from GAIA predictor (state) to target weight for Russell 1000 (action)',
        xTitle='GAIA predictor for Russell 1000 (standardized)',
        yTitle='Target weight for Russell 1000',
        kind='scatter',
        mode='markers',
        size=4,
    )

# pd.concat rejects an empty list, so fall back to an empty frame when no year was processed.
mappings = pd.concat(mapping_frames) if mapping_frames else pd.DataFrame()

In [None]:
# Stitch the per-year daily returns together in date order; missing values (for example
# the first row that pct_change leaves empty in each year) count as a 0 return.
daily_ret = pd.concat(levels).sort_index().fillna(0)

# Compound to a cumulative return, expressed in percent.
cumulative_performance = (daily_ret.add(1).cumprod() - 1) * 100

# The benchmark is picked up positionally as the second column.
aric = cumulative_performance.columns[1]
cumulative_performance['Strategy relative to Aric-Benchmark'] = (
    cumulative_performance['Strategy'].sub(cumulative_performance[aric])
)


# Visualizations.
cumulative_performance.iplot(
    yTitle='Total returns',
    legend={'orientation': 'h'},
)

In [None]:
# Convert the cumulative percentages back into index levels.
# NOTE: this rebinds `levels`, which the mapping cell created as a list of frames, to a
# DataFrame. The tearsheet cell below relies on that; the cumulative-performance cell
# above expects the list again, so re-run the mapping cell before re-running it.
levels = cumulative_performance / 100 + 1

# Return per calendar year: last level of the year relative to the first one.
annual_rets = levels.resample('Y').last().div(levels.resample('Y').first()).sub(1)


# Recompute the relative column from the annual figures instead of reusing the levels.
annual_rets['Strategy relative to Aric-Benchmark'] = annual_rets['Strategy'].sub(annual_rets[aric])
annual_rets.index = annual_rets.index.year
annual_rets = annual_rets * 100
annual_rets.iplot(kind='bar', yTitle='%', legend={'orientation': 'h'})

In [None]:
# Tearsheet for the stitched levels, without the derived relative-performance column.
# `env` is whatever environment the restore loop built last, so the weights shown come
# from that final year's broker only.
# NOTE(review): benchmark and risk-free series are cut at '2008' - confirm this matches
# the first test year actually present in `paths`.
levels.drop(columns='Strategy relative to Aric-Benchmark').tearsheet(
    benchmark=env._load_benchmark().loc['2008':].squeeze(),
    risk_free=env._load_risk_free().loc['2008':].squeeze(),
    weights=env.broker.track_record.to_frame('weights_target').iloc[1:],
)

In [None]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import cufflinks
# Switch cufflinks and plotly to offline rendering inside the notebook.
cufflinks.go_offline()
init_notebook_mode(connected=False)


# One marker trace per year, overlaying every year's predictor -> weight mapping.
traces = []
for year, series in mapping_functions.items():
    flat = series.squeeze()
    # The last observation of each year is left out of the plot.
    traces.append(
        go.Scatter(
            x=list(flat.index[:-1]),
            y=list(flat.values[:-1]),
            mode='markers',
            name=year,
        )
    )

layout = go.Layout(
    title='GAIA vs RL mapping functions',
    xaxis={'title': 'GAIA Mapping'},
    yaxis={'title': 'PPO Mapping'},
)
fig = go.Figure(data=traces, layout=layout)
iplot(fig, filename='scatter=mode')

# iplot(traces, filename='scatter-mode')