In [1]:
import trading_gym
from trading_gym.registry.gaia.v7.env import GAIAPredictorsContinuousV7
from datetime import datetime
from collections import namedtuple
import json
import os
import pandas as pd
import ray
# Stamp the run time and the versions of the key libraries used in this notebook.
print(datetime.now())
for library in (trading_gym, ray):
    print(library.__name__, library.__version__)

2019-08-19 15:22:39.475605
trading_gym 0.6.0
ray 0.7.1


In [2]:
import ray
from ray import rllib, tune
from trading_gym.ray.logger import calculate_tearsheet, CustomLogger
from copy import deepcopy
# Start the local Ray runtime without explicit CPU / object-store limits.
# ignore_reinit_error=True is passed so that re-running this cell in a live kernel
# does not fail on the second ray.init() call.
# Earlier variants with explicit limits, kept for reference:
#   ray.init(num_cpus=8, ignore_reinit_error=True, object_store_memory=10*100)
#   ray.init(ignore_reinit_error=True, object_store_memory=50000000)
ray.init(ignore_reinit_error=True)

# Show the Ray version as the cell output.
ray.__version__

2019-07-02 16:18:01,252	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-02_16-18-01_251957_53675/logs.
2019-07-02 16:18:01,398	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:50924 to respond...
2019-07-02 16:18:01,547	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:63442 to respond...
2019-07-02 16:18:01,552	INFO services.py:806 -- Starting Redis shard with 10.0 GB max memory.
2019-07-02 16:18:01,671	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-02_16-18-01_251957_53675/logs.
2019-07-02 16:18:01,680	INFO services.py:1442 -- Starting the Plasma object store with 20.0 GB memory using /dev/shm.


'0.7.1'

In [None]:
# Instantiate the environment once with a single hand-picked train/test split.
# Each fold is a [start, end] pair; datetime.min / datetime.max are used as the
# outer bounds of the training and test folds respectively.
env_config = {
    'folds': {
        'training-set': [datetime.min, datetime(2008, 3, 18)],
        'test-set': [datetime(2008, 3, 19), datetime.max],
    },
}
env = GAIAPredictorsContinuousV7(env_config)
env

In [None]:
# RLlib has a "common config" that sets Ray's own parameters, and on top of it the
# algorithm defaults (DEFAULT_CONFIG) that set the PPO parameters.
# Deep-copy the defaults: DEFAULT_CONFIG.copy() is shallow, so the nested
# 'callbacks' and 'model' dicts edited in this notebook would be RLlib's own
# module-level dicts and every edit would leak into the library defaults.
config = deepcopy(rllib.agents.ppo.DEFAULT_CONFIG)
# The env is self-explanatory.
config['env'] = GAIAPredictorsContinuousV7
config['callbacks']['on_train_result'] = tune.function(calculate_tearsheet)
config['num_workers'] = 6

# Discount factor, GAE lambda and the value-function terms are all set to zero.
config['gamma'] = 0  # tune.grid_search([0])
config['vf_clip_param'] = 0  # tune.grid_search([0.])
config['vf_loss_coeff'] = 0  # tune.grid_search([0.])
config['lambda'] = 0  # tune.grid_search([0])

config['use_gae'] = False  # tune.grid_search([False])
config['vf_share_layers'] = False  # tune.grid_search([False])

# No LSTM. If you do use one, set vf_share_layers to True (the loss function then
# combines the policy and value terms).
# Fix: this line used to read `config['use_lstm']: False`, which Python parses as a
# bare annotation and assigns nothing. `use_lstm` is a model option, so it is set
# explicitly under config['model'].
config['model']['use_lstm'] = False
# Whether to roll out complete episodes or truncate them.
config['batch_mode'] = 'complete_episodes'


# Literature suggests having different learning rates for actor and critic
# (1e-3 and 1e-2).
config['lr'] = tune.grid_search([1e-5])

# Size of batches collected from each worker (number of experiences used for one
# iteration of SGD). Not used for now.
# config['sample_batch_size'] = tune.grid_search([256])

# Increase this to maximize the amount of experience (think transition tuples)
# gathered before making an update to the policy.
config['train_batch_size'] = tune.grid_search([4000])
# Total SGD batch size across all devices.
config['sgd_minibatch_size'] = 128
# Number of SGD iterations in each outer loop.
config['num_sgd_iter'] = tune.grid_search([8])


# Coefficient of the entropy regularizer (i.e. how much we encourage exploration).
config['entropy_coeff'] = tune.grid_search([1e-5])

# Initial coefficient for the KL divergence.
config['kl_coeff'] = tune.grid_search([0.2])
# Target value for the KL divergence.
config['kl_target'] = tune.grid_search([0.01])

# PPO clip parameter.
# NOTE(review): the experiments launched from this config are still named
# 'PPOclip_0.8-...' although clip_param is 1.0 here -- confirm which value the
# logged runs actually used.
config['clip_param'] = tune.grid_search([1.0])
# config['ignore_worker_failures'] = True

In [None]:
# Hand the single-split environment settings to the trainer config.
# Note: the walk-forward training cell further down replaces this value for every year.
config['env_config'] = env_config

In [3]:
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.misc import normc_initializer, get_activation_fn
import tensorflow as tf
import tensorflow.contrib.slim as slim


class MLP(Model):
    """Plain fully connected network used as a custom RLlib model.

    Hidden-layer widths are read from ``fcnet_hiddens`` and the hidden activation
    from ``fcnet_activation`` in the model config.
    """

    def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
        """Stack the hidden layers on ``input_dict['obs']`` and add a linear head.

        Returns:
            A ``(output, last_hidden)`` tuple: the ``num_outputs``-wide layer with no
            activation, and the layer that feeds it (the observations themselves
            when ``fcnet_hiddens`` is empty).
        """
        import tensorflow.contrib.slim as slim

        with tf.name_scope("fc_net"):
            hidden = input_dict['obs']
            hidden_activation = get_activation_fn(config.get("fcnet_activation"))
            layer_widths = config.get("fcnet_hiddens")

            # Hidden layers are numbered from 1: "fc1", "fc2", ...
            for layer_number, width in enumerate(layer_widths, start=1):
                hidden = slim.fully_connected(
                    inputs=hidden,
                    num_outputs=width,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=hidden_activation,
                    scope="fc{}".format(layer_number),
                )
                # Dropout after each hidden layer is deliberately disabled at this
                # stage (it used tf.layers.dropout with the rate taken from
                # config['custom_options']['fcnet_dropout_rate']).

            # Linear output layer with a much smaller weight initialisation scale.
            head = slim.fully_connected(
                inputs=hidden,
                num_outputs=num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out",
            )
            return head, hidden

# Register the class under its own name ("MLP") so that a trainer config can select
# it through config['model']['custom_model'].
ModelCatalog.register_custom_model(MLP.__name__, MLP)

In [None]:
# Use the registered MLP as the policy network.
# Disabled alternatives, kept for reference: a dropout rate passed through
# custom_options, and a CNN model (no CNN class is defined in the cells above).
# config['model']['custom_options'] = {'fcnet_dropout_rate': 0.5}
config['model']['custom_model'] = MLP.__name__
# config['model']['custom_model'] = CNN.__name__


In [None]:
# Walk-forward training: for every cut-off year, train on everything up to
# 31 December of that year and hold out the following calendar year as the test fold.
# One Tune experiment is launched per year and stops after 750k timesteps.
year_banner = '_______________________________________{}____________________________________________'
experiment_name = 'PPOclip_0.8-WalkForward-750k{}'

for year in range(2013, 2018):
    print(year_banner.format(year))

    training_fold = [datetime.min, datetime(year, 12, 31)]
    test_fold = [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)]
    # This rebinds the shared notebook-level config; the experiment gets its own copy.
    config['env_config'] = {
        'folds': {'training-set': training_fold, 'test-set': test_fold},
    }

    experiment = tune.Experiment(
        name=experiment_name.format(year),
        run=rllib.agents.ppo.PPOTrainer,
        stop={"timesteps_total": 750000},
        config=deepcopy(config),
        num_samples=1,
        local_dir='logs',
        # checkpoint_freq=int(1e4 / config['train_batch_size']),  # checkpoint every 100k iters
        checkpoint_at_end=True,
        max_failures=0,
        loggers=[CustomLogger],
    )
    # `trials` only keeps the result of the most recent year.
    trials = tune.run_experiments(
        experiments=experiment,
        search_alg=tune.suggest.BasicVariantGenerator(),
        scheduler=tune.schedulers.FIFOScheduler(),
        verbose=0,
        reuse_actors=False,
        resume=False,
    )

In [4]:
from ray import cloudpickle
from ray.utils import binary_to_hex, hex_to_binary


def cloudpickleloads(obj):
    """Decode hex-encoded cloudpickle payloads embedded in a JSON-loaded structure.

    Decoding rules:

    * A non-dict ``obj`` is returned unchanged.
    * If ``obj`` is a dict whose ``"value"`` entry can be hex-decoded and
      unpickled, the unpickled object is returned.
    * Otherwise ``obj`` is walked one level at a time and updated IN PLACE:
      a dict value whose keys are exactly ``'_type'`` and ``'value'`` is replaced
      by its unpickled payload, any other dict value is processed recursively,
      and every item of a list value is processed recursively.

    Args:
        obj: Any object, typically a dict produced by ``json.load``.

    Returns:
        The decoded object, or ``obj`` itself (mutated in place) when it is not a
        pickled wrapper.

    Raises:
        Whatever ``hex_to_binary`` / ``cloudpickle.loads`` raise for a nested
        ``{'_type', 'value'}`` wrapper whose payload cannot be decoded.

    Warning:
        Unpickling executes arbitrary code. Only call this on experiment-state
        files that you produced yourself or otherwise trust.
    """
    if not isinstance(obj, dict):
        return obj

    # Best-effort attempt to treat the dict itself as a pickled wrapper.
    # A missing "value" key or an undecodable payload just means "not a wrapper".
    # Catch Exception rather than using a bare `except:` so that KeyboardInterrupt
    # and SystemExit are no longer swallowed.
    try:
        return cloudpickle.loads(hex_to_binary(obj["value"]))
    except Exception:
        pass

    # Replacing values of existing keys is safe while iterating over items().
    for key, value in obj.items():
        if isinstance(value, dict):
            if sorted(value) == ['_type', 'value']:
                obj[key] = cloudpickle.loads(hex_to_binary(value["value"]))
            else:
                obj[key] = cloudpickleloads(value)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                obj[key][i] = cloudpickleloads(item)
    return obj

In [5]:
# raise ValueError('TODO: update paths with latest runs')
#(1.0 clip)
# paths = {2007: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2007/experiment_state-2019-07-01_10-37-58.json',
#         2008: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2008/experiment_state-2019-07-01_12-18-57.json',
#         2009: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2009/experiment_state-2019-07-01_13-41-34.json',
#         2010: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2010/experiment_state-2019-07-01_14-50-33.json',
#         2011: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2011/experiment_state-2019-07-01_15-59-02.json',
#         2012: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2012/experiment_state-2019-07-01_17-19-21.json',
#         2013: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2013/experiment_state-2019-07-02_11-49-44.json',
#         2014: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2014/experiment_state-2019-07-02_12-12-03.json',
#         2015: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2015/experiment_state-2019-07-02_12-42-28.json',
#         2016: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2016/experiment_state-2019-07-02_13-14-05.json',
#         2017: '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/PPO Tuning/logs/PPOclip_0.8-WalkForward-750k2017/experiment_state-2019-07-02_13-46-32.json'
#         }

# These are for the 750k runs (0.8 clip); note they're saved in the normal logs
# folder -- the best run yet.
# `paths` maps each training cut-off year to the Tune experiment-state JSON of that
# run. All files share one directory layout, so only the per-run timestamp differs.
LOG_DIR = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs'
STATE_FILE_TEMPLATE = LOG_DIR + '/PPOclip_0.8-WalkForward-750k{year}/experiment_state-{stamp}.json'
RUN_TIMESTAMPS = [
    (2007, '2019-06-24_23-35-32'),
    (2008, '2019-06-25_00-28-20'),
    (2009, '2019-06-25_01-21-47'),
    (2010, '2019-06-25_02-14-52'),
    (2011, '2019-06-25_03-08-23'),
    (2012, '2019-06-25_04-00-54'),
    (2013, '2019-06-25_04-54-29'),
    (2014, '2019-06-25_05-47-18'),
    (2015, '2019-06-25_06-40-12'),
    (2016, '2019-06-25_07-33-17'),
    (2017, '2019-06-25_08-26-33'),
]
paths = {
    run_year: STATE_FILE_TEMPLATE.format(year=run_year, stamp=run_stamp)
    for run_year, run_stamp in RUN_TIMESTAMPS
}
 


# For every walk-forward year: rebuild the trainer from the last checkpoint recorded
# in the experiment-state file, then sample one episode on that run's 'test-set' fold.
# NOTE: the loop rebinds the notebook-level names `config`, `env_config` and `env`;
# after it finishes they refer to the LAST year in `paths`, and the tearsheet cell
# further down reads `env`.
episodes = dict()  # year -> episode sampled on the test fold
agents = dict()  # year -> restored PPOTrainer
for year, path in paths.items():
    # RESTORE part (a): read the experiment state and locate the checkpoint on disk.
    with open(path) as f:
        metadata = json.load(f)

    # NOTE(review): runner_data and stats are read but not used in this cell.
    runner_data = metadata['runner_data']
    stats = metadata['stats']

    # Take the last checkpoint entry and decode its pickled fields in place.
    checkpoint = metadata['checkpoints'][-1]
    checkpoint = cloudpickleloads(checkpoint)
    # '_checkpoint' is still a hex string at this point; the unpickled object exposes
    # the checkpoint location through `.value`.
    checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value

    config = checkpoint['config']
    # Strictly speaking env_cls does not need to be re-read: it is always the same class.
    env_cls = config['env']
    env_config = config['env_config']
    path_restore = os.path.join(checkpoint['logdir'], checkpoint_path)
    
    # RESTORE part (b): build a trainer from the stored config and restore the checkpoint.
    agent = rllib.agents.ppo.PPOTrainer(config, env_cls)
    agent.restore(path_restore)
    # Keep using the public restore() above: calling agent._restore(path_restore)
    # directly was flagged as a bug by the original author.
    
    # Fresh environment built from the stored env_config (which carries this run's folds).
    env = env_cls(env_config)
    # NOTE(review): the meaning of episode_length=None and burn=1 is defined by
    # sample_episode and is not visible in this notebook -- confirm against trading_gym.
    episode = env.sample_episode(
        fold='test-set',
        policy=agent,
        episode_length=None,
        benchmark=env._load_benchmark().squeeze(),
        risk_free=env._load_risk_free().squeeze(),
        burn=1,
    )
    
    episodes[year] = episode
    agents[year] = agent

2019-07-02 16:18:18,995	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-07-02 16:18:19,386	INFO dynamic_tf_policy.py:265 -- Initializing loss function with dummy input:

{ 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
  'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
  'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
  'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
  'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
  'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 3) dtype=float32>,
  'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 3) dtype=float32>,
  'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 2) dtype=float32>,
  'prev_rewards': <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>,
  'rewards'

[2m[36m(pid=53782)[0m 2019-07-02 16:18:32,625	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 5 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=53786)[0m 2019-07-02 16:18:32,596	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 3 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=53782)[0m 2019-07-02 16:18:32.654948: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=53786)[0m 2019-07-02 16:18:32.663270: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=53789)[0m 2019-07-02 16:18:33,128	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 4 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=53789)[0m 2019-07-02 16:18:33.165382: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supp

2019-07-02 16:18:36,991	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2019-07-02 16:18:42,246	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-07-02 16:18:48,120	INFO policy_evaluator.py:731 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f8dd7dbcfd0>}
2019-07-02 16:18:48,121	INFO policy_evaluator.py:732 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f8dd7dbcba8>}
2019-07-02 16:18:48,122	INFO policy_evaluator.py:343 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f8dd7dbc9e8>}
2019-07-02 16:18:48,485	INFO multi_gpu_optimizer.py:80 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=53788)[0m 2019-07-02 16:18:55,999	INFO dynamic_tf_policy.py:265 -- Initializing loss function with dummy input:
[2m[36m(pid=53788)[0m 
[2m[36m(pid=53788)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=53788)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=53788)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=53788)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=53788)[0m   'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
[2m[36m(pid=53788)[0m   'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 3) dtype=float32>,
[2m[36m(pid=53788)[0m   'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 3) dtype=float32>,
[2m[36m(pid=53788)[0m   'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 2) 

2019-07-02 16:19:06,990	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-07-02 16:19:13,050	INFO policy_evaluator.py:731 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f891f2c76a0>}
2019-07-02 16:19:13,054	INFO policy_evaluator.py:732 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f891f2c7278>}
2019-07-02 16:19:13,055	INFO policy_evaluator.py:343 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f891f2c7048>}
2019-07-02 16:19:13,429	INFO multi_gpu_optimizer.py:80 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=53785)[0m 2019-07-02 16:19:18,331	INFO dynamic_tf_policy.py:265 -- Initializing loss function with dummy input:
[2m[36m(pid=53785)[0m 
[2m[36m(pid=53785)[0m { 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
[2m[36m(pid=53785)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=53785)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=53785)[0m   'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
[2m[36m(pid=53785)[0m   'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
[2m[36m(pid=53785)[0m   'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 3) dtype=float32>,
[2m[36m(pid=53785)[0m   'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 3) dtype=float32>,
[2m[36m(pid=53785)[0m   'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 2) 

2019-07-02 16:19:31,286	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-07-02 16:19:35,226	INFO policy_evaluator.py:731 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x7f8906a65d30>}
2019-07-02 16:19:35,228	INFO policy_evaluator.py:732 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f8906a65908>}
2019-07-02 16:19:35,234	INFO policy_evaluator.py:343 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7f8906a65748>}
2019-07-02 16:19:35,646	INFO multi_gpu_optimizer.py:80 -- LocalMultiGPUOptimizer devices ['/cpu:0']
2019-07-02 16:19:59,489	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2019-07-02 16:20:06,099	INFO policy_evaluator.py:731 -- Built policy map: {'default_policy': <ray.rllib.policy.tf_policy_template.PPO

In [10]:
# Debug aid: print the trainer config stored in the last checkpoint of the FIRST
# experiment in `paths`. Like the restore loop, this rebinds the notebook-level `config`.
for year, path in list(paths.items())[:1]:
    with open(path) as state_file:
        metadata = json.load(state_file)

    runner_data, stats = metadata['runner_data'], metadata['stats']

    # cloudpickleloads decodes the entry in place and returns it.
    checkpoint = cloudpickleloads(metadata['checkpoints'][-1])
    checkpoint_path = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint'])).value

    config = checkpoint['config']
    print(config)

{'monitor': False, 'log_level': 'INFO', 'callbacks': {'on_episode_start': None, 'on_episode_step': None, 'on_episode_end': None, 'on_sample_end': None, 'on_train_result': tune.function(<function calculate_tearsheet at 0x7f8e444b6268>), 'on_postprocess_traj': None}, 'ignore_worker_failures': False, 'model': {'conv_filters': None, 'conv_activation': 'relu', 'fcnet_activation': 'tanh', 'fcnet_hiddens': [256, 256], 'free_log_std': False, 'squash_to_range': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'custom_model': 'MLP', 'custom_options': {}}, 'optimizer': {}, 'gamma': 0, 'horizon': None, 'soft_horizon': False, 'env_config': {'folds': {'training-set': [datetime.datetime(1, 1, 1, 0, 0), datetime.datetime(2007, 12, 31, 0, 0)], 'test-set': [datetime.datetime(2008, 1, 1, 0, 0), datetime.datetime(2008, 12, 31, 0, 0)]}}, 'env': <class '

In [6]:
# For every walk-forward year, pair the state fed to the agent (the GAIA predictor)
# with the action it produced (the target weight in the first action column), and
# plot both the time series and the resulting state -> action mapping.
levels = list()  # per-year frames of daily percentage changes of the rendered levels
mapping_frames = list()  # per-year predictor/weight frames, concatenated after the loop
mapping_functions = dict()  # year -> target weight indexed by predictor value
for year in paths:
    episode = episodes[year]

    # Load.
    actions = episode.actions_as_frame()
    states = episode.states_as_frame()

    # Parse.
    gaia_predictor = states[0].to_frame('GAIA Predictor')

    # The first action column is used as the Russell 1000 target weight
    # (it used to be selected explicitly with actions[ETF('Russell 1000')]).
    target_weight_russell_1000 = actions[actions.columns[0]]
    target_weight_russell_1000.name = 'Target weight: ' + str(target_weight_russell_1000.name)
    mapping = gaia_predictor.join(target_weight_russell_1000)
    mapping_function = mapping.set_index('GAIA Predictor')

    levels.append(episode.renderer.level.to_frame().pct_change())
    mapping_frames.append(mapping)
    mapping_functions[year] = mapping_function

    # Visualize.
    mapping.iplot(
        title="Hisorical GAIA predictor for Russell 1000 vs agent's target weights",
        secondary_y='GAIA Predictor',
        yTitle=target_weight_russell_1000.name,
        secondary_y_title='GAIA Predictor',
        legend={'orientation': 'h'},
    )
    mapping_function.iplot(
        title='Policy: mapping from GAIA predictor (state) to target weight for Russell 1000 (action)',
        xTitle='GAIA predictor for Russell 1000 (standardized)',
        yTitle='Target weight for Russell 1000',
        kind='scatter',
        mode='markers',
        size=4,
    )

# Concatenate once instead of calling DataFrame.append inside the loop: append
# copies the whole frame on every call and no longer exists in pandas 2.0.
# pd.concat raises on an empty list, so keep the empty-frame result for that case.
mappings = pd.concat(mapping_frames) if mapping_frames else pd.DataFrame()

In [7]:
# Stitch the per-year daily returns into one chronological frame; missing values
# (e.g. the first row produced by pct_change) are treated as a 0% return.
daily_ret = pd.concat(levels).sort_index().fillna(0)

# Compound the daily returns and express the cumulative return in percent.
compounded = daily_ret.add(1).cumprod()
cumulative_performance = (compounded - 1) * 100

# NOTE(review): the benchmark is picked by position (second column) -- presumably
# the Aric benchmark; confirm the column order of the rendered levels.
aric = cumulative_performance.columns[1]
strategy_minus_benchmark = cumulative_performance['Strategy'].sub(cumulative_performance[aric])
cumulative_performance['Strategy relative to Aric-Benchmark'] = strategy_minus_benchmark


# Visualizations.
cumulative_performance.iplot(
    yTitle='Total returns',
    legend={'orientation': 'h'},
)

In [8]:
# NOTE: this rebinds `levels` from the list of per-year return frames to a frame of
# index levels (1.0 = starting value), so the cumulative-performance cell cannot be
# re-run after this one without first re-running the per-year mapping cell.
levels = cumulative_performance / 100 + 1

# Return within each calendar year: last level of the year over the first one.
year_end_level = levels.resample('Y').last()
year_start_level = levels.resample('Y').first()
annual_rets = year_end_level / year_start_level - 1

# The relative column is recomputed as a plain difference of the two annual returns.
annual_rets['Strategy relative to Aric-Benchmark'] = annual_rets['Strategy'] - annual_rets[aric]
annual_rets.index = annual_rets.index.year
annual_rets = annual_rets * 100
annual_rets.iplot(kind='bar', legend={'orientation': 'h'}, yTitle='%')

In [9]:
# Tearsheet for the level series, without the derived "relative" column.
# NOTE(review): `env` is whatever environment the restore loop created last, so the
# weights passed here come from that single environment's broker -- confirm this is
# intended for a tearsheet that is meant to span all walk-forward years.
benchmark_from_2008 = env._load_benchmark().loc['2008':].squeeze()
risk_free_from_2008 = env._load_risk_free().loc['2008':].squeeze()
target_weights = env.broker.track_record.to_frame('weights_target').iloc[1:]

tradable_levels = levels.drop(columns='Strategy relative to Aric-Benchmark')
tradable_levels.tearsheet(
    benchmark=benchmark_from_2008,
    risk_free=risk_free_from_2008,
    weights=target_weights,
)

Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),"ETF(Russell 1000, SMART, USD)","ETF(7-10Y T-Bills, SMART, USD)"
Context,From,2018-01-02,2018-01-02,2018-01-02,2018-01-02,2018-01-02,2018-01-02
Context,To,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28,2018-08-28
Context,Years,0.652055,0.652055,0.652055,0.652055,0.652055,0.652055
Context,Observations,171,171,171,171,171,171
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.0196092,0.0196092,0.0196092,0.0196092,0.0196092,0.0196092
Return,CAGR,0.105693,0.116997,0.0196092,0,0.140864,-0.0375795
Return,CAGR over cash,0.0860834,0.0973876,-1.33227e-15,-0.0196092,0.121254,-0.0571887
Return,Overall return,0.0677068,0.0748119,0.012743,0,0.0897315,-0.0246668
Risk,Volatility,0.140658,0.114766,0.000688749,0,0.143906,0.0478027


In [None]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import cufflinks
# Render plotly figures offline, inside the notebook.
cufflinks.go_offline()
init_notebook_mode(connected=False)


# One marker trace per walk-forward year, all drawn on the same axes, so the learned
# state -> action mappings can be compared across years.
traces = []
for year, series in mapping_functions.items():
    points = series.squeeze()
    # NOTE(review): the last observation of every year is dropped ([:-1]); the
    # reason is not visible in this notebook.
    traces.append(
        go.Scatter(
            x=list(points.index[:-1]),
            y=list(points.values[:-1]),
            mode='markers',
            name=year,
        )
    )

layout = go.Layout(
    title='GAIA vs RL mapping functions',
    xaxis={'title': 'GAIA Mapping'},
    yaxis={'title': 'PPO Mapping'},
)
fig = go.Figure(data=traces, layout=layout)
iplot(fig, filename='scatter=mode')

# Earlier variant without an explicit layout:
# iplot(traces, filename='scatter-mode')

[2m[36m(pid=15442)[0m 
[2m[36m(pid=15442)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=15442)[0m 
[2m[36m(pid=15516)[0m 
[2m[36m(pid=15516)[0m Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
[2m[36m(pid=15516)[0m 
[2m[36m(pid=15443)[0m 2019-07-02 16:16:43,545	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 5 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=15443)[0m 2019-07-02 16:16:43.599312: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=15444)[0m 2019-07-02 16:16:44,056	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 6 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=15444)[0m 2019-07-02 16:16:44.090366: I tensorflow/core/platform/cpu_feature_guard.cc:14