In [10]:
import numpy as np
import pandas as pd
import cufflinks
import json
import os
from copy import deepcopy
from datetime import datetime

import trading_gym
from trading_gym.ray.logger import calculate_tearsheet, CustomLogger, PPOTensorboard
from trading_gym.registry.gaia.v7.env import GAIAPredictorsContinuousV7
from trading_gym.ray.models import MultiLayersPerceptron

import ray
from ray import rllib
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.misc import normc_initializer, get_activation_fn
from ray import cloudpickle
from ray.utils import binary_to_hex, hex_to_binary

import tensorflow as tf
import tensorflow.contrib.slim as slim

ModelCatalog.register_custom_model(MultiLayersPerceptron.__name__, MultiLayersPerceptron)
cufflinks.go_offline()
ray.init(ignore_reinit_error=True)

2019-06-21 15:45:25,913	ERROR worker.py:1337 -- Calling ray.init() again after it has already been called.


In [8]:
class MLP(Model):
    def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
        import tensorflow.contrib.slim as slim

        with tf.name_scope("fc_net"):
            last_layer = input_dict['obs']
            activation = get_activation_fn(config.get("fcnet_activation"))
            for i, size in enumerate(config.get("fcnet_hiddens"), 1):
                last_layer = slim.fully_connected(
                    inputs=last_layer,
                    num_outputs=size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope="fc{}".format(i),
                )
                last_layer = tf.layers.dropout(
                    inputs=last_layer,
                    rate=config['custom_options']["fcnet_dropout_rate"],
                    training=input_dict['is_training'],
                    name="dropout{}".format(i),
                )
            output = slim.fully_connected(
                inputs=last_layer,
                num_outputs=num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out",
            )
            return output, last_layer


ModelCatalog.register_custom_model(MLP.__name__, MLP)

In [None]:
# Note that episode has 20 steps (21 states, aka 1 month of data).
#env = GAIAPredictorsContinuousV7({})
#episode = env.sample_episode('test-set')
#renderer = env.render()
#renderer.states.to_frame().iplot()

In [None]:
config = rllib.agents.ppo.DEFAULT_CONFIG.copy()
config['env'] = GAIAPredictorsContinuousV7
config['gamma'] = 0.
config['num_workers'] = 6
config['callbacks']['on_train_result'] = tune.function(calculate_tearsheet)
config['entropy_coeff'] = 1e-5
config['batch_mode'] = 'complete_episodes'

config['use_lstm']: False
    
config['lr'] = 1e-5
config['num_sgd_iter'] = 8  # 30 by default
config['sgd_minibatch_size'] = 128
#config['sample_batch_size'] = 200  # does change the results that much
config['train_batch_size'] = 4000  # smoother, faster in time, slower in nr steps

config['use_gae'] = False
config['vf_share_layers'] = False
config['vf_loss_coeff'] = 0.
config['vf_clip_param'] = 0.

config['lambda'] = 0.
config['kl_coeff'] = 0.2
config['kl_target'] = 0.01
config['clip_param'] = 0.3

In [None]:
config['model']['custom_options'] = {'fcnet_dropout_rate': 0.5}
config['model']['custom_model'] = MLP.__name__

In [None]:
for year in range(2007, 2018):
    config['env_config'] = {
        'folds': {
            'training-set': [datetime.min, datetime(year, 12, 31)],
            'test-set': [datetime(year + 1, 1, 1), datetime(year + 1, 12, 31)],
        }
    }
    experiment = tune.Experiment(
        name='WalkForward-{}'.format(year),
        run=rllib.agents.ppo.PPOTrainer,
        stop={"timesteps_total": 750000},
        config=deepcopy(config),
        num_samples=1,
        local_dir='logs',
        #checkpoint_freq=int(1e4 / config['train_batch_size']),  # checkpoint every 100k iters
        checkpoint_at_end=True,
        max_failures=0,
        loggers=[CustomLogger],
    )
    trials = tune.run_experiments(
        experiments=experiment,
        search_alg=tune.suggest.BasicVariantGenerator(),
        scheduler=tune.schedulers.FIFOScheduler(),
        verbose=1,
        reuse_actors=False,
        resume=False,
    )

In [None]:
stop

In [3]:
from ray.tune.analysis import ExperimentAnalysis
import os
directory = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs'
experiment_name = 'Playground-WalkForward2007'
path = os.path.join(directory, experiment_name)
analysis = ExperimentAnalysis(path)
analysis.dataframe()

Unnamed: 0,__logger_started__,_checkpoint,_cmp_greater,_nonjson_fields,batch_mode,best_checkpoint_attr_value,best_result,callbacks:on_episode_end,callbacks:on_episode_start,callbacks:on_episode_step,...,train_batch_size,trainable_name,training_iteration,trial_id,upload_dir,use_gae,verbose,vf_clip_param,vf_loss_coeff,vf_share_layers
0,False,800495ca5f0200000000008c0e7261792e74756e652e74...,True,"[_checkpoint, loggers, sync_function, results,...",complete_episodes,-inf,80049502000000000000004e2e,,,,...,4000,PPOTrainer,13,cc362f46,,False,False,0.0,0.0,False


In [30]:
def cloudpickleloads(obj):
    if isinstance(obj, dict):
        try:
            return cloudpickle.loads(hex_to_binary(obj["value"]))
        except:
            for key, value in obj.items():
                if isinstance(value, dict):
                    if sorted(value) == ['_type', 'value']:
                        obj[key] = cloudpickle.loads(hex_to_binary(value["value"]))
                    else:
                        obj[key] = cloudpickleloads(value)
                elif isinstance(value, list):
                    for i, item in enumerate(value):
                        obj[key][i] = cloudpickleloads(item)
    return obj


path = '/home/Nicholas/Desktop/trading-gym/notebooks/registry/gaia/v7/logs/Playground-2folds-tuning/experiment_state-2019-06-20_15-53-36.json'
with open(path) as f:
    metadata = json.load(f)
    checkpoints = metadata['checkpoints']
    runner_data = metadata['runner_data']
    stats = metadata['stats']

for checkpoint in checkpoints:
    checkpoint = cloudpickleloads(checkpoint)
    cp = cloudpickle.loads(hex_to_binary(checkpoint['_checkpoint']))
    config = checkpoint['config']
    env_cls = config['env']  
    env_config = config['env_config']
    path_restore = os.path.join(checkpoint['logdir'], cp.value)

TypeError: join() argument must be str or bytes, not 'NoneType'

In [11]:
agent = rllib.agents.ppo.PPOTrainer(config, env_cls)
agent.restore(path_restore)
policy = agent.get_policy()

2019-06-21 15:45:33,073	INFO policy_evaluator.py:312 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)


<class 'module'>


2019-06-21 15:45:33,477	INFO dynamic_tf_policy.py:265 -- Initializing loss function with dummy input:

{ 'action_prob': <tf.Tensor 'default_policy/action_prob:0' shape=(?,) dtype=float32>,
  'actions': <tf.Tensor 'default_policy/actions:0' shape=(?, 2) dtype=float32>,
  'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
  'behaviour_logits': <tf.Tensor 'default_policy/behaviour_logits:0' shape=(?, 2) dtype=float32>,
  'dones': <tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=bool>,
  'new_obs': <tf.Tensor 'default_policy/new_obs:0' shape=(?, 3) dtype=float32>,
  'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 3) dtype=float32>,
  'prev_actions': <tf.Tensor 'default_policy/action:0' shape=(?, 2) dtype=float32>,
  'prev_rewards': <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>,
  'rewards': <tf.Tensor 'default_policy/rewards:0' shape=(?,) dtype=float32>,
  'value_targets': <tf.Tensor 'default_policy/value_targets:0' shape=

[2m[36m(pid=20948)[0m <class 'module'>
[2m[36m(pid=20948)[0m 2019-06-21 15:45:57,735	INFO policy_evaluator.py:312 -- Creating policy evaluation worker 6 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=20948)[0m 2019-06-21 15:45:57.828427: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
[2m[36m(pid=20953)[0m 2019-06-21 15:45:58,027	INFO policy_evaluator.py:312 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=20953)[0m <class 'module'>
[2m[36m(pid=20953)[0m 2019-06-21 15:45:58.111944: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
[2m[36m(pid=20949)[0m <class 'module'>
[2m[36m(pid=20949)[0m 2019-06-21 15:45:58,398	INFO policy_evaluator.py:312 -- Creating policy evaluation worker 5 o

In [12]:
env = env_cls(env_config)
episode = env.sample_episode(
    fold='test-set',
    policy=policy,
)
renderer = env.render(
    benchmark=env._load_benchmark().squeeze().loc['2008'],
    risk_free=env._load_risk_free().squeeze().loc['2008'],
)
renderer.plotly_report()
renderer.tearsheet()

2019-06-21 16:04:25,989	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),"ETF(Russell 1000, SMART, USD)","ETF(7-10Y T-Bills, SMART, USD)"
Context,From,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01
Context,To,2008-12-30,2008-12-30,2008-12-30,2008-12-30,2008-12-30,2008-12-30
Context,Years,0.99726,0.99726,0.99726,0.99726,0.99726,0.99726
Context,Observations,261,261,261,261,261,261
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.0286226,0.0286226,0.0286226,0.0286226,0.0286226,0.0286226
Return,CAGR,0.128301,0.209932,0.0286226,0,-0.386245,0.228945
Return,CAGR over cash,0.0996788,0.181309,0,-0.0286226,-0.414867,0.200323
Return,Overall return,0.127928,0.2093,0.0285431,0,-0.385423,0.228252
Risk,Volatility,0.155944,0.11562,0.00119151,0,0.404087,0.112143


In [13]:
weights = env.broker.track_record.to_frame('weights_target')
weights = weights.groupby(weights.index).first()


In [14]:
self = env.broker.track_record.to_frame('nlv')
benchmark = env._load_benchmark().squeeze().loc[:'2018-07']
risk_free = env._load_risk_free().squeeze()

index = self.index
for data in [benchmark, risk_free, weights]:
    if data is not None:
        data.index.name = self.index.name
        index = index.intersection(data.index)
index

DatetimeIndex(['2008-01-01', '2008-01-02', '2008-01-03', '2008-01-04',
               '2008-01-07', '2008-01-08', '2008-01-09', '2008-01-10',
               '2008-01-11', '2008-01-14',
               ...
               '2008-12-17', '2008-12-18', '2008-12-19', '2008-12-22',
               '2008-12-23', '2008-12-24', '2008-12-25', '2008-12-26',
               '2008-12-29', '2008-12-30'],
              dtype='datetime64[ns]', length=261, freq='B')

In [15]:
self.index.union(benchmark.index)

DatetimeIndex(['2004-05-17', '2004-05-18', '2004-05-19', '2004-05-20',
               '2004-05-21', '2004-05-24', '2004-05-25', '2004-05-26',
               '2004-05-27', '2004-05-28',
               ...
               '2018-07-18', '2018-07-19', '2018-07-20', '2018-07-23',
               '2018-07-24', '2018-07-25', '2018-07-26', '2018-07-27',
               '2018-07-30', '2018-07-31'],
              dtype='datetime64[ns]', length=3707, freq=None)

In [16]:
set([1, 2, 3]).intersection(set([3, 4])) 

{3}

In [17]:
self.index.intersection(benchmark.index)

DatetimeIndex(['2008-01-01', '2008-01-02', '2008-01-03', '2008-01-04',
               '2008-01-07', '2008-01-08', '2008-01-09', '2008-01-10',
               '2008-01-11', '2008-01-14',
               ...
               '2008-12-17', '2008-12-18', '2008-12-19', '2008-12-22',
               '2008-12-23', '2008-12-24', '2008-12-25', '2008-12-26',
               '2008-12-29', '2008-12-30'],
              dtype='datetime64[ns]', length=261, freq='B')

In [18]:
benchmark.tail(1)

2018-07-31    577.344211
Name: Index(Aric-Benchmark), dtype: float64

In [19]:
index

DatetimeIndex(['2008-01-01', '2008-01-02', '2008-01-03', '2008-01-04',
               '2008-01-07', '2008-01-08', '2008-01-09', '2008-01-10',
               '2008-01-11', '2008-01-14',
               ...
               '2008-12-17', '2008-12-18', '2008-12-19', '2008-12-22',
               '2008-12-23', '2008-12-24', '2008-12-25', '2008-12-26',
               '2008-12-29', '2008-12-30'],
              dtype='datetime64[ns]', length=261, freq='B')

In [20]:
pd.concat([
    env.broker.track_record.to_frame('nlv'),
    env._load_benchmark().squeeze().loc[:'2018-07'],
    env._load_risk_free().squeeze(),
    weights,
], axis=1, join='inner').dropna().index

DatetimeIndex(['2008-01-01', '2008-01-02', '2008-01-03', '2008-01-04',
               '2008-01-07', '2008-01-08', '2008-01-09', '2008-01-10',
               '2008-01-11', '2008-01-14',
               ...
               '2008-12-17', '2008-12-18', '2008-12-19', '2008-12-22',
               '2008-12-23', '2008-12-24', '2008-12-25', '2008-12-26',
               '2008-12-29', '2008-12-30'],
              dtype='datetime64[ns]', length=261, freq='B')

In [21]:
renderer = env.render(
    benchmark=env._load_benchmark().squeeze().loc[:'2018-07'],
    risk_free=env._load_risk_free().squeeze(),
)
#renderer.plotly_report()
renderer.tearsheet()

Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),"ETF(Russell 1000, SMART, USD)","ETF(7-10Y T-Bills, SMART, USD)"
Context,From,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01
Context,To,2008-12-30,2008-12-30,2008-12-30,2008-12-30,2008-12-30,2008-12-30
Context,Years,0.99726,0.99726,0.99726,0.99726,0.99726,0.99726
Context,Observations,261,261,261,261,261,261
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.0286226,0.0286226,0.0286226,0.0286226,0.0286226,0.0286226
Return,CAGR,0.128301,0.209932,0.0286226,0,-0.386245,0.228945
Return,CAGR over cash,0.0996788,0.181309,0,-0.0286226,-0.414867,0.200323
Return,Overall return,0.127928,0.2093,0.0285431,0,-0.385423,0.228252
Risk,Volatility,0.155944,0.11562,0.00119151,0,0.404087,0.112143


In [24]:
nlv = env.broker.track_record.to_frame('nlv')
benchmark=env._load_benchmark().squeeze()
risk_free=env._load_risk_free().squeeze()
# df = 

In [25]:
df  = pd.concat([nlv.iloc[1:], None, benchmark, risk_free, None], axis=1, join='inner')

In [26]:
renderer = env.render()
renderer.tearsheet()

Unnamed: 0,Unnamed: 1,Strategy,Index(Aric-Benchmark),Index(USD 1M Deposit),Cash(USD),"ETF(Russell 1000, SMART, USD)","ETF(7-10Y T-Bills, SMART, USD)"
Context,From,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01
Context,To,2008-12-30,2008-12-30,2008-12-30,2008-12-30,2008-12-30,2008-12-30
Context,Years,0.99726,0.99726,0.99726,0.99726,0.99726,0.99726
Context,Observations,261,261,261,261,261,261
Context,Risk-free asset,Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit),Index(USD 1M Deposit)
Context,Risk-free CAGR,0.0286226,0.0286226,0.0286226,0.0286226,0.0286226,0.0286226
Return,CAGR,0.128301,0.209932,0.0286226,0,-0.386245,0.228945
Return,CAGR over cash,0.0996788,0.181309,0,-0.0286226,-0.414867,0.200323
Return,Overall return,0.127928,0.2093,0.0285431,0,-0.385423,0.228252
Risk,Volatility,0.155944,0.11562,0.00119151,0,0.404087,0.112143


In [27]:
self = renderer.cumulative_performance
nlv = self.env.broker.track_record.to_frame('nlv').bfill()
performance = nlv.rename({'nlv': 'Strategy'}, axis='columns')
bid_prices = self.env.broker.track_record.to_frame('bid_prices').bfill()
ask_prices = self.env.broker.track_record.to_frame('ask_prices').bfill()
mid_prices = (bid_prices + ask_prices) / 2
performance = performance.join(mid_prices)

AttributeError: 'RendererCumulativePerformance' object has no attribute 'env'

In [28]:
df= env.broker.track_record.to_frame('nlv')
df.rename({'nlv': 'Net liquidation value'}, axis='columns', inplace=True)
df

Unnamed: 0,Net liquidation value
2008-01-01,100.000000
2008-01-02,101.111036
2008-01-03,101.061302
2008-01-04,101.500255
2008-01-07,101.648975
2008-01-08,101.645131
2008-01-09,102.053517
2008-01-10,101.285971
2008-01-11,100.744328
2008-01-14,100.891272


In [29]:
trial = trials[0]
env = trial.config['env'](trial.config)
path = os.path.join(trial.logdir, trial.logdir, 'checkpoint-250')
agent = rllib.agents.ppo.PPOAgent(trial.config, trial.config['env'])
agent.restore(path)
policy = agent.get_policy()
episode = env.sample_episode('training-set', policy)

NameError: name 'trials' is not defined

In [None]:
# BUG: does not restore weights, only agent.
from ray.tune.analysis import ExperimentAnalysis
ea = ExperimentAnalysis(r'/home/federico/Desktop/repos/trading-gym/notebooks/registry/gaia/v6/logs/Ray0.7.0')
ea.get_best_config('entropy')
agent = ea.get_best_trainable(metric='aaa', trainable_cls=rllib.agents.ppo.PPOTrainer)
policy = agent.get_policy()
env = GAIAPredictorsContinuousV6()
episode = env.sample_episode('test-set', policy)
renderer = env.render()
renderer.plotly_report()