In [1]:
import os
import backtrader as bt
import numpy as np

from btgym import BTgymEnv, BTgymDataset
from btgym.strategy.observers import Reward, Position, NormPnL
from btgym.algorithms import Launcher, Unreal, AacStackedRL2Policy
from btgym.research import DevStrat_4_11

#### Stacked LSTM Agent usage example.

Based on NAV_A3C+D from the ["LEARNING TO NAVIGATE IN COMPLEX ENVIRONMENTS"](https://arxiv.org/pdf/1611.03673.pdf) paper by Mirowski et al.;

Modifications to the original paper architecture:
- split Policy/Value outputs: Policy is taken off the first LSTM layer, Value - off the second;
- LSTM state initialisation: the first RNN layer context (policy) is initialised on every episode start, while the second (Value) is reset either at the beginning of every Trial (future work) or every N-constant episodes (60 for this example), motivated by the RL^2 approach by Duan et al.,
  ["FAST REINFORCEMENT LEARNING VIA SLOW REINFORCEMENT LEARNING"](https://arxiv.org/pdf/1611.02779.pdf);
- inner/external observation state split: the external (market) state is encoded via convolution layers and fed to the first LSTM layer; the inner (broker) state is fed into the second LSTM layer and can optionally be encoded via a separate convolution block (doesn't seem to improve much though);
- optional Value Replay loss (`Unreal` feature) improves sample efficiency, but is computationally expensive;

Other details:
- All convolution and LSTM layers are layer-normalized, see the
  ["Layer Normalisation"](https://arxiv.org/abs/1607.06450) paper by Jimmy Ba et al.;
  
- Upd 2.02.18: linear layers are Noisy-Net ones, see the [Noisy Networks for Exploration](https://arxiv.org/abs/1706.10295) paper by Fortunato et al.; policy output is centered using layer normalisation;
- A3C option `time_flat` is ON by default; it improves training stability and reduces computation costs, see the
  [Base_AAC class Note](https://kismuz.github.io/btgym/btgym.algorithms.html#module-btgym.algorithms.aac) for details;
  
Diagram: https://kismuz.github.io/btgym/_images/a3c_stacked_lstm_agent.png

**NOTE:**
Currently it takes ~20M environment steps to fit a 6-month data set of 1-minute bars. Training is much faster on smaller data sets.

In [2]:
# Backtesting engine: a Cerebro instance driving the DevStrat_4_11 strategy.

strategy_params = {
    'drawdown_call': 10,  # max % to lose, in percent of initial cash
    'target_call': 10,    # max % to win, same units
    'skip_frame': 10,
    'gamma': 0.99,
    'reward_scale': 7,    # acts as a gradient booster (per the original note) - touch with care!
}

MyCerebro = bt.Cerebro()
MyCerebro.addstrategy(DevStrat_4_11, **strategy_params)

# Leveraged account:
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)  # commission imitates the spread
MyCerebro.addsizer(bt.sizers.SizerFix, stake=5000)

# Observers visualising reward, position and PnL dynamics (registered in this order):
for observer_cls in (Reward, Position, NormPnL):
    MyCerebro.addobserver(observer_cls)

# Data: EURUSD 1-minute bars, one CSV file per month of 2017.
# Months 01..04 are listed; widen the range to `range(1, 7)` to get up to six months.
data_m1_6_month = [
    './data/DAT_ASCII_EURUSD_M1_2017{:02d}.csv'.format(month)
    for month in range(1, 5)
]

# Dataset: keep exactly one `filename=` line active.
MyDataset = BTgymDataset(
    # filename=data_m1_6_month,  # list of monthly EURUSD files
    filename='./data/test_sine_1min_period256_delta0002.csv',  # simple synthetic sine
    start_weekdays=set(range(7)),  # all seven weekday codes, 0..6
    episode_duration=dict(days=1, hours=23, minutes=40),  # note: roughly 2-day-long episode
    start_00=False,
    time_gap=dict(hours=10),
)

# Environment: class reference plus the keyword arguments it is built with.
env_config = {
    'class_ref': BTgymEnv,
    'kwargs': {
        # Data source and backtesting engine defined above:
        'dataset': MyDataset,
        'engine': MyCerebro,
        # Rendering options:
        'render_modes': ['episode', 'human', 'external', 'internal'],
        'render_state_as_image': True,
        'render_ylabel': 'OHL_diff. / Internals',
        'render_size_episode': (12, 8),
        'render_size_human': (9, 4),
        'render_size_state': (11, 3),
        'render_dpi': 75,
        # Connection options:
        'port': 5000,
        'data_port': 4999,
        'connect_timeout': 90,
        'verbose': 0,
    },
}

# Cluster layout: address, worker / parameter-server counts and the log directory.
cluster_config = {
    'host': '127.0.0.1',
    'port': 12230,
    'num_workers': 6,  # set according to the number of CPUs available or so
    'num_ps': 1,
    'num_envs': 1,
    'log_dir': os.path.expanduser('~/tmp/test_4_11'),  # `~` is resolved to the user's home
}

# Policy: stacked-LSTM RL^2 policy class and its keyword arguments.
policy_config = {
    'class_ref': AacStackedRL2Policy,
    'kwargs': dict(
        lstm_layers=(256, 256),
        lstm_2_init_period=60,  # the N-episode reset period of the second LSTM layer noted in the intro
    ),
}

# Trainer: Unreal algorithm class and its keyword arguments.
trainer_config = {
    'class_ref': Unreal,
    'kwargs': {
        # Optimiser schedule:
        'opt_learn_rate': [1e-4, 1e-4],  # random log-uniform
        'opt_end_learn_rate': 1e-5,
        'opt_decay_steps': 50 * 10**6,
        # Model / loss settings:
        'model_gamma': 0.99,
        'model_gae_lambda': 1.0,
        'model_beta': 0.01,  # entropy regularisation
        'rollout_length': 20,
        'time_flat': True,
        'use_value_replay': False,
        # Summary and rendering frequencies:
        'model_summary_freq': 100,
        'episode_summary_freq': 5,
        'env_render_freq': 20,
    },
}

In [3]:
# Collect every launcher argument in one place, then build the launcher from it.
launcher_params = {
    'cluster_config': cluster_config,
    'env_config': env_config,
    'trainer_config': trainer_config,
    'policy_config': policy_config,
    'test_mode': False,
    'max_env_steps': 100 * 10**6,
    'root_random_seed': 0,
    'purge_previous': 1,  # ask to override previously saved model and logs
    'verbose': 0,
}
launcher = Launcher(**launcher_params)

# Start training:
launcher.run()

[2018-02-06 08:44:57.472827] NOTICE: LauncherShell: </Users/muzikin/tmp/test_4_11> created.
[2018-02-06 08:45:00.297938] NOTICE: UNREAL_0: learn_rate: 0.000100, entropy_beta: 0.010000

********************************************************************************************
**  Press `Ctrl-C` or jupyter:[Kernel]->[Interrupt] to stop training and close launcher.  **
********************************************************************************************

[2018-02-06 08:45:04.341929] NOTICE: UNREAL_1: learn_rate: 0.000100, entropy_beta: 0.010000
[2018-02-06 08:45:04.352484] NOTICE: UNREAL_3: learn_rate: 0.000100, entropy_beta: 0.010000
[2018-02-06 08:45:04.352755] NOTICE: UNREAL_2: learn_rate: 0.000100, entropy_beta: 0.010000
[2018-02-06 08:45:04.354218] NOTICE: UNREAL_4: learn_rate: 0.000100, entropy_beta: 0.010000
[2018-02-06 08:45:04.356964] NOTICE: UNREAL_5: learn_rate: 0.000100, entropy_beta: 0.010000
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Starting queue runn

Process BTgymDataFeedServer-2:1:
Process BTgymServer-4:1:
Process BTgymServer-5:1:
Process DrawCerebro-2:2:36:
Process BTgymServer-6:1:
Process BTgymServer-7:1:
Process BTgymServer-3:1:
Traceback (most recent call last):
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/dataserver.py", line 159, in run
    service_input = socket.recv_pyobj()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/zmq/sugar/socket.py", line 491, in recv_pyobj
    msg = self.recv(flags)
Traceback (most recent call last):
Traceback (most recent call last):
  File "zmq/backend/cython/socket.pyx", line 693, in zmq.backend.cython.socket.Socket.recv (zmq/backend/cython/socket.c:7683)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/muzikin/anaconda/envs

[2018-02-06 08:52:20.585012] NOTICE: LauncherShell: worker_1 has joined.


  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 519, in run
    episode = cerebro.run(stdstats=True, preload=False, oldbuysell=True)[0]
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 519, in run
    episode = cerebro.run(stdstats=True, preload=False, oldbuysell=True)[0]
Traceback (most recent call last):
  File "zmq/backend/cython/socket.pyx", line 145, in zmq.backend.cython.socket._recv_copy (zmq/backend/cython/socket.c:2344)
  File "zmq/backend/cython/checkrc.pxd", line 12, in zmq.backend.cython.checkrc._check_rc (zmq/backend/cython/socket.c:9621)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1073, in run
    runstrat = self.runstrategies(iterstrat)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrade

[2018-02-06 08:52:20.595687] NOTICE: LauncherShell: worker_2 has joined.


KeyboardInterrupt
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 519, in run
    episode = cerebro.run(stdstats=True, preload=False, oldbuysell=True)[0]
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1235, in runstrategies
    self._runnext(runstrats)


[2018-02-06 08:52:20.596972] NOTICE: LauncherShell: worker_3 has joined.


  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1235, in runstrategies
    self._runnext(runstrats)


[2018-02-06 08:52:20.599142] NOTICE: LauncherShell: worker_4 has joined.


  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1500, in _runnext
    for i, ret in enumerate(drets):


[2018-02-06 08:52:20.600472] NOTICE: LauncherShell: worker_5 has joined.


Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1477, in _runnext
    d.do_qcheck(newqcheck, qlapse.total_seconds())


[2018-02-06 08:52:20.602254] NOTICE: LauncherShell: chief_worker_0 has joined.


  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1073, in run
    runstrat = self.runstrategies(iterstrat)
KeyboardInterrupt
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()


[2018-02-06 08:52:20.604336] NOTICE: LauncherShell: parameter_server_0 has joined.


  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/feed.py", line 262, in do_qcheck
    qwait = max(0.0, qwait - qlapse)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()


[2018-02-06 08:52:20.606435] NOTICE: LauncherShell: Launcher closed.


  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1235, in runstrategies
    self._runnext(runstrats)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1564, in _runnext
    strat._next()
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 519, in run
    episode = cerebro.run(stdstats=True, preload=False, oldbuysell=True)[0]
KeyboardInterrupt
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 519, in run
    episode = cerebro.run(stdstats=True, preload=False, oldbuysell=True)[0]
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1073, in run
    runstrat = self.runstrategies(iterstrat)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/strategy.py", line 328, in _next
    self._next_observers(minperstatus)
  File "/Users/muzikin/a