In [2]:
import backtrader as bt
import backtrader.indicators as btind
import numpy as np

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from btgym.a3c import Launcher, BaseLSTMPolicy

import tensorflow as tf
import tensorflow.contrib.rnn as rnn
from tensorflow.python.util.nest import flatten as flatten_nested

In [3]:
class LSTMPolicy1D(BaseLSTMPolicy):
    """
    A3C LSTM policy whose input is pre-processed by a stack of 1D convolutions.

    The convolutional output is handed to BaseLSTMPolicy, which builds the rest of the network.
    """
    def __init__(self, ob_space, ac_space, num_filters=32, filter_size=5, stride=2,
                 lstm_class=rnn.BasicLSTMCell, lstm_layers=(256,)):
        """
        Args:
            ob_space:       observation shape; its entries follow a leading `None` (batch) dimension
                            in the input placeholder.
            ac_space:       action space, passed through to the base class.
            num_filters:    number of output channels of every conv layer.
            filter_size:    kernel width of every conv layer.
            stride:         stride of every conv layer.
            lstm_class:     LSTM cell class, passed through to the base class.
            lstm_layers:    LSTM layer sizes, passed through to the base class.
        """
        # Input placeholder, kept on the instance as `x`:
        self.x = tf.placeholder(tf.float32, [None] + list(ob_space), name='x_in_pl')

        # Four identical conv layers with ELU activation, scoped "l1".."l4":
        conv_out = self.x
        for layer_num in range(1, 5):
            conv_out = self.conv1d(conv_out, num_filters, "l{}".format(layer_num), filter_size, stride)
            conv_out = tf.nn.elu(conv_out)

        # The base class takes over from here (LSTM along rollout time dimension and everything else):
        super(LSTMPolicy1D, self).__init__(conv_out, ob_space, ac_space, lstm_class, lstm_layers)

    def conv1d(self, x, num_filters, name, filter_size=3, stride=2, pad="SAME", dtype=tf.float32,
               collections=None):
        """
        Single 1D convolution layer with bias, created inside variable scope `name`.

        Args:
            x:              input tensor; the size of its last dimension is used as the number
                            of input channels.
            num_filters:    number of output channels.
            name:           variable scope name for this layer.
            filter_size:    kernel width.
            stride:         convolution stride.
            pad:            padding mode passed to tf.nn.conv1d.
            dtype:          dtype of the kernel variable.
            collections:    graph collections passed to tf.get_variable for both variables.

        Returns:
            tensor: tf.nn.conv1d(x, W, stride, pad) + b
        """
        with tf.variable_scope(name):
            in_channels = int(x.get_shape()[-1])
            kernel_shape = [filter_size, in_channels, num_filters]

            # Number of inputs feeding one output unit: kernel width * input channels.
            fan_in = filter_size * in_channels

            # Number of output units fed by one input unit: kernel width * output channels.
            fan_out = filter_size * num_filters

            # Kernel is drawn uniformly from [-limit, limit]:
            limit = np.sqrt(6. / (fan_in + fan_out))

            kernel = tf.get_variable(
                "W",
                shape=kernel_shape,
                dtype=dtype,
                initializer=tf.random_uniform_initializer(-limit, limit),
                collections=collections,
            )
            bias = tf.get_variable(
                "b",
                [1, 1, num_filters],
                initializer=tf.constant_initializer(0.0),
                collections=collections,
            )
            return tf.nn.conv1d(x, kernel, stride, pad) + bias
   

In [4]:

class MyStrategy(BTgymStrategy):
    """
    BT server inner computation strategy, tuned to pass a simple sine wave test.

    Provides the observation state (log-scaled gradients of open price and a set of SMAs)
    and a composite reward built from broker value statistics.
    """

    def __init__(self, **kwargs):
        super(MyStrategy, self).__init__(**kwargs)

        # Time-embedding length is the first dimension of the declared raw state shape:
        self.dim_time = self.p.state_shape['raw_state'].shape[0]

        # Bookkeeping for the most recently closed trade:
        self.trade_just_closed = False
        self.trade_result = None

        # Broker value as of the last closed trade; starts at initial cash:
        self.realised_broker_value = self.env.broker.startingcash
        self.episode_result = 0
        self.reward = 0

        # Averaging window for broker value; should be somehow consistent with skip_frame value:
        self.avg_period = 10

        # Signal features: SMAs attached to the data feed as sma_4 ... sma_256.
        for sma_period in (4, 8, 16, 32, 64, 128, 256):
            setattr(
                self.data,
                'sma_{}'.format(sma_period),
                btind.SimpleMovingAverage(self.datas[0], period=sma_period),
            )

        # Service SMA, not plotted and not used as a feature. Per the original author's note
        # it is here to get correct first feature values - presumably by extending the warm-up
        # period to cover the longest SMA plus the embedding window (TODO confirm).
        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0], period=(256 + self.dim_time))
        self.data.dim_sma.plotinfo.plot = False

    def notify_trade(self, trade):
        """
        Records the result of a trade once it is closed; open trades are ignored.
        """
        if not trade.isclosed:
            return

        # Raise the flag and remember the trade's profit/loss (`pnlcomm`):
        self.trade_just_closed = True
        self.trade_result = trade.pnlcomm

        # Remember realized portfolio value:
        self.realised_broker_value = self.broker.get_value()

    def get_state(self):
        """
        Computes obs. state as [time_dim, 8] matrix of log-scaled features gradients.

        Stores the result under `model_input`, stores `self.raw_state` under `raw_state`
        and returns the whole state dictionary.
        """
        T = 1e4 # EURUSD
        #T = 1 # BTCUSD

        feature_lines = (
            self.data.open,
            self.data.sma_4,
            self.data.sma_8,
            self.data.sma_16,
            self.data.sma_32,
            self.data.sma_64,
            self.data.sma_128,
            self.data.sma_256,
        )
        # One column per feature, one row per time step:
        features = np.stack(
            [np.frombuffer(line.get(size=self.dim_time)) for line in feature_lines],
            axis=-1
        )
        # Amplified gradient along features axis (axis=1), then log-scale:
        model_input = self.log_transform(np.gradient(features, axis=1) * T)

        self.state['raw_state'] = self.raw_state
        self.state['model_input'] = model_input

        return self.state

    def log_transform(self, x):
        """Sign-preserving log scaling: sign(x) * log(|x| + 1)."""
        magnitude = np.log(np.fabs(x) + 1)
        return np.sign(x) * magnitude

    def norm_log_value(self, current_value, start_value, drawdown_call, target_call, epsilon=1e-4):
        """Current value log-normalized in [-1,1] wrt upper and lower bounds"""
        # Change relative to start value, in percent:
        pct_change = (np.asarray(current_value) / start_value - 1) * 100
        # Position between the drawdown and target bounds, clipped away from 0 and 1:
        position = (pct_change - target_call)/(drawdown_call+target_call) + 1
        position = np.clip(position, epsilon, 1 - epsilon)
        return 1 - 2 * np.log(position) / np.log( epsilon)

    def norm_value(self, current_value, start_value, drawdown_call, target_call, epsilon=1e-8):
        """Current value normalized in [-1,1] wrt upper and lower bounds"""
        # Change relative to start value, in percent:
        pct_change = (np.asarray(current_value) / start_value - 1) * 100
        # Position between the drawdown and target bounds:
        position = (pct_change - target_call)/(drawdown_call+target_call) + 1
        return 2 * np.clip(position, epsilon, 1 - epsilon) - 1

    def decayed_result(self, trade_result, current_value, start_value, drawdown_call, target_call, gamma=0.8):
        """
        Trade result scaled by the drawdown-to-target value range and multiplied by a decay
        factor that is linear in current value; the factor equals `gamma` at target value.
        """
        target_value = start_value * (1 + target_call/100)
        value_range = start_value * (drawdown_call + target_call)/100
        decay = (gamma - 1) * (current_value - target_value) / value_range + gamma
        return trade_result * decay / value_range

    def get_reward(self):
        """
        Defines reward as a composition of portfolio performance statistics.

        Also stores the result in `self.reward` and clears the `trade_just_closed` flag.
        """
        start_cash = self.env.broker.startingcash
        drawdown_call = self.p.drawdown_call
        target_call = self.p.target_call

        # Broker value averaged over the last `avg_period` steps; shared by all three terms:
        avg_broker_value = np.average(self.stats.broker.value.get(size=self.avg_period))

        # Reward term 1: averaged profit/loss for current opened trade (unrealized p/l),
        # normalized by the drawdown-to-target value range:
        unrealised_pnl = avg_broker_value - self.realised_broker_value
        unrealised_pnl = unrealised_pnl / (start_cash * (drawdown_call + target_call) / 100)

        # Reward term 2: averaged broker value, normalized wrt max drawdown and target bounds:
        avg_norm_broker_value = self.norm_value(
            avg_broker_value,
            start_cash,
            drawdown_call,
            target_call,
        )

        # Reward term 3: normalized single trade realized profit/loss,
        # non-zero only on the step right after a trade has been closed:
        realized_pnl = 0
        if self.trade_just_closed:
            realized_pnl = self.decayed_result(
                self.trade_result,
                avg_broker_value,
                start_cash,
                drawdown_call,
                target_call,
                gamma=1.0
            )
            self.trade_just_closed = False

        # Coefficients are tunable:
        self.reward = unrealised_pnl + 1e-2 * avg_norm_broker_value + 10 * realized_pnl

        return self.reward

class RewardObserver(bt.observer.Observer):
    """
    Observer exposing the owner's current reward as a line, so it shows up on the episode plot.
    """
    lines = ('reward',)

    # Draw on a separate subplot:
    plotinfo = {'plot': True, 'subplot': True}

    plotlines = {
        'reward': {'markersize': 4.0, 'color': 'green', 'fillstyle': 'full'},
    }

    def next(self):
        # Copy the reward attribute of the owning object into the current slot of the line:
        reward_line = self.lines.reward
        reward_line[0] = self._owner.reward

########################################################       


# Backtesting engine parameters.

time_embed_dim = 16

# Observation spaces; first dimension of each is the time-embedding length:
state_shape = dict(
    raw_state=spaces.Box(low=-100, high=100, shape=(time_embed_dim, 4)),
    model_input=spaces.Box(low=-100, high=100, shape=(time_embed_dim, 8)),
)

strategy_params = {
    'state_shape': state_shape,
    'portfolio_actions': ('hold', 'buy', 'sell', 'close'),
    'drawdown_call': 5,  # max % to lose, in percent of initial cash
    'target_call': 8,  # max % to win, same
    'skip_frame': 10,
}

MyCerebro = bt.Cerebro()
MyCerebro.addstrategy(MyStrategy, **strategy_params)

# Leveraged account:
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)  # commission to imitate spread
MyCerebro.addsizer(bt.sizers.SizerFix, stake=5000)

MyCerebro.addanalyzer(bt.analyzers.DrawDown)
MyCerebro.addobserver(RewardObserver)

# Dataset. Alternative data files used previously:
#   '../../data/DAT_ASCII_EURUSD_M1_201703.csv'
#   '../examples/data/DAT_ASCII_EURUSD_M1_201704.csv'
#   '../../data/test_sine_1min_period256_delta0002.csv'
MyDataset = BTgymDataset(
    filename='../../data/DAT_ASCII_EURUSD_M1_2016.csv',
    start_weekdays=[0, 1, 2, 3, 4],
    episode_len_days=1,
    episode_len_hours=2,
    episode_len_minutes=0,
    start_00=False,
    time_gap_hours=6,
)

# Environment keyword arguments:
env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': True,
    'render_ylabel': 'SMA_log_gradients',
    'render_size_episode': (12, 8),
    'render_size_human': (10, 5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# Tensorflow distributed cluster and a3c configuration:
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,  # ~ num of CPU cores
    'num_ps': 1,
    'log_dir': './tmp/a3c_test_4',
}

launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    policy_class=LSTMPolicy1D,
    policy_config={'lstm_layers': (256,)},
    rollout_length=20,
    model_beta=0.02,  # entropy regularization, should be in ~[0.1, 0.01]
    opt_learn_rate=1e-4,  # adam learn rate
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=10,
    verbose=1,
)

In [6]:
# Train it: start the launcher configured in the previous cell.
# As the launcher's own log message states, press `Ctrl-C` to stop training and close it.
launcher.run()

# To monitor training, point TensorBoard at the `log_dir` set in cluster_config:
# shell:  tensorboard --logdir './tmp/a3c_test_4'


[2017-09-21 13:55:33,290] Press `Ctrl-C` to stop training and close launcher.


Press `Ctrl-C` to stop training and close launcher.
INFO:tensorflow:Restoring parameters from ./tmp/a3c_test_4/train/model.ckpt-9117618


[2017-09-21 13:55:46,226] Restoring parameters from ./tmp/a3c_test_4/train/model.ckpt-9117618


INFO:tensorflow:Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1


[2017-09-21 13:55:48,302] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1


INFO:tensorflow:Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1
INFO:tensorflow:Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1
INFO:tensorflow:Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1
INFO:tensorflow:Wai

[2017-09-21 13:55:48,302] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1


INFO:tensorflow:Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l3/W/Adam, global/l4/W/Adam_1


[2017-09-21 13:55:48,304] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1


INFO:tensorflow:Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1


[2017-09-21 13:55:48,307] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l3/W/Adam, global/l4/W/Adam_1
[2017-09-21 13:55:48,304] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Adam, global/l4/W/Adam_1, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, global/action/b/Adam_1
[2017-09-21 13:55:48,309] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l3/W/Adam, global/l4/W/Adam_1
[2017-09-21 13:55:48,310] Waiting for model to be ready.  Ready_for_local_init_op:  None, ready: Variables not initialized: global/l2/W, global/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, global/l1/b/Adam, global/l3/W/Ada

INFO:tensorflow:Starting standard services.


[2017-09-21 13:55:51,442] Starting standard services.


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 13:55:51,454] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Starting queue runners.


[2017-09-21 13:55:51,455] Starting queue runners.


INFO:tensorflow:global/global_step/sec: 0


[2017-09-21 13:55:51,456] global/global_step/sec: 0
[2017-09-21 13:55:51,483] worker_0: starting training at step: 9117638


INFO:tensorflow:Starting queue runners.


[2017-09-21 13:56:20,011] Starting queue runners.


INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Starting queue runners.


[2017-09-21 13:56:20,158] Starting queue runners.
[2017-09-21 13:56:20,159] Starting queue runners.


INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Starting queue runners.


[2017-09-21 13:56:20,169] Starting queue runners.
[2017-09-21 13:56:20,171] worker_7: starting training at step: 9118782


INFO:tensorflow:Starting queue runners.


[2017-09-21 13:56:20,173] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-09-21 13:56:20,198] worker_4: starting training at step: 9118782
[2017-09-21 13:56:20,180] Starting queue runners.
[2017-09-21 13:56:20,182] Starting queue runners.
[2017-09-21 13:56:20,225] worker_5: starting training at step: 9118782
[2017-09-21 13:56:20,235] worker_3: starting training at step: 9118782
[2017-09-21 13:56:20,223] worker_6: starting training at step: 9118782
[2017-09-21 13:56:20,258] worker_1: starting training at step: 9118782
[2017-09-21 13:56:20,267] worker_2: starting training at step: 9118782


INFO:tensorflow:global/global_step/sec: 196.846


[2017-09-21 13:57:51,454] global/global_step/sec: 196.846


INFO:tensorflow:global/global_step/sec: 283.994


[2017-09-21 13:59:51,456] global/global_step/sec: 283.994


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:00:51,441] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 281.171


[2017-09-21 14:01:51,461] global/global_step/sec: 281.171


INFO:tensorflow:global/global_step/sec: 273.836


[2017-09-21 14:03:51,456] global/global_step/sec: 273.836


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:05:51,441] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 275.173


[2017-09-21 14:05:51,483] global/global_step/sec: 275.173


INFO:tensorflow:global/global_step/sec: 264.886


[2017-09-21 14:07:51,455] global/global_step/sec: 264.886


INFO:tensorflow:global/global_step/sec: 274.239


[2017-09-21 14:09:51,453] global/global_step/sec: 274.239


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:10:51,444] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 272.076


[2017-09-21 14:11:51,452] global/global_step/sec: 272.076


INFO:tensorflow:global/global_step/sec: 283.57


[2017-09-21 14:13:51,454] global/global_step/sec: 283.57


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:15:51,442] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:20:51,442] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:25:51,449] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:30:51,444] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:35:51,442] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:40:51,447] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt


[2017-09-21 14:45:51,444] Saving checkpoint to path ./tmp/a3c_test_4/train/model.ckpt
Process BTgymDataFeedServer-11:1:
Process BTgymServer-15:1:
Process BTgymServer-12:1:
Process BTgymServer-16:1:
Process BTgymServer-17:1:
Process DrawCerebro-11:2:492:
Process BTgymServer-18:1:
Process BTgymServer-13:1:
Process BTgymServer-14:1:
[2017-09-21 14:50:11,152] worker_1 has joined.
Traceback (most recent call last):
Traceback (most recent call last):
[2017-09-21 14:50:11,217] worker_2 has joined.
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
[2017-09-21 14:50:11,252] worker_3 has joined.
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 433, in run
    gc.collect()
[2017-09-21 14:50:11,254] worker_4 has joined.
  File "/Users/muzikin/Yandex.Disk.local

  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 106, in next
    reward = self.strategy.get_reward()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineiterator.py", line 284, in _clk_update
    self.forward()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineiterator.py", line 258, in _next
    indicator._next()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineiterator.py", line 275, in _next
    self.next()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineiterator.py", line 260, in _next
    self._notify()
  File "<ipython-input-4-20eef61197bb>", line 112, in get_reward
    unrealised_pnl = np.average(self.stats.broker.value.get(size=self.avg_period))            - self.realised_broker_value
KeyboardInterrupt
KeyboardInterrupt
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6

In [None]:
# Just in case: for manual environment testing.
#
# The environment is built from a separate copy of the configuration with its own ports.
# `env_config` itself is deliberately left untouched: the very same dict object was passed
# to Launcher above, so updating it in place would make this cell non-idempotent and could
# change the ports used by the launcher if it is run again (whether Launcher keeps its own
# copy is not visible from this notebook).
manual_env_config = dict(env_config, port=5090, data_port=5089)
env = BTgymEnv(**manual_env_config)