In [1]:
#%load_ext autoreload
#%autoreload 2

#import sys
#sys.path.insert(0,'..')

import os

import backtrader as bt
import backtrader.indicators as btind
import numpy as np
import scipy.signal as signal
from scipy import stats

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from launcher import Launcher
from model import LSTMPolicy

import tensorflow as tf
import tensorflow.contrib.rnn as rnn

In [2]:
class SimpleLSTM(LSTMPolicy):
    """
    Single-layer LSTM policy: flattens each observation, runs one 256-unit
    `BasicLSTMCell` over the leading dimension of the input (treated as time)
    and derives action logits and a value estimate from the LSTM outputs.

    The helpers `flatten`, `linear`, `normalized_columns_initializer` and
    `categorical_sample` are not defined in this class; they are presumably
    inherited from `LSTMPolicy` -- confirm against `model.py`.
    """
    def __init__(self, ob_space, ac_space):
        """
        Builds the policy graph in the current TensorFlow variable scope.

        Note that `LSTMPolicy.__init__` is not called here; every attribute
        the object ends up with is assigned in this method.

        Args:
            ob_space: shape of a single observation; it is converted with
                `list(ob_space)` and appended to a leading `None` dimension.
            ac_space: output width of the "action" linear layer, also passed
                to `categorical_sample` (presumably the number of discrete
                actions -- confirm).
        """
        

        # Collects intermediate shapes/tensors for debugging purposes.
        self.diagnostic = dict()

        # Observation input; the leading dimension is left dynamic.
        self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))

        self.diagnostic['input_shape'] = self.x.shape
        
        #print('self.diagnostic0:', self.diagnostic)

        # introduce a "fake" batch dimension of 1 after flatten so that we can do LSTM over time dim
        x = tf.expand_dims(self.flatten(x), [0])

        self.diagnostic['flatten_shape'] = x.shape
        
        #print('self.diagnostic1:', self.diagnostic)

        # Number of LSTM units; also the feature width fed to both output layers.
        size = 256
        
        #num_layers = 2

        lstm = rnn.BasicLSTMCell(size, state_is_tuple=True)

        self.state_size = lstm.state_size
        
        # One-element int tensor holding the dynamic size of the first input
        # dimension; used as `sequence_length` for the single fake batch entry.
        step_size = tf.shape(self.x)[:1]

        self.diagnostic['step_size'] = step_size
        
        #print('self.diagnostic2:', self.diagnostic)

        # Zero-valued initial cell/hidden state (numpy) and the matching
        # placeholders through which a state is fed into the graph.
        c_init = np.zeros((1, lstm.state_size.c), np.float32)
        h_init = np.zeros((1, lstm.state_size.h), np.float32)
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm.state_size.h])
        self.state_in = [c_in, h_in]

        state_in = rnn.LSTMStateTuple(c_in, h_in)

        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
            lstm, x, initial_state=state_in, sequence_length=step_size,
            time_major=False)

        lstm_c, lstm_h = lstm_state
        # Drop the fake batch dimension: one row of `size` features per step.
        x = tf.reshape(lstm_outputs, [-1, size])
        # Policy head ("action") and value head ("value"); the value output is
        # reshaped to a 1-D vector.
        self.logits = self.linear(x, ac_space, "action", self.normalized_columns_initializer(0.01))
        self.vf = tf.reshape(self.linear(x, 1, "value", self.normalized_columns_initializer(1.0)), [-1])
        # Final LSTM state, restricted to the first (only) batch row.
        self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
        # First row of the sampled action tensor.
        self.sample = self.categorical_sample(self.logits, ac_space)[0, :]
        # Trainable variables registered under the current variable scope.
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)

In [47]:
class MyStrategy(BTgymStrategy):
    """
    Example subclass of the BT server inner computation strategy.

    Overrides state featurization (`get_state`), reward computation
    (`get_reward`) and the per-step order logic (`next`).

    Attributes such as `self.raw_state`, `self.state`, `self.action`,
    `self.order`, `self.order_failed` and `self.broker_message` are read or
    written here but not defined in this class; they are presumably maintained
    by `BTgymStrategy` -- confirm against the base class.
    """

    def __init__(self, **kwargs):
        """
        Initializes the base strategy and the bookkeeping used by the reward.

        Args:
            **kwargs: forwarded unchanged to `BTgymStrategy.__init__`.
        """
        super(MyStrategy, self).__init__(**kwargs)

        # Time-embedded vector of normalized broker values, one entry per row
        # of `raw_state`.  `next()` maps the account value linearly so that the
        # drawdown limit is 0 and the profit target is 1; an untouched account
        # (value == starting cash) therefore sits at
        # drawdown_call / (target_call + drawdown_call).  Pre-fill the history
        # with that neutral level.  (Previously `target_call` was used in the
        # numerator, which did not match the per-step formula in `next()`.)
        embed_len = self.p.state_shape['raw_state'].shape[0]
        self.current_value_embeded = np.ones(embed_len) * \
            self.p.drawdown_call / (self.p.target_call + self.p.drawdown_call)

        # Amount subtracted from the (unscaled) reward when an order fails.
        self.order_penalty = 2
        # Set by `notify_trade`, consumed and cleared by `get_reward`.
        self.trade_just_closed = False
        self.trade_result = None

    def notify_trade(self, trade):
        """
        Records the result of a trade once it has been closed.

        Args:
            trade: trade object passed in by the engine; only `isclosed` and
                `pnlcomm` are read.
        """
        if trade.isclosed:
            # Set trade flag and result:
            self.trade_just_closed = True
            self.trade_result = trade.pnlcomm

    def sigmoid(self, x):
        """
        Logistic function 1 / (1 + exp(-x)), applied element-wise for arrays.

        Currently unused: the squashing step in `get_state` is commented out.
        """
        return 1/(1 + np.exp(-x))

    def get_state(self):
        """
        Computes featurized RL-ready environment observation state
        by applying continuous wavelet transform to time-embedded vector
        of close-price gradients.

        Returns:
            `self.state`, with its 'model_input' entry replaced by a 2-D
            [n, m] array: n is the length of the time embedding (rows of
            `raw_state`), m is the number of wavelet scales, taken from
            `state_shape['model_input'].shape[1]`.
        """
        # Use close price:
        channel = 3

        X = self.raw_state[:, channel]

        # Prepare parameters:
        Tau = 2
        max_cwt_scale = self.p.state_shape['model_input'].shape[1]
        # Wavelet transform scales: m evenly spaced widths, Tau .. Tau + m - 1.
        cwt_width = np.linspace(Tau, max_cwt_scale + Tau - 1, max_cwt_scale)

        # Scaling factor applied to the price gradients.
        T = 1e4

        # Get vector of gradients of last [n] prices:
        X = np.gradient(X, axis=0) * T

        # Compute continuous wavelet transform using Ricker wavelet; the
        # transposed result is an [n, m] matrix, shifted by +1.
        # NOTE(review): `scipy.signal.cwt` and `scipy.signal.ricker` are
        # deprecated in recent SciPy releases -- confirm the pinned SciPy
        # version still provides them before upgrading.
        X = signal.cwt(X, signal.ricker, cwt_width).T + 1

        # Disabled alternatives kept for reference:
        # local min-max norm:
        #X = (X - X.min()) / (X.max() - X.min())
        # append broker value history as an extra column:
        #self.state['model_input'] = np.concatenate([X, self.current_value_embeded[:, None] ], axis=-1)

        self.state['model_input'] = X

        # Squash values in [0,1]:
        #self.state['model_input'] = self.sigmoid(self.state['model_input'])

        return self.state

    def get_reward(self):
        """
        Computes the step reward.

        The reward is the sum of:
          * 10 x the fractional change of current broker value relative to
            starting cash;
          * the result (`trade.pnlcomm`) of a trade closed since the previous
            call, if any;
          * minus `order_penalty` if `self.order_failed` is set;
        and the sum is divided by 10 before being returned.  The value is not
        bounded.

        Side effects: clears `trade_just_closed` and `order_failed` once they
        have been accounted for.
        """
        r = (self.broker.get_value() / self.env.broker.startingcash - 1) * 10

        # Result of a just-closed trade:
        if self.trade_just_closed:
            r += self.trade_result
            self.trade_just_closed = False

        # Penalty for failed order:
        if self.order_failed:
            r -= self.order_penalty
            self.order_failed = False

        return r / 10

    def next(self):
        """
        Extension of default implementation.
        Defines one step environment routine for server 'Episode mode';
        At least, it should handle order execution logic according to action received.
        """
        # Normalized time-embedded vector of broker values: shift the history
        # one step to the left and write the newest value into the last slot.
        self.current_value_embeded = np.roll(self.current_value_embeded, -1)

        # Linear map of relative account value: 0 at the drawdown limit,
        # 1 at the profit target (both given in percent of starting cash).
        self.current_value_embeded[-1] =\
            (self.broker.get_value() / self.env.broker.startingcash - 1 + self.p.drawdown_call / 100) / \
            (self.p.target_call + self.p.drawdown_call) * 100

        # Simple action-to-order logic; nothing is issued while `self.order`
        # is set or when the action is 'hold':
        if self.action == 'hold' or self.order:
            pass
        elif self.action == 'buy':
            self.order = self.buy()
            self.broker_message = 'New BUY created; ' + self.broker_message
        elif self.action == 'sell':
            self.order = self.sell()
            self.broker_message = 'New SELL created; ' + self.broker_message
        elif self.action == 'close':
            self.order = self.close()
            self.broker_message = 'New CLOSE created; ' + self.broker_message
            
# Backtesting engine parameters.
# Both observation spaces share a 30-step time embedding: `raw_state` has
# 4 columns, `model_input` has 15 columns.
time_embed_dim = 30
state_shape = {
    'raw_state': spaces.Box(low=-1, high=1, shape=(time_embed_dim, 4)),
    'model_input': spaces.Box(low=-10, high=10, shape=(time_embed_dim, 15)),
}

MyCerebro = bt.Cerebro()
MyCerebro.addstrategy(
    MyStrategy,
    state_shape=state_shape,
    portfolio_actions=('hold', 'buy', 'sell'),
    drawdown_call=5,  # in percent of initial cash
    target_call=20,
    skip_frame=8,
)

# Leveraged account: cash, commission/leverage, fixed stake and drawdown analyzer.
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)
MyCerebro.broker.set_shortcash(False)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=10000)
MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Seven yearly files of EURUSD 1-minute bars.  This list is currently not
# passed to the dataset: the synthetic sine-wave file below is used instead.
filenames = [
    '../data/DAT_ASCII_EURUSD_M1_2010.csv',
    '../data/DAT_ASCII_EURUSD_M1_2011.csv',
    '../data/DAT_ASCII_EURUSD_M1_2012.csv',
    '../data/DAT_ASCII_EURUSD_M1_2013.csv',
    '../data/DAT_ASCII_EURUSD_M1_2014.csv',
    '../data/DAT_ASCII_EURUSD_M1_2015.csv',
    '../data/DAT_ASCII_EURUSD_M1_2016.csv',
]

MyDataset = BTgymDataset(
    # filename=filenames,
    filename='../data/test_sine_1min_period256_delta0002.csv',
    start_weekdays=[0, 1, 2, 3, 4],
    episode_len_days=0,
    episode_len_hours=23,
    episode_len_minutes=0,
    start_00=False,
    time_gap_hours=6,
)

# Environment settings handed to every BTgymEnv instance.
env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': False,
    'render_ylabel': 'AVG,VAL Gradients',
    'render_size_episode': (12, 8),
    'render_size_human': (8, 3.5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# Distributed training layout: 8 workers, 1 parameter server, local host.
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing_6',
}

launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    model_class=SimpleLSTM,
    rollout_length=20,
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=20,
    verbose=1,
)

[2017-08-25 19:10:09,899] ./tmp/a3c_testing_6 created.


In [48]:
# Start the training session with the launcher configured in the previous cell.
# Per the log output, training keeps running until interrupted with Ctrl-C.
launcher.run()

LSTM init started
self.diagnostic0: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)])}
self.diagnostic1: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)])}
self.diagnostic2: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)]), 'step_size': <tf.Tensor 'global/strided_slice:0' shape=(1,) dtype=int32>}
LSTM init started
self.diagnostic0: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)])}
self.diagnostic1: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)])}
self.diagnostic2: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)])

[2017-08-25 19:10:16,223] Press `Ctrl-C` to stop training and close launcher.


Press `Ctrl-C` to stop training and close launcher.
LSTM init started
self.diagnostic0: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)])}
self.diagnostic1: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)])}
self.diagnostic2: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)]), 'step_size': <tf.Tensor 'global/strided_slice:0' shape=(1,) dtype=int32>}
LSTM init started
self.diagnostic0: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)])}
self.diagnostic1: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatten_shape': TensorShape([Dimension(1), Dimension(None), Dimension(450)])}
LSTM init started
self.diagnostic2: {'input_shape': TensorShape([Dimension(None), Dimension(30), Dimension(15)]), 'flatte

[2017-08-25 19:10:18,948] Starting standard services.


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


[2017-08-25 19:10:19,012] Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:19,034] Starting queue runners.


INFO:tensorflow:global/global_step/sec: 0


[2017-08-25 19:10:19,057] global/global_step/sec: 0
[2017-08-25 19:10:19,165] worker_0: starting training at step: 0


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:22,507] Starting queue runners.
[2017-08-25 19:10:22,539] worker_2: starting training at step: 160


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:22,701] Starting queue runners.
[2017-08-25 19:10:22,782] worker_7: starting training at step: 160


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:22,857] Starting queue runners.
[2017-08-25 19:10:22,897] worker_3: starting training at step: 160


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:22,934] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:22,999] worker_4: starting training at step: 160


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:23,019] Starting queue runners.
[2017-08-25 19:10:23,008] Starting queue runners.
[2017-08-25 19:10:23,067] worker_1: starting training at step: 160
[2017-08-25 19:10:23,066] worker_5: starting training at step: 160


INFO:tensorflow:Starting queue runners.


[2017-08-25 19:10:23,265] Starting queue runners.
[2017-08-25 19:10:23,301] worker_6: starting training at step: 180


INFO:tensorflow:global/global_step/sec: 305.079


[2017-08-25 19:12:19,037] global/global_step/sec: 305.079


INFO:tensorflow:global/global_step/sec: 231.26


[2017-08-25 19:14:19,061] global/global_step/sec: 231.26


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


[2017-08-25 19:15:19,015] Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 306.017


[2017-08-25 19:16:19,040] global/global_step/sec: 306.017


INFO:tensorflow:global/global_step/sec: 322.402


[2017-08-25 19:18:19,042] global/global_step/sec: 322.402


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt
INFO:tensorflow:global/global_step/sec: 326.94


[2017-08-25 19:20:19,002] Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt
[2017-08-25 19:20:19,040] global/global_step/sec: 326.94


INFO:tensorflow:global/global_step/sec: 325.032


[2017-08-25 19:22:19,040] global/global_step/sec: 325.032


INFO:tensorflow:global/global_step/sec: 299.232


[2017-08-25 19:24:19,041] global/global_step/sec: 299.232


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


[2017-08-25 19:25:19,004] Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 306.07


[2017-08-25 19:26:19,040] global/global_step/sec: 306.07


INFO:tensorflow:global/global_step/sec: 319.387


[2017-08-25 19:28:19,045] global/global_step/sec: 319.387


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt


[2017-08-25 19:30:19,000] Saving checkpoint to path ./tmp/a3c_testing_6/train/model.ckpt
Process DrawCerebro-179:2:155:
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Process BTgymServer-186:1:
Process BTgymServer-182:1:
Process BTgymServer-180:1:
Process BTgymDataFeedServer-179:1:
Process BTgymServer-181:1:
Process BTgymServer-185:1:
Process BTgymServer-184:1:
[2017-08-25 19:31:38,491] worker_1 has joined.
Process BTgymServer-183:1:
Traceback (most recent call last):
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/server.py", line 433, in run
    gc.collect()
KeyboardInterrupt
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/pr

  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineiterator.py", line 282, in _clk_update
    clock_len = len(self._clock)
[2017-08-25 19:31:39,833] worker_7 has joined.
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/zmq/sugar/socket.py", line 476, in send_pyobj
    return self.send(msg, flags)
KeyboardInterrupt
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1235, in runstrategies
    self._runnext(runstrats)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/strategy.py", line 324, in _next
    super(Strategy, self)._next()
  File "zmq/backend/cython/socket.pyx", line 636, in zmq.backend.cython.socket.Socket.send (zmq/backend/cython/socket.c:7305)
[2017-08-25 19:31:39,835] chief_worker_0 has joined.  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineseries.py", line 463, in __len__

In [None]:
# Dump the launcher's keyword arguments, followed by a blank gap.
print(launcher.kwargs, '\n\n')

# Environment, cluster and cluster-spec settings, one per line.
for section in ('env_config', 'cluster_config', 'cluster_spec'):
    print(getattr(launcher, section))

# Per-worker configurations: a separator line, then each key with its value
# on the following line.
for config in launcher.workers_config_list:
    print('============')
    for k, v in config.items():
        print('{}:\n{}\n'.format(k, v))
    

In [None]:
def func1(max_step):
    """
    Scratch demonstration of `nonlocal` rebinding from a nested function.

    A nested counter is called 20 times.  Each call increments `step`; when
    `step` reaches `max_step` it wraps back to 0 and `done` is raised.  After
    every call one line is printed: the counter's return value, the enclosing
    `step`, and the enclosing `done` flag.

    Args:
        max_step: value at which the counter wraps around.

    Returns:
        None; the result is the printed output.
    """
    step = 0
    done = False

    def func2(max_step):
        # Rebind the enclosing function's variables instead of creating locals.
        nonlocal step, done
        step = step + 1
        if step == max_step:
            step, done = 0, True
        return step

    for _ in range(20):
        # The flag is lowered before each call so it only reports a wrap
        # that happened during that call.
        done = False
        returned = func2(max_step)
        print(returned, step, done)


func1(7)
            

In [None]:
# Scratch check: a dict created empty and filled through update() is still
# exactly of type `dict`.
a = {}
a.update(b=2, c=4)
type(a) is dict