In [18]:
import backtrader as bt
import backtrader.indicators as btind
import numpy as np
import scipy.signal as signal
from scipy import stats
import pywt

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from btgym.a3c import Launcher, BaseLSTMPolicy

In [19]:
import tensorflow as tf

class StatePreProcessor():
    """
    Builds a min/max pooling sub-graph along a single spatial dimension.

    Channel `0` of the input's last axis is pooled as is; channel `-1` is
    pooled through the `-pool(-x)` identity, which yields a minimum when
    `pooling_type` is 'MAX'. Both results are concatenated along axis 2 and
    exposed as `self.output`.
    """
    def __init__(self, x, window, stride, pooling_type='MAX', padding='SAME'):
        """
        Args:
            x:              input tensor (typically a placeholder); presumably
                            rank 3 `[batch, length, channels]`, since pooling uses
                            one spatial dimension and concat uses axis 2 - confirm.
            window:         scalar pooling window size.
            stride:         scalar pooling stride.
            pooling_type:   passed to `tf.nn.pool` unchanged.
            padding:        passed to `tf.nn.pool` unchanged.
        """
        self.x = x

        x_max = tf.expand_dims(self.x[..., 0], -1)

        # Negated so that the pooling below can be turned into a minimum:
        x_min = tf.negative(
            tf.expand_dims(self.x[..., -1], -1)
        )
        # MAX H-values pooling:
        x_max = tf.nn.pool(
            input=x_max,
            window_shape=[window],
            pooling_type=pooling_type,
            padding=padding,
            dilation_rate=None,
            strides=[stride],
        )
        # MIN L-values pooling: min(x) == -max(-x), so the pooled result has to be
        # negated back; otherwise the channel would carry -min(x).
        x_min = tf.negative(
            tf.nn.pool(
                input=x_min,
                window_shape=[window],
                pooling_type=pooling_type,
                padding=padding,
                dilation_rate=None,
                strides=[stride],
            )
        )

        self.output = tf.concat([x_max, x_min], axis=2)

    def process(self, x):
        """
        Evaluates the pooling sub-graph for the value `x` in the default session.

        Args:
            x:  value fed for the input tensor given at construction time.

        Returns:
            result of `session.run()` on `self.output`.

        Raises:
            RuntimeError: if there is no default TensorFlow session.
        """
        sess = tf.get_default_session()
        if sess is None:
            raise RuntimeError('StatePreProcessor.process() requires a default TensorFlow session.')
        return sess.run(self.output, {self.x: x})
    
class MinMaxPooler2D():
    """
    Builds a min/max pooling sub-graph over a two-channel input tensor.

    Channel `0` of the input's last axis is pooled as is; channel `1` is
    pooled through the `-pool(-x)` identity, which yields a minimum when
    `pooling_type` is 'MAX'. Both results are concatenated along the last axis
    and exposed as `self.output`.
    """
    def __init__(self, x, window, stride, pooling_type='MAX', padding='SAME'):
        """
        Args:
            x:              input tensor (typically a placeholder) whose last axis
                            holds at least two channels.
            window:         pooling window shape, passed to `tf.nn.pool` as
                            `window_shape` (one entry per pooled dimension).
            stride:         pooling strides, passed to `tf.nn.pool` as `strides`.
            pooling_type:   passed to `tf.nn.pool` unchanged.
            padding:        passed to `tf.nn.pool` unchanged.
        """
        self.x = x

        def pool(tensor):
            # Single place holding the pooling configuration shared by both channels.
            return tf.nn.pool(
                input=tensor,
                window_shape=window,
                pooling_type=pooling_type,
                padding=padding,
                dilation_rate=None,
                strides=stride,
            )

        x_max = tf.expand_dims(self.x[..., 0], -1)

        # Negated so that the pooling below can be turned into a minimum:
        x_min = tf.negative(
            tf.expand_dims(self.x[..., 1], -1)
        )

        # MAX pooling:
        x_max = pool(x_max)

        # MIN pooling: min(x) == -max(-x)
        x_min = tf.negative(pool(x_min))

        self.output = tf.concat([x_max, x_min], axis=-1)

    def process(self, x):
        """
        Evaluates the pooling sub-graph for the value `x` in the default session.

        Args:
            x:  value fed for the input tensor given at construction time.

        Returns:
            result of `session.run()` on `self.output`.

        Raises:
            RuntimeError: if there is no default TensorFlow session.
        """
        sess = tf.get_default_session()
        if sess is None:
            raise RuntimeError('MinMaxPooler2D.process() requires a default TensorFlow session.')
        # The graph end-point is stored as `self.output`; no `self.y` attribute exists.
        return sess.run(self.output, {self.x: x})
    


In [43]:

import tensorflow.contrib.rnn as rnn

class LSTMPolicy2(BaseLSTMPolicy):
    """
    LSTM policy that routes its observation placeholder through a
    `MinMaxPooler2D` stage before handing it over to `BaseLSTMPolicy`.
    """
    def __init__(self, ob_space, ac_space,
                 lstm_class=rnn.BasicLSTMCell, lstm_layers=(256,)):
        """
        Args:
            ob_space:       observation shape; iterated as a sequence of dimension
                            sizes to build the input placeholder.
            ac_space:       action space, forwarded to the base class unchanged.
            lstm_class:     LSTM cell class, forwarded to the base class.
            lstm_layers:    LSTM layer sizes, forwarded to the base class.
        """
        # Leading `None` leaves the first (batch / rollout) dimension unspecified:
        input_shape = [None] + list(ob_space)
        x_in = tf.placeholder(tf.float32, input_shape, name='x_in_pl')
        self.x = x_in

        # Pooling with window [8, 4] and strides [6, 3]:
        pooler = MinMaxPooler2D(x_in, [8, 4], [6, 3], 'MAX', 'SAME')

        # Base class builds the rest of the network (LSTM included) on the pooled tensor:
        super().__init__(pooler.output, ob_space, ac_space, lstm_class, lstm_layers)

        

In [None]:
#tf.reset_default_graph()
#pi = LSTMPolicy2((10,2),(3))

In [65]:
class MyStrategy(BTgymStrategy):
    """
    Example subclass of the BT server inner computation strategy: provides a
    wavelet-transformed model input and a reward based on closed trade results.
    """

    def __init__(self, **kwargs):
        """Forwards all keyword arguments to the base strategy and sets reward bookkeeping."""
        super().__init__(**kwargs)
        self.order_penalty = 1
        self.trade_just_closed = False
        self.trade_result = None

    def notify_trade(self, trade):
        """Remembers the result (`trade.pnlcomm`) of a trade once it is reported as closed."""
        if not trade.isclosed:
            return
        # Set trade flag and result:
        self.trade_just_closed = True
        self.trade_result = trade.pnlcomm

    def get_state(self):
        """
        Builds the observation dictionary.

        `model_input` is the Ricker-wavelet CWT (45 widths, 2..46) of columns 1 and 2
        of `self.raw_state`, each taken relative to `self.raw_state[0, 0]`, scaled
        by 100 and transposed with axes order (1, 0, 2). `raw_state` is passed as is.

        Returns:
            `self.state` dictionary with `raw_state` and `model_input` entries updated.
        """
        scale = 1e2
        widths = np.arange(2, 45 + 2)

        # Presumably the high / low price columns - confirm against the base class:
        channels = self.raw_state[:, 1:3]

        # Offsets relative to the first value of the first row of the raw state:
        offsets = channels - self.raw_state[0, 0]

        # One CWT plane per channel, stacked along a new last axis:
        cwt_planes = [
            signal.cwt(offsets[..., idx], signal.ricker, widths)
            for idx in (0, 1)
        ]
        cwt_stack = np.stack(cwt_planes, axis=-1) * scale

        self.state['raw_state'] = self.raw_state
        self.state['model_input'] = np.transpose(cwt_stack, (1, 0, 2))

        return self.state

    def get_reward(self):
        """
        Defines reward as a function of the last closed trade result, with a penalty
        for erroneous order placement.

        Returns:
            scalar reward; the sum of all terms is divided by 20.
        """
        # Low-value term: relative change of the broker value, times 10.
        value_ratio = self.broker.get_value() / self.env.broker.startingcash
        reward = (value_ratio - 1) * 10

        # Result (main-value), consumed once per closed trade:
        if self.trade_just_closed:
            reward += self.trade_result
            self.trade_just_closed = False

        # Penalty for failed order, consumed once per failure:
        if self.order_failed:
            reward -= self.order_penalty
            self.order_failed = False

        # Since the reward is just a quick example, the denominator keeps gradients sane.
        return reward / 20

    def __next(self):
        """
        Simple action-to-order logic for 'buy_hold' / 'sell_hold' / 'close_hold' actions.

        NOTE(review): the double-underscore name is mangled to `_MyStrategy__next`,
        so this method does not override `next()`; it looks like a deliberately
        disabled override - confirm before renaming.
        """
        if self.order:
            # An order is already set: do nothing.
            return

        if self.action == 'buy_hold' and self.position.size <= 0:
            self.order = self.buy()
            self.broker_message = 'New BUY created; ' + self.broker_message
        elif self.action == 'sell_hold' and self.position.size >= 0:
            self.order = self.sell()
            self.broker_message = 'New SELL created; ' + self.broker_message
        elif self.action == 'close_hold' and self.position.size != 0:
            self.order = self.close()
            self.broker_message = 'New CLOSE created; ' + self.broker_message

# Backtesting engine, dataset, environment and A3C launcher configuration.

# Number of rows (time steps) in every observation:
time_embed_dim = 120

# Observation spaces; `model_input` holds 45 CWT widths x 2 channels per time step.
state_shape = dict(
    raw_state=spaces.Box(low=-1, high=1, shape=(time_embed_dim, 4)),
    model_input=spaces.Box(low=-100, high=100, shape=(time_embed_dim, 45, 2)),
)

MyCerebro = bt.Cerebro()

MyCerebro.addstrategy(
    MyStrategy,
    state_shape=state_shape,
    # Alternative action set: ('close_hold', 'buy_hold', 'sell_hold')
    portfolio_actions=('hold', 'buy', 'sell'),
    drawdown_call=5,  # max to lose, in percent of initial cash
    target_call=10,  # max to win, same
    skip_frame=10,
)

# Leveraged account set-up:
MyCerebro.broker.setcash(2000)
# Commission is used to imitate spread:
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)
MyCerebro.broker.set_shortcash(False)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=10000)

MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Other data files used for experiments:
#   '../examples/data/DAT_ASCII_EURUSD_M1_2016.csv'
#   '../examples/data/DAT_ASCII_EURUSD_M1_201704.csv'
#   '../examples/data/test_sine_1min_period256_delta0002.csv'
MyDataset = BTgymDataset(
    filename='../examples/data/DAT_ASCII_EURUSD_M1_201703.csv',
    start_weekdays=[0, 1, 2, 3, 4],
    episode_len_days=0,
    episode_len_hours=23,
    episode_len_minutes=55,
    start_00=False,
    time_gap_hours=8,
)

env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': True,
    'render_ylabel': 'CWT / Ricker',
    'render_size_episode': (12, 8),
    'render_size_human': (8, 3.5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# Tensorflow distributed cluster and A3C configuration:
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 6,
    'num_ps': 1,
    'log_dir': './tmp/a3c_test',
}

launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    policy_class=LSTMPolicy2,
    policy_config=dict(lstm_layers=(256,)),
    rollout_length=20,
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=10,
    verbose=2,
)

[2017-09-07 16:22:44,053] Launcher ready.


In [66]:
# Start the parameter server and worker processes. Per the launcher's own log
# message, training is stopped (and the launcher closed) with Ctrl-C.
launcher.run()

[2017-09-07 16:22:44,317] worker_0 tf.server started.
[2017-09-07 16:22:44,269] parameters_server started.
[2017-09-07 16:22:44,376] making environment.
[2017-09-07 16:22:44,378] worker_0 is data_master: True
[2017-09-07 16:22:46,680] worker_0:envronment ok.
[2017-09-07 16:22:46,683] A3C_0: init() started
[2017-09-07 16:22:48,055] A3C_0: train op defined
[2017-09-07 16:22:48,110] A3C_0: init() done
[2017-09-07 16:22:48,112] worker_0:trainer ok.
[2017-09-07 16:22:48,755] worker_0: connecting to the parameter server... 


INFO:tensorflow:Restoring parameters from ./tmp/a3c_test/train/model.ckpt-30935


[2017-09-07 16:22:48,809] Restoring parameters from ./tmp/a3c_test/train/model.ckpt-30935
[2017-09-07 16:22:49,162] Press `Ctrl-C` to stop training and close launcher.
[2017-09-07 16:22:49,165] worker_1 tf.server started.
[2017-09-07 16:22:49,169] worker_2 tf.server started.
[2017-09-07 16:22:49,174] worker_3 tf.server started.
[2017-09-07 16:22:49,174] making environment.
[2017-09-07 16:22:49,179] making environment.
[2017-09-07 16:22:49,182] making environment.
[2017-09-07 16:22:49,178] worker_4 tf.server started.
[2017-09-07 16:22:49,188] worker_3 is data_master: False
[2017-09-07 16:22:49,184] worker_2 is data_master: False
[2017-09-07 16:22:49,183] worker_1 is data_master: False
[2017-09-07 16:22:49,188] making environment.
[2017-09-07 16:22:49,185] worker_5 tf.server started.
[2017-09-07 16:22:49,192] worker_4 is data_master: False
[2017-09-07 16:22:49,195] making environment.
[2017-09-07 16:22:49,203] worker_5 is data_master: False


Press `Ctrl-C` to stop training and close launcher.


[2017-09-07 16:22:50,271] worker_2:envronment ok.
[2017-09-07 16:22:50,274] A3C_2: init() started
[2017-09-07 16:22:50,340] worker_5:envronment ok.
[2017-09-07 16:22:50,343] A3C_5: init() started
[2017-09-07 16:22:50,358] worker_3:envronment ok.
[2017-09-07 16:22:50,362] A3C_3: init() started
[2017-09-07 16:22:50,383] worker_1:envronment ok.
[2017-09-07 16:22:50,387] A3C_1: init() started
[2017-09-07 16:22:50,406] worker_4:envronment ok.
[2017-09-07 16:22:50,410] A3C_4: init() started
[2017-09-07 16:22:52,021] A3C_2: train op defined
[2017-09-07 16:22:52,106] A3C_2: init() done
[2017-09-07 16:22:52,108] worker_2:trainer ok.
[2017-09-07 16:22:52,174] A3C_3: train op defined
[2017-09-07 16:22:52,207] A3C_5: train op defined
[2017-09-07 16:22:52,232] A3C_4: train op defined
[2017-09-07 16:22:52,242] A3C_1: train op defined
[2017-09-07 16:22:52,267] A3C_3: init() done
[2017-09-07 16:22:52,269] worker_3:trainer ok.
[2017-09-07 16:22:52,295] A3C_5: init() done
[2017-09-07 16:22:52,297] worke

INFO:tensorflow:Starting standard services.


[2017-09-07 16:22:52,356] Starting standard services.


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test/train/model.ckpt


[2017-09-07 16:22:52,374] Saving checkpoint to path ./tmp/a3c_test/train/model.ckpt


INFO:tensorflow:Starting queue runners.


[2017-09-07 16:22:52,375] Starting queue runners.


INFO:tensorflow:global/global_step/sec: 0


[2017-09-07 16:22:52,384] global/global_step/sec: 0
[2017-09-07 16:22:52,460] worker_0: starting training at step: 30955
[2017-09-07 16:22:53,195] worker_2: connecting to the parameter server... 
[2017-09-07 16:22:53,410] worker_3: connecting to the parameter server... 
[2017-09-07 16:22:53,419] worker_5: connecting to the parameter server... 
[2017-09-07 16:22:53,507] worker_1: connecting to the parameter server... 
[2017-09-07 16:22:53,544] worker_4: connecting to the parameter server... 


INFO:tensorflow:Starting queue runners.


[2017-09-07 16:22:54,317] Starting queue runners.
[2017-09-07 16:22:54,417] worker_2: starting training at step: 31035


INFO:tensorflow:Starting queue runners.


[2017-09-07 16:22:54,604] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-09-07 16:22:54,646] Starting queue runners.
[2017-09-07 16:22:54,663] worker_3: starting training at step: 31035
[2017-09-07 16:22:54,714] worker_5: starting training at step: 31035


INFO:tensorflow:Starting queue runners.


[2017-09-07 16:22:54,778] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-09-07 16:22:54,792] Starting queue runners.
[2017-09-07 16:22:54,844] worker_1: starting training at step: 31035
[2017-09-07 16:22:54,881] worker_4: starting training at step: 31035


INFO:tensorflow:global/global_step/sec: 105.365


[2017-09-07 16:24:52,387] global/global_step/sec: 105.365


INFO:tensorflow:global/global_step/sec: 103.887


[2017-09-07 16:26:52,372] global/global_step/sec: 103.887


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_test/train/model.ckpt


[2017-09-07 16:27:52,363] Saving checkpoint to path ./tmp/a3c_test/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 100.666


[2017-09-07 16:28:52,384] global/global_step/sec: 100.666


INFO:tensorflow:global/global_step/sec: 96.0239


[2017-09-07 16:30:52,387] global/global_step/sec: 96.0239
Process BTgymDataFeedServer-108:1:
Process BTgymServer-109:1:
[2017-09-07 16:30:59,984] worker_1 has joined.
Process BTgymServer-112:1:
Process BTgymServer-111:1:
Process BTgymServer-110:1:
Process BTgymServer-113:1:
[2017-09-07 16:31:00,008] worker_2 has joined.
Process BTgymServer-108:2:
[2017-09-07 16:31:00,014] worker_3 has joined.
[2017-09-07 16:31:00,015] worker_4 has joined.
[2017-09-07 16:31:00,016] worker_5 has joined.
[2017-09-07 16:31:00,017] chief_worker_0 has joined.
[2017-09-07 16:31:00,018] parameter_server_0 has joined.
[2017-09-07 16:31:00,018] Launcher closed.
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    s

  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/lineiterator.py", line 266, in _next
    self.next()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/cerebro.py", line 1564, in _runnext
    strat._next()
  File "/Users/muzikin/Yandex.Disk.localized/work/btgym/btgym/strategy.py", line 267, in next
    self.order = self.sell()
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/strategy.py", line 943, in sell
    **kwargs)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/strategy.py", line 327, in _next
    self._next_analyzers(minperstatus)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/brokers/bbroker.py", line 645, in sell
    parent=parent, transmit=transmit)
  File "/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/backtrader/strategy.py", line 361, in _next_analyzers
    

In [None]:
# Use ports other than the 5000 / 4999 pair set in the training configuration,
# then create a stand-alone environment for manual inspection.
# Note: this mutates `env_config` in place.
env_config.update({'port': 5050, 'data_port': 5049})
env = BTgymEnv(**env_config)

In [None]:
# Reset the environment and keep the value it returns (the initial observation).
o = env.reset()

In [None]:
# Display the value returned by `env.reset()`.
o

In [None]:
# Take a single step with action 0; the result is unpacked as
# observation, reward, done flag and info (gym convention - confirm for BTgymEnv).
o,r,d,i = env.step(0)
print(o)

In [None]:
# Close the manually created environment.
env.close()