In [None]:
import backtrader as bt
import backtrader.indicators as btind
import numpy as np
import scipy.signal as signal
from scipy import stats

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from btgym.a3c import Launcher, LSTMPolicy, BaseLSTMPolicy

In [None]:
import tensorflow as tf
import tensorflow.contrib.rnn as rnn

class LSTMPolicyTFF(BaseLSTMPolicy):
    """
    Time/frequency LSTM feature extraction.

    Two independent LSTM encoders read the same observation tensor: the first
    steps along axis 1 of the input, the second along axis 2 (the input is
    transposed for it). The final `.h` states of both encoders are concatenated
    and handed to `BaseLSTMPolicy.__init__` as the feature tensor.
    """
    def __init__(self, ob_space, ac_space,
                 lstm_class=rnn.BasicLSTMCell, lstm_layers=(256, 256)):
        """
        Args:
            ob_space:    iterable of observation dimensions (without batch dim).
                         Must describe a 2-D observation, because the input is
                         transposed with perm=[0, 2, 1].
                         NOTE(review): presumably (time_embedding, features) - confirm against the env.
            ac_space:    action space, forwarded unchanged to the base class.
            lstm_class:  RNN cell class; it is called with `state_is_tuple=True`
                         and its final state must expose an `.h` attribute.
            lstm_layers: forwarded unchanged to the base class.
        """
        # Observation placeholder of shape [batch] + ob_space; `x_f` is the same tensor,
        # re-used below as the input of the frequency encoder.
        self.x = x = x_f = tf.placeholder(tf.float32, [None] + list(ob_space), name='x_in_pl')

        # Run LSTM along time-embedding dim:
        lstm_t = lstm_class(128, state_is_tuple=True,)

        # Every sequence in the batch has full length: build a [batch]-shaped
        # vector filled with the size of axis 1.
        num_time_steps = tf.expand_dims(tf.shape(x)[1], [0])
        n_t_expanded = tf.fill(tf.expand_dims(tf.shape(x)[0], [0]), num_time_steps[0])
        batch_size = tf.shape(x)[0]

        lstm_t_outputs, lstm_t_state = tf.nn.dynamic_rnn(
            lstm_t,
            x,
            initial_state=lstm_t.zero_state(batch_size, dtype=tf.float32),
            sequence_length=n_t_expanded,
            time_major=False,
            scope='time_embed_lstm')

        # Run LSTM along frequency dimension (swap the two non-batch axes first):
        x_f = tf.transpose(x_f, perm=[0,2,1])

        lstm_f =  lstm_class(128, state_is_tuple=True,)
        num_time_steps = tf.expand_dims(tf.shape(x_f)[1], [0])
        n_t_expanded = tf.fill(tf.expand_dims(tf.shape(x_f)[0], [0]), num_time_steps[0])
        batch_size = tf.shape(x_f)[0]

        lstm_f_outputs, lstm_f_state = tf.nn.dynamic_rnn(
            lstm_f,
            x_f,
            initial_state=lstm_f.zero_state(batch_size, dtype=tf.float32),
            sequence_length=n_t_expanded,
            time_major=False,
            scope='frequency_embed_lstm')

        # Only the final states are used as features; per-step outputs are discarded.
        x_feature = tf.concat([lstm_t_state.h, lstm_f_state.h], 1)

        # Run LSTM along rollout time dimension and everything else.
        # The first argument of super() must be this very class; the former
        # `MultiLSTMPolicy` reference was an undefined name and raised NameError.
        super(LSTMPolicyTFF, self).__init__(x_feature, ob_space, ac_space, lstm_class, lstm_layers)


# Backward-compatible alias: keeps code that still uses the former class name working.
MultiLSTMPolicy = LSTMPolicyTFF

        

In [None]:
class MyStrategy(BTgymStrategy):
    """
    Example BTgymStrategy subclass: produces wavelet-based observations and
    a reward driven by closed-trade results.
    """

    def __init__(self, **kwargs):
        super(MyStrategy, self).__init__(**kwargs)
        # Result of the most recently closed trade; consumed by get_reward():
        self.trade_result = None
        self.trade_just_closed = False
        # Amount subtracted from the reward when an order has failed:
        self.order_penalty = 1

    def notify_trade(self, trade):
        """Stores `trade.pnlcomm` and raises the 'just closed' flag once a trade is closed."""
        if not trade.isclosed:
            return
        self.trade_result = trade.pnlcomm
        self.trade_just_closed = True

    def get_state(self):
        """
        Fills the 'model_input' entry of the observation state with the
        continuous wavelet transform (Ricker wavelet) of the amplified
        gradient of a single `raw_state` column.

        Returns:
            self.state, with 'model_input' updated.
        """
        price_column = 3      # close price, per the original author's note
        smallest_width = 2
        gradient_gain = 1e4

        prices = self.raw_state[:, price_column]

        # One wavelet width per 'model_input' column:
        # smallest_width, ..., smallest_width + n_widths - 1.
        n_widths = self.p.state_shape['model_input'].shape[1]
        widths = np.linspace(smallest_width, n_widths + smallest_width - 1, n_widths)

        # Amplified gradient of the price vector:
        scaled_gradient = np.gradient(prices, axis=0) * gradient_gain

        # cwt() yields one row per width; transpose so rows follow the price vector
        # and columns follow the widths (a 2-D matrix).
        transformed = signal.cwt(scaled_gradient, signal.ricker, widths)
        self.state['model_input'] = transformed.T

        return self.state

    def get_reward(self):
        """
        Reward is the result of the last closed trade plus a small account-value
        term, minus a penalty for a failed order; the total is divided by 20.
        """
        # Low-value term: relative change of broker value against starting cash, times 10.
        reward = (self.broker.get_value() / self.env.broker.startingcash - 1) * 10

        # Main term: result of a trade closed since the previous call (flag is reset once used).
        if self.trade_just_closed:
            reward += self.trade_result
            self.trade_just_closed = False

        # Failed-order penalty (flag is reset once used).
        if self.order_failed:
            reward -= self.order_penalty
            self.order_failed = False

        # This reward is only a quick example; the divisor keeps gradients sane.
        return reward / 20

# Backtesting engine setup.

# Size of the first dimension shared by both observation spaces:
time_embed_dim = 30

# 'model_input' is the entry that MyStrategy.get_state() fills in.
state_shape = dict(
    raw_state=spaces.Box(low=-1, high=1, shape=(time_embed_dim, 4)),
    model_input=spaces.Box(low=-10, high=10, shape=(time_embed_dim, 15)),
)

MyCerebro = bt.Cerebro()

MyCerebro.addstrategy(
    MyStrategy,
    state_shape=state_shape,
    portfolio_actions=('hold', 'buy', 'sell'),
    drawdown_call=5,  # maximum loss, in percent of initial cash
    target_call=10,   # maximum gain, in percent of initial cash
    skip_frame=10,
)

# Fixed order size and a drawdown analyzer:
MyCerebro.addsizer(bt.sizers.SizerFix, stake=10000)
MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Leveraged account; the commission imitates the spread.
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)
MyCerebro.broker.set_shortcash(False)

# Data source. The active file looks like a synthetic sine-wave series (judging by its name);
# the commented-out alternative is a EURUSD 1-minute file.
MyDataset = BTgymDataset(
    filename='../examples/data/test_sine_1min_period256_delta0002.csv',
    # filename='../examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
    start_weekdays=list(range(5)),  # i.e. [0, 1, 2, 3, 4]
    # Episode length: 1 day, 23 hours, 0 minutes.
    episode_len_days=1,
    episode_len_hours=23,
    episode_len_minutes=0,
    start_00=False,
    time_gap_hours=2,
)
# Keyword arguments for BTgymEnv: data and engine, rendering options, network settings.
env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    # Rendering:
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': True,
    'render_ylabel': 'OHLC Gradients',
    'render_size_episode': (12,8),
    'render_size_human': (8, 3.5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    # Networking:
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# TensorFlow distributed cluster settings used by the A3C launcher:
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_test_tf_2016',
}
# A3C launcher: ties the cluster settings, the environment and the policy together.
launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    # Must name the policy class defined in this notebook; the previous value,
    # `MultiLSTMPolicy`, is not defined here and raised NameError.
    policy_class=LSTMPolicyTFF,
    policy_config={'lstm_layers': (256, 256)},
    rollout_length=20,
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=10,
    verbose=2,
)

In [None]:
# Run the launcher built in the previous cell.
# NOTE(review): presumably this starts training and blocks until it ends or is interrupted - confirm.
launcher.run()

In [None]:
# Create a stand-alone environment for manual inspection.
# env_config is modified in place: the ports are moved away from the 5000/4999 pair set earlier.
env_config.update({'port': 5050, 'data_port': 5049})
env = BTgymEnv(**env_config)

In [None]:
# Reset the environment and keep what reset() returns (the initial observation).
o = env.reset()

In [None]:
# Display the value returned by reset().
o

In [None]:
# Take a single step with action 0 and print the first element of the returned 4-tuple.
# NOTE(review): action 0 presumably maps to 'hold', the first entry of portfolio_actions - confirm.
o,r,d,i = env.step(0)
print(o)

In [None]:
# Close the environment once inspection is finished.
env.close()