In [None]:
#%load_ext autoreload
#%autoreload 2

#import sys
#sys.path.insert(0,'..')

import os

import backtrader as bt
import backtrader.indicators as btind
import numpy as np
import scipy.signal as signal
from scipy import stats

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from launcher import Launcher
from model import LSTMPolicy

import tensorflow as tf
import tensorflow.contrib.rnn as rnn

In [None]:
# Gym test environment: settings for a quick run against a stock Gym task.

# Distributed-training cluster settings handed to the Launcher.
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing_gym',
}

# Environment settings: only the Gym environment id is given here.
env_config = {
    'gym_id': 'Breakout-v0',
}

class LSTM_new(LSTMPolicy):
    """LSTMPolicy subclass that overrides nothing; it is passed to the launcher as `model_class`."""

# Build the launcher for the Gym test run from the two configs and the model
# class defined above. The meaning of each keyword is defined by Launcher
# (not visible in this notebook); the notes below are best guesses to confirm.
launcher = Launcher(
    cluster_config=cluster_config,
    env_config=env_config,
    model_class=LSTM_new,
    train_steps=500000000,
    opt_learn_rate=1e-4,
    rollout_length=20,
    # NOTE(review): presumably switches the launcher to the plain Gym
    # environment named by env_config['gym_id'] - confirm against Launcher.
    test_mode=True,
    model_summary_freq=50,
    episode_summary_freq=2,
    env_render_freq=10,
    verbose=2
)

In [None]:
# Start the run of the launcher built in the cell above. `launcher` is whichever
# Launcher was assigned last in this kernel, so execute this right after that cell.
launcher.run()


In [None]:
class SimpleLSTM(LSTMPolicy):
    """
    Recurrent policy/value model: the observation is flattened, fed through a
    single LSTM layer and then into two linear heads (action logits and state value).

    The helpers `flatten`, `linear`, `normalized_columns_initializer` and
    `categorical_sample` are looked up on `self`, i.e. they are expected to be
    provided by LSTMPolicy. The parent constructor is not called.
    """

    def __init__(self, ob_space, ac_space):
        """
        Builds the TensorFlow graph of the model.

        Args:
            ob_space:   iterable of observation dimensions; it is converted with
                        `list()` and appended to a leading `None` dimension
                        to form the input placeholder shape.
            ac_space:   passed as the output size of the action head and to
                        `categorical_sample` - presumably the number of discrete
                        actions; confirm against LSTMPolicy.
        """
        print('LSTM init started')

        # Shapes/tensors recorded while the graph is built, for inspection:
        self.diagnostic = dict()

        # Observation input; the leading dimension is left unspecified.
        observations = tf.placeholder(tf.float32, [None] + list(ob_space))
        self.x = observations

        self.diagnostic['input_shape'] = self.x.shape

        print('self.diagnostic0:', self.diagnostic)

        # Flatten each observation, then prepend a "fake" batch dimension of 1
        # so that the LSTM unrolls over the original leading (time) dimension.
        rnn_input = tf.expand_dims(self.flatten(observations), [0])

        self.diagnostic['flatten_shape'] = rnn_input.shape

        print('self.diagnostic1:', self.diagnostic)

        hidden_units = 256

        cell = rnn.BasicLSTMCell(hidden_units, state_is_tuple=True)

        self.state_size = cell.state_size

        # Number of rows fed in one pass, as a 1-element tensor; used as sequence length.
        sequence_len = tf.shape(self.x)[:1]

        self.diagnostic['step_size'] = sequence_len

        print('self.diagnostic2:', self.diagnostic)

        # Zero-valued initial LSTM state as numpy arrays:
        zero_c = np.zeros(shape=(1, cell.state_size.c), dtype=np.float32)
        zero_h = np.zeros(shape=(1, cell.state_size.h), dtype=np.float32)
        self.state_init = [zero_c, zero_h]

        # Placeholders through which the recurrent state is fed into the graph:
        c_placeholder = tf.placeholder(tf.float32, [1, cell.state_size.c])
        h_placeholder = tf.placeholder(tf.float32, [1, cell.state_size.h])
        self.state_in = [c_placeholder, h_placeholder]

        initial_state = rnn.LSTMStateTuple(c_placeholder, h_placeholder)

        rnn_outputs, final_state = tf.nn.dynamic_rnn(
            cell,
            rnn_input,
            initial_state=initial_state,
            sequence_length=sequence_len,
            time_major=False
        )

        final_c, final_h = final_state

        # Drop the fake batch dimension: one row of LSTM features per input row.
        features = tf.reshape(rnn_outputs, [-1, hidden_units])

        # Action head:
        self.logits = self.linear(
            features, ac_space, "action", self.normalized_columns_initializer(0.01)
        )

        # Value head, reshaped to a rank-1 tensor:
        value_out = self.linear(
            features, 1, "value", self.normalized_columns_initializer(1.0)
        )
        self.vf = tf.reshape(value_out, [-1])

        self.state_out = [final_c[:1, :], final_h[:1, :]]

        # First row of whatever `categorical_sample` returns for the logits:
        self.sample = self.categorical_sample(self.logits, ac_space)[0, :]

        # Trainable variables created under the current variable scope:
        self.var_list = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name
        )

In [None]:
class MyStrategy(BTgymStrategy):
    """
    Example subclass of the BT server inner computation strategy.

    State:  fixed-length rolling ("time-embedded") histories of the normalized
            broker value and of several simple moving averages of the first
            data feed; `get_state()` exposes the sigmoid-squashed time gradient
            of these histories under the `model_input` key.
    Reward: scaled result of the last closed trade minus a penalty for
            a failed order, see `get_reward()`.
    """

    # Periods of the simple moving averages used as state channels.
    # Together with the broker-value channel this gives 1 + 7 = 8 channels,
    # the channel count declared for 'model_input' in the notebook configuration.
    SMA_PERIODS = (4, 8, 16, 32, 64, 128, 256)

    def __init__(self, **kwargs):
        """
        Sets up indicators, rolling history buffers and reward bookkeeping.
        All keyword arguments are forwarded to the parent constructor.
        """
        super(MyStrategy, self).__init__(**kwargs)

        # Length of the time-embedding window (first dimension of the raw state space):
        embed_len = self.p.state_shape['raw_state'].shape[0]

        # History of the normalized broker value. It is pre-filled with the value
        # `next()` produces for an untouched account (broker value == starting cash),
        # which is drawdown_call / (target_call + drawdown_call). Using `target_call`
        # in the numerator here would disagree with `next()` and inject
        # a spurious jump into the value gradient at the start of an episode.
        self.current_value_embeded = np.ones(embed_len) * \
            self.p.drawdown_call / (self.p.target_call + self.p.drawdown_call)

        # One SMA indicator per period. Each one stays reachable under its
        # per-period name on the data feed: self.data.sma4 ... self.data.sma256.
        self.sma_indicators = []
        for period in self.SMA_PERIODS:
            sma = btind.SimpleMovingAverage(self.datas[0], period=period)
            setattr(self.data, 'sma{}'.format(period), sma)
            self.sma_indicators.append(sma)

        # Rolling histories of the SMA values: one row per period, one column
        # per time step, pre-filled with ones.
        self.sma_embedded = np.ones((len(self.SMA_PERIODS), embed_len))

        self.order_penalty = 0.5
        self.trade_just_closed = False
        self.trade_result = None

    def notify_trade(self, trade):
        """
        Remembers the result of a trade once it is closed,
        so that `get_reward()` can pick it up.
        """
        if trade.isclosed:
            # Set trade flag and result (pnl as reported by `trade.pnlcomm`):
            self.trade_just_closed = True
            self.trade_result = trade.pnlcomm

    def sigmoid(self, x):
        """
        Logistic function 1 / (1 + exp(-x)), element-wise for arrays.

        Evaluated as exp(-log(1 + exp(-x))) via `np.logaddexp`, so that large
        negative inputs yield 0 without an overflow inside `np.exp`.
        """
        return np.exp(-np.logaddexp(0, -x))

    def __get_state(self):
        """
        Alternative featurization, not used by the public `get_state()`:
        because of the double leading underscore the name is mangled
        to `_MyStrategy__get_state`.

        Computes featurized RL-ready environment observation state
        by applying continuous wavelet transform to time-embedded vector
        of close-price gradients.

        NOTE(review): a constant input makes the min-max denominator zero;
        guard it before putting this method back into use.
        """
        # Use close price (assumes column 3 of the raw state is close - TODO confirm):
        channel = 3

        X = self.raw_state[:, channel]

        # Prepare parameters:
        Tau = 5
        max_cwt_scale = self.p.state_shape['model_input'].shape[1]
        cwt_width = np.linspace(Tau, max_cwt_scale + Tau - 1, max_cwt_scale)  # scales of wavelet transform [n]

        # Gradient multiplier:
        T = 1

        # Get vector of gradients of last [n] prices:
        X = np.gradient(X, axis=0) * T

        # Compute continuous wavelet transform using Ricker wavelet, get [n,m,1]-dim. matrix:
        X = signal.cwt(X, signal.ricker, cwt_width).T[:, :, None]

        # Z-score:
        X = stats.zscore(X)

        # Local min-max norm:
        self.state['model_input'] = (X - X.min()) / (X.max() - X.min())

        return self.state

    def get_state(self):
        """
        Computes the `model_input` part of the observation state.

        Stacks the (down-scaled) broker-value history and the SMA histories
        into a [time, channel] matrix, takes the gradient along the time axis,
        scales it and squashes it with the sigmoid.

        Returns:
            self.state with the 'model_input' key updated.
        """
        # Scale of the broker-value channel relative to the SMA channels:
        t = 1e-2

        # Rows are channels (value first, then SMAs by ascending period);
        # transposed to [time, channel]:
        X = np.vstack((self.current_value_embeded * t, self.sma_embedded)).T

        self.state['model_input'] = self.sigmoid(np.gradient(X, axis=0) * 2.2e4)

        return self.state

    def get_reward(self):
        """
        Defines reward as a scaled function of the last closed trade result.

        The result of a trade closed since the previous call (if any) is taken,
        `order_penalty` is subtracted if an order has failed, and the sum
        is divided by 20. The value is not clipped, so it can be negative
        and is not bounded. Both flags are reset once consumed.
        """
        r = 0

        # Result of the trade closed since the last call:
        if self.trade_just_closed:
            r = self.trade_result
            self.trade_just_closed = False

        # Penalty for failed order:
        if self.order_failed:
            r -= self.order_penalty
            self.order_failed = False

        return r / 20

    def next(self):
        """
        Extension of default implementation.
        Defines one step environment routine for server 'Episode mode';
        At least, it should handle order execution logic according to action received.
        """
        # Shift all histories one step to the left; the slot that wraps around
        # to the end is overwritten with the current values below.
        self.current_value_embeded = np.roll(self.current_value_embeded, -1)
        self.sma_embedded = np.roll(self.sma_embedded, -1, axis=1)

        # Normalized broker value: 0 at the drawdown level, 1 at the target level
        # (both given in percent of the starting cash):
        self.current_value_embeded[-1] = \
            (self.broker.get_value() / self.env.broker.startingcash - 1 + self.p.drawdown_call / 100) / \
            (self.p.target_call + self.p.drawdown_call) * 100

        # Current value of every moving average:
        self.sma_embedded[:, -1] = [sma[0] for sma in self.sma_indicators]

        # Simple action-to-order logic; nothing is issued while `self.order` is set:
        if self.action == 'hold' or self.order:
            pass
        elif self.action == 'buy':
            self.order = self.buy()
            self.broker_message = 'New BUY created; ' + self.broker_message
        elif self.action == 'sell':
            self.order = self.sell()
            self.broker_message = 'New SELL created; ' + self.broker_message
        elif self.action == 'close':
            self.order = self.close()
            self.broker_message = 'New CLOSE created; ' + self.broker_message
            
# Backtesting engine, dataset and launcher configuration for the BTgym run.

# Observation spaces handed to the strategy:
state_shape = {
    'raw_state': spaces.Box(low=-1, high=1, shape=(30, 4)),
    'model_input': spaces.Box(low=0, high=1, shape=(30, 8)),
}

MyCerebro = bt.Cerebro()

# Strategy and its parameters:
MyCerebro.addstrategy(
    MyStrategy,
    state_shape=state_shape,
    portfolio_actions=('hold', 'buy', 'sell'),
    drawdown_call=5,  # in percent of initial cash
    target_call=20,
    skip_frame=10,
)

# Leveraged account with fixed-size orders:
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)
MyCerebro.broker.set_shortcash(False)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=10000)

MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Source data: seven yearly files of 1-minute EURUSD bars.
filenames = [
    '../data/DAT_ASCII_EURUSD_M1_2010.csv',
    '../data/DAT_ASCII_EURUSD_M1_2011.csv',
    '../data/DAT_ASCII_EURUSD_M1_2012.csv',
    '../data/DAT_ASCII_EURUSD_M1_2013.csv',
    '../data/DAT_ASCII_EURUSD_M1_2014.csv',
    '../data/DAT_ASCII_EURUSD_M1_2015.csv',
    '../data/DAT_ASCII_EURUSD_M1_2016.csv',
]

# Alternative single-file input kept from the original notebook:
# filename='../data/test_sine_1min_period256_delta0002.csv'
MyDataset = BTgymDataset(
    filename=filenames,
    start_weekdays=[0, 1, 2, 3, 4],
    episode_len_days=0,
    episode_len_hours=23,
    episode_len_minutes=0,
    start_00=False,
    time_gap_hours=6,
)

# Environment settings (dataset, engine, rendering and networking):
env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': False,
    'render_ylabel': 'AVG,VAL Gradients',
    'render_size_episode': (12, 8),
    'render_size_human': (8, 3.5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# Distributed-training cluster settings:
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing',
}

# Launcher for the BTgym environment with the SimpleLSTM model:
launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    model_class=SimpleLSTM,
    rollout_length=20,
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=20,
    verbose=2
)

In [None]:
# Start the run of the BTgym launcher built in the cell above. `launcher` is whichever
# Launcher was assigned last in this kernel, so execute this right after that cell.
launcher.run()

In [None]:
# Dump what the launcher holds: its keyword arguments, the three top-level
# configuration attributes, and then every per-worker configuration item by item.
print(launcher.kwargs, '\n\n')

for attr_name in ('env_config', 'cluster_config', 'cluster_spec'):
    print(getattr(launcher, attr_name))

for worker_config in launcher.workers_config_list:
    print('============')
    for key, value in worker_config.items():
        print('{}:\n{}\n'.format(key, value))
    

In [None]:
def func1(max_step):
    """
    Scratch demo of `nonlocal`: a nested counter shares `step` and `done`
    with the enclosing function.

    Calls the counter 20 times and prints, for each call, the returned value,
    the shared `step` and the shared `done` flag. The counter wraps to 0 and
    raises `done` whenever it reaches `max_step`; `done` is cleared again
    before every call.

    Args:
        max_step:   counter value at which the wrap-around happens.
    """
    step = 0
    done = False

    def advance(limit):
        """Increments the shared counter; on reaching `limit` resets it and sets `done`."""
        nonlocal step, done
        step = step + 1
        if step != limit:
            return step
        step = 0
        done = True
        return step

    for _ in range(20):
        done = False
        # The call must come first: `step` and `done` are read after it has updated them.
        returned = advance(max_step)
        print(returned, step, done)
        


# Run the demo with a wrap-around at 7: prints 20 lines, the counter returns to 0 on every 7th call.
func1(7)
            

In [None]:
# Scratch check: a dict filled through `update()` is still exactly of type `dict`.
a = {}
a.update(b=2, c=4)
type(a) == dict