In [1]:
#%load_ext autoreload
#%autoreload 2

#import sys
#sys.path.insert(0,'..')

import os

import backtrader as bt
import backtrader.indicators as btind
import numpy as np
import scipy.signal as signal
from scipy import stats

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from launcher import Launcher
from model import LSTMPolicy


In [15]:
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
from tensorflow.contrib.layers import flatten as flatten_on_batch
from tensorflow.python.util.nest import flatten as flatten_nested

class LSTMPolicy2(object):
    """
    Policy estimator: convolutional feature extractor followed by an LSTM cell.

    Builds a TF1 graph made of four 3x3 / stride-2 conv layers with ELU
    activations, one recurrent cell and two linear heads (action logits and
    state value). Variables are created in whatever variable scope is active
    when the constructor is called.

    Note: `lstm_sizes` may list several sizes, but only the FIRST cell is wired
    into the graph (stacking via `rnn.MultiRNNCell` is currently disabled).
    """
    def __init__(self, ob_space, ac_space, lstm_class=rnn.LSTMCell, lstm_sizes=(256,)):
        """
        Args:
            ob_space:    iterable of ints, shape of a single observation. The conv
                         layers read dimension [3] of the batched input, so a single
                         observation must be rank 3 (presumably height, width,
                         channels -- confirm against the environment).
            ac_space:    int, number of discrete actions (size of the logits layer
                         and depth of the one-hot sampled action).
            lstm_class:  RNN cell class, called as `lstm_class(size, state_is_tuple=True)`.
            lstm_sizes:  non-empty sequence of cell sizes; see class note.

        Raises:
            ValueError: if `lstm_sizes` is empty.
        """
        if not lstm_sizes:
            raise ValueError('lstm_sizes should contain at least one cell size.')

        self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space), name='x_in_pl')

        # Conv layers, features here:
        for i in range(4):
            x = tf.nn.elu(self.conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))

        # Flatten to feed to LSTM bank; add a fake batch dimension so that the
        # original batch axis is treated as time: [1, time, features].
        x = tf.expand_dims(flatten_on_batch(x), [0])

        print('flatten_x_shape:', x.shape)

        # Define LSTM layers (only the first one is used, stacking is disabled):
        lstm = [lstm_class(size, state_is_tuple=True) for size in lstm_sizes]
        self.lstm = lstm[0]

        # Number of time steps = leading dimension of the INPUT placeholder.
        # It must not be taken from the expanded `x`, whose leading dimension is
        # always 1: that would make dynamic_rnn zero out every output after the
        # first step of a rollout.
        step_size = tf.shape(self.x)[:1]
        print('step_size:', step_size)

        self.lstm_init_state = self.lstm.zero_state(1, dtype=tf.float32)

        # Placeholders for the recurrent state; they default to the zero state.
        lstm_state_pl = self.rnn_placeholders(self.lstm.zero_state(1, dtype=tf.float32))
        self.lstm_state_pl_flatten = flatten_nested(lstm_state_pl)

        print('lstm_state_pl :', lstm_state_pl )

        lstm_outputs, self.lstm_state_out = tf.nn.dynamic_rnn(
            self.lstm,
            x,
            initial_state=lstm_state_pl,
            sequence_length=step_size,
            time_major=False
        )

        # Collapse [1, time, units] -> [time, units]. Use the size of the cell
        # that is actually in the graph, not the last entry of `lstm_sizes`.
        x = tf.reshape(lstm_outputs, [-1, self.lstm.output_size])

        print('x_shape_before_logits:', x.shape)

        self.logits = self.linear(x, ac_space, "action", self.normalized_columns_initializer(0.01))
        self.vf = tf.reshape(self.linear(x, 1, "value", self.normalized_columns_initializer(1.0)), [-1])
        self.sample = self.categorical_sample(self.logits, ac_space)[0, :]
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)

    def get_initial_state(self):
        """Evaluates and returns the zero LSTM state using the default session."""
        sess = tf.get_default_session()
        return sess.run(self.lstm_init_state)

    def _make_feed(self, ob, lstm_state):
        """Builds feed dict: flattened LSTM state placeholders plus a single observation."""
        feeder = dict(zip(self.lstm_state_pl_flatten, flatten_nested(lstm_state)))
        feeder[self.x] = [ob]
        return feeder

    def act(self, ob, lstm_state):
        """
        Runs one policy step in the default session.

        Args:
            ob:          single observation (wrapped into a batch of one).
            lstm_state:  recurrent state structure matching the state placeholders.

        Returns:
            list: [sampled one-hot action, value estimate, new LSTM state].
        """
        sess = tf.get_default_session()
        return sess.run([self.sample, self.vf, self.lstm_state_out], self._make_feed(ob, lstm_state))

    def value(self, ob, lstm_state):
        """Returns the value-head estimate for a single observation and LSTM state."""
        sess = tf.get_default_session()
        return sess.run(self.vf, self._make_feed(ob, lstm_state))[0]

    def normalized_columns_initializer(self, std=1.0):
        """
        Returns an initializer producing random normal weights whose columns
        are rescaled to have L2 norm equal to `std`.
        """
        def _initializer(shape, dtype=None, partition_info=None):
            out = np.random.randn(*shape).astype(np.float32)
            out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

        return _initializer

    def conv2d(self, x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32, collections=None):
        """
        2D convolution plus bias; variables "W" and "b" are created under scope `name`.

        Weights are drawn uniformly from [-w_bound, w_bound], where
        w_bound = sqrt(6 / (fan_in + fan_out)); bias starts at zero.
        Expects rank-4 input: the input channel count is read from dimension [3].
        """
        with tf.variable_scope(name):
            stride_shape = [1, stride[0], stride[1], 1]
            filter_shape = [filter_size[0], filter_size[1], int(x.get_shape()[3]), num_filters]

            # there are "num input feature maps * filter height * filter width"
            # inputs to each hidden unit
            fan_in = np.prod(filter_shape[:3])
            # each unit in the lower layer receives a gradient from:
            # "num output feature maps * filter height * filter width" /
            #   pooling size
            fan_out = np.prod(filter_shape[:2]) * num_filters
            # initialize weights with random weights
            w_bound = np.sqrt(6. / (fan_in + fan_out))

            w = tf.get_variable("W", filter_shape, dtype, tf.random_uniform_initializer(-w_bound, w_bound),
                                collections=collections)
            b = tf.get_variable("b", [1, 1, 1, num_filters], initializer=tf.constant_initializer(0.0),
                                collections=collections)
            return tf.nn.conv2d(x, w, stride_shape, pad) + b

    def linear(self, x, size, name, initializer=None, bias_init=0):
        """Fully connected layer `x @ w + b` for rank-2 `x`; variables are named `name`/w and `name`/b."""
        w = tf.get_variable(name + "/w", [x.get_shape()[1], size], initializer=initializer)
        b = tf.get_variable(name + "/b", [size], initializer=tf.constant_initializer(bias_init))
        return tf.matmul(x, w) + b

    def categorical_sample(self, logits, d):
        """Samples one category per row of `logits`; returns it one-hot encoded with depth `d`."""
        # Subtracting the row-wise max does not change the distribution,
        # it only keeps the logits in a numerically safe range.
        value = tf.squeeze(tf.multinomial(logits - tf.reduce_max(logits, [1], keep_dims=True), 1), [1])
        return tf.one_hot(value, d)

    def rnn_placeholders(self, state):
        """
        Converts RNN state tensors to placeholders with the zero state as default.

        Handles an `LSTMStateTuple`, a plain tensor, or (recursively) any
        iterable of those; the nested structure is returned as a tuple.
        """
        if isinstance(state, tf.contrib.rnn.LSTMStateTuple):
            c, h = state
            c = tf.placeholder_with_default(c, c.shape, c.op.name + '_pl')
            print('c_shape:', c.shape)
            h = tf.placeholder_with_default(h, h.shape, h.op.name + '_pl')
            return tf.contrib.rnn.LSTMStateTuple(c, h)
        elif isinstance(state, tf.Tensor):
            h = state
            h = tf.placeholder_with_default(h, h.shape, h.op.name + '_pl')
            return h
        else:
            structure = [self.rnn_placeholders(x) for x in state]
            return tuple(structure)

In [16]:
# GYM TEST ENV:
# Distributed setup: eight workers and one parameter server on localhost;
# logs and checkpoints go under `log_dir`.
cluster_config = {
    'host': '127.0.0.1',
    'port': 22222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing_gym',
}

# Plain gym Atari environment used to sanity-check the policy defined above.
env_config = {'gym_id': 'Breakout-v0'}

# NOTE(review): `test_mode=True` presumably switches the launcher to a plain
# gym environment instead of BTgym -- confirm against the Launcher implementation.
launcher = Launcher(
    cluster_config=cluster_config,
    env_config=env_config,
    model_class=LSTMPolicy2,
    train_steps=500000000,
    opt_learn_rate=1e-4,
    rollout_length=20,
    test_mode=True,
    model_summary_freq=50,
    episode_summary_freq=2,
    env_render_freq=10,
    verbose=2,
)

[2017-08-27 13:21:15,208] ./tmp/a3c_testing_gym created.
[2017-08-27 13:21:16,145] Launcher ready.


In [17]:
# Start the gym test session configured above; per the log output below it
# keeps running until interrupted with Ctrl-C.
launcher.run()

[2017-08-27 13:21:17,667] worker_0 tf.server started.
[2017-08-27 13:21:17,709] parameters_server started.
[2017-08-27 13:21:17,733] making environment.
[2017-08-27 13:21:17,791] Making new env: Breakout-v0
[2017-08-27 13:21:18,250] worker_0:envronment ok.
[2017-08-27 13:21:18,252] A3C_0: init() started


flatten_x_shape: (1, ?, 288)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:0/device:CPU:0)
c_shape: (1, 256)
lstm_state_pl : LSTMStateTuple(c=<tf.Tensor 'global/global/LSTMCellZeroState_1/zeros_pl:0' shape=(1, 256) dtype=float32>, h=<tf.Tensor 'global/global/LSTMCellZeroState_1/zeros_1_pl:0' shape=(1, 256) dtype=float32>)
x_shape_before_logits: (?, 256)
flatten_x_shape: (1, ?, 288)
step_size: Tensor("local/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:0/device:CPU:0)
c_shape: (1, 256)
lstm_state_pl : LSTMStateTuple(c=<tf.Tensor 'local/local/LSTMCellZeroState_1/zeros_pl:0' shape=(1, 256) dtype=float32>, h=<tf.Tensor 'local/local/LSTMCellZeroState_1/zeros_1_pl:0' shape=(1, 256) dtype=float32>)
x_shape_before_logits: (?, 256)


[2017-08-27 13:21:22,130] A3C_0: train op defined
[2017-08-27 13:21:22,287] A3C_0: init() done
[2017-08-27 13:21:22,289] worker_0:trainer ok.
[2017-08-27 13:21:22,669] Press `Ctrl-C` to stop training and close launcher.
[2017-08-27 13:21:22,698] worker_1 tf.server started.
[2017-08-27 13:21:22,701] worker_2 tf.server started.
[2017-08-27 13:21:22,729] making environment.
[2017-08-27 13:21:22,730] making environment.
[2017-08-27 13:21:22,749] worker_3 tf.server started.
[2017-08-27 13:21:22,750] worker_7 tf.server started.
[2017-08-27 13:21:22,749] worker_5 tf.server started.
[2017-08-27 13:21:22,750] worker_6 tf.server started.
[2017-08-27 13:21:22,763] making environment.
[2017-08-27 13:21:22,754] worker_4 tf.server started.
[2017-08-27 13:21:22,763] making environment.
[2017-08-27 13:21:22,763] making environment.
[2017-08-27 13:21:22,765] making environment.
[2017-08-27 13:21:22,747] Making new env: Breakout-v0
[2017-08-27 13:21:22,773] making environment.
[2017-08-27 13:21:22,759] 

Press `Ctrl-C` to stop training and close launcher.


[2017-08-27 13:21:22,935] worker_1:envronment ok.
[2017-08-27 13:21:22,938] worker_2:envronment ok.
[2017-08-27 13:21:22,943] A3C_1: init() started
[2017-08-27 13:21:22,944] worker_6:envronment ok.
[2017-08-27 13:21:22,952] A3C_6: init() started
[2017-08-27 13:21:22,947] worker_3:envronment ok.
[2017-08-27 13:21:22,944] A3C_2: init() started
[2017-08-27 13:21:22,955] worker_7:envronment ok.
[2017-08-27 13:21:22,957] worker_4:envronment ok.
[2017-08-27 13:21:22,947] worker_5:envronment ok.
[2017-08-27 13:21:22,960] A3C_3: init() started
[2017-08-27 13:21:22,963] A3C_7: init() started
[2017-08-27 13:21:22,962] A3C_4: init() started
[2017-08-27 13:21:22,964] A3C_5: init() started


flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:1/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:3/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:6/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:7/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:5/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:2/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:4/device:CPU:0)
c_shape: (1, 256)
c_shape: (1, 256

[2017-08-27 13:21:24,378] connecting to the parameter server... 


flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
step_size: Tensor("local/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:5/device:CPU:0)
step_size: Tensor("local/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:3/device:CPU:0)
c_shape: (1, 256)
c_shape: (1, 256)
lstm_state_pl : LSTMStateTuple(c=<tf.Tensor 'local/local/LSTMCellZeroState_1/zeros_pl:0' shape=(1, 256) dtype=float32>, h=<tf.Tensor 'local/local/LSTMCellZeroState_1/zeros_1_pl:0' shape=(1, 256) dtype=float32>)
flatten_x_shape: (1, ?, 288)
lstm_state_pl : LSTMStateTuple(c=<tf.Tensor 'local/local/LSTMCellZeroState_1/zeros_pl:0' shape=(1, 256) dtype=float32>, h=<tf.Tensor 'local/local/LSTMCellZeroState_1/zeros_1_pl:0' shape=(1, 256) dtype=float32>)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
step_size: Tensor("local/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:1/device:CPU:0)
flatten_x_shape: (1, ?, 288)
step_size: Tensor("local/strided_slice:0", s

[2017-08-27 13:21:26,051] Initializing all parameters.
[2017-08-27 13:21:26,590] A3C_5: train op defined
[2017-08-27 13:21:26,630] A3C_3: train op defined
[2017-08-27 13:21:26,739] A3C_1: train op defined
[2017-08-27 13:21:26,762] A3C_2: train op defined
[2017-08-27 13:21:26,774] A3C_7: train op defined
[2017-08-27 13:21:26,798] A3C_6: train op defined
[2017-08-27 13:21:26,824] A3C_4: train op defined
[2017-08-27 13:21:26,965] A3C_5: init() done
[2017-08-27 13:21:26,972] worker_5:trainer ok.
[2017-08-27 13:21:27,005] A3C_3: init() done
[2017-08-27 13:21:27,028] worker_3:trainer ok.
[2017-08-27 13:21:27,127] A3C_1: init() done
[2017-08-27 13:21:27,129] worker_1:trainer ok.
[2017-08-27 13:21:27,152] A3C_2: init() done
[2017-08-27 13:21:27,154] worker_2:trainer ok.
[2017-08-27 13:21:27,161] A3C_7: init() done
[2017-08-27 13:21:27,163] worker_7:trainer ok.
[2017-08-27 13:21:27,190] A3C_6: init() done
[2017-08-27 13:21:27,192] worker_6:trainer ok.
[2017-08-27 13:21:27,201] A3C_4: init() don

INFO:tensorflow:Starting queue runners.


[2017-08-27 13:21:30,375] Starting queue runners.
[2017-08-27 13:21:30,418] worker_5: starting training at step: 0


INFO:tensorflow:Starting queue runners.


[2017-08-27 13:21:30,459] Starting queue runners.


INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Starting queue runners.


[2017-08-27 13:21:30,535] worker_3: starting training at step: 0
[2017-08-27 13:21:30,555] Starting queue runners.
[2017-08-27 13:21:30,554] Starting queue runners.
[2017-08-27 13:21:30,643] worker_7: starting training at step: 0
[2017-08-27 13:21:30,645] worker_6: starting training at step: 0


INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Starting queue runners.


[2017-08-27 13:21:30,683] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-08-27 13:21:30,703] Starting queue runners.
[2017-08-27 13:21:30,683] Starting queue runners.
[2017-08-27 13:21:30,738] worker_4: starting training at step: 0
[2017-08-27 13:21:30,732] worker_2: starting training at step: 0
[2017-08-27 13:21:30,744] worker_1: starting training at step: 0


INFO:tensorflow:Starting standard services.


[2017-08-27 13:21:32,669] Starting standard services.


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:21:32,710] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Starting queue runners.


[2017-08-27 13:21:32,732] Starting queue runners.


INFO:tensorflow:global/global_step/sec: 0


[2017-08-27 13:21:32,756] global/global_step/sec: 0
[2017-08-27 13:21:32,867] worker_0: starting training at step: 700


INFO:tensorflow:global/global_step/sec: 523.696


[2017-08-27 13:23:32,768] global/global_step/sec: 523.696


INFO:tensorflow:global/global_step/sec: 545.358


[2017-08-27 13:25:32,741] global/global_step/sec: 545.358


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:26:32,701] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 518.238


[2017-08-27 13:27:32,738] global/global_step/sec: 518.238


INFO:tensorflow:global/global_step/sec: 531.304


[2017-08-27 13:29:32,739] global/global_step/sec: 531.304


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:31:32,700] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 516.39


[2017-08-27 13:31:32,771] global/global_step/sec: 516.39


INFO:tensorflow:global/global_step/sec: 523.096


[2017-08-27 13:33:32,743] global/global_step/sec: 523.096


INFO:tensorflow:global/global_step/sec: 526.832


[2017-08-27 13:35:32,781] global/global_step/sec: 526.832


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:36:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 514.827


[2017-08-27 13:37:32,738] global/global_step/sec: 514.827


INFO:tensorflow:global/global_step/sec: 521.483


[2017-08-27 13:39:32,742] global/global_step/sec: 521.483


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:41:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:46:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:51:32,699] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 13:56:32,698] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:01:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:06:32,699] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:11:32,700] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:16:32,698] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:21:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:26:32,699] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:31:32,702] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:36:32,700] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:41:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:46:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:51:32,697] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 14:56:32,703] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:01:32,706] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:06:32,707] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:11:32,703] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:16:32,706] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:21:32,701] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:26:32,707] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-27 15:31:32,701] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt
[2017-08-27 15:34:04,315] worker_1 has joined.
[2017-08-27 15:34:04,328] worker_2 has joined.
[2017-08-27 15:34:04,329] worker_3 has joined.
[2017-08-27 15:34:04,330] worker_4 has joined.
[2017-08-27 15:34:04,331] worker_5 has joined.
[2017-08-27 15:34:04,332] worker_6 has joined.
[2017-08-27 15:34:04,357] worker_7 has joined.
[2017-08-27 15:34:04,358] chief_worker_0 has joined.
[2017-08-27 15:34:04,359] parameter_server_0 has joined.
[2017-08-27 15:34:04,360] Launcher closed.


In [None]:
class MyStrategy(BTgymStrategy):
    """
    Example subclass of the BT server inner computation strategy.

    Keeps a rolling buffer of close prices and exposes its continuous wavelet
    transform as the `model_input` observation; the reward combines the
    current relative account value with the result of the last closed trade.
    """

    def __init__(self, **kwargs):
        self.log = self.env._log

        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0], period=5)
        self.data.dim_sma.plotinfo.plot = False

        self.target_value = self.env.broker.startingcash * (1 + self.p.target_call / 100)

        # Time-embedded vector of normalized broker values. It is filled with the
        # value that the normalization used in `next()` yields when the broker value
        # equals starting cash, i.e. drawdown_call / (target_call + drawdown_call),
        # so that the initial content is consistent with the later updates.
        self.current_value_embeded = np.ones(self.p.state_shape['raw_state'].shape[0]) * \
            self.p.drawdown_call / (self.p.target_call + self.p.drawdown_call)

        self.order_penalty = 1
        self.trade_just_closed = False
        self.trade_result = None

        self.channel = 3
        # Rolling buffer of close prices; scaled to the first close in `nextstart()`.
        self.x_buffer = np.ones(self.p.state_shape['raw_state'].shape[0])

    def nextstart(self):
        """Records the inner time embedding and fills the price buffer with the current close."""
        self.inner_embedding = self.data.close.buflen()
        self.log.debug('Inner time embedding: {}'.format(self.inner_embedding))
        self.x_buffer *= self.data.close[0]

    def notify_trade(self, trade):
        """Stores the commission-adjusted result of a trade once it is closed."""
        if trade.isclosed:
            # Set trade flag and result:
            self.trade_just_closed = True
            self.trade_result = trade.pnlcomm

    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x))."""
        return 1/(1 + np.exp(-x))

    def get_state(self):
        """
        Computes featurized RL-ready environment observation state
        by applying continuous wavelet transform to the time-embedded vector
        of close-price gradients.

        Returns:
            self.state, with key 'model_input' updated to the last
            `state_shape['model_input'].shape[0]` rows of the transform.
        """
        # Use buffered close prices:
        X = self.x_buffer

        # Prepare parameters:
        Tau = 2
        max_cwt_scale = self.p.state_shape['model_input'].shape[1]
        # Wavelet widths: `max_cwt_scale` values from Tau to max_cwt_scale + Tau - 1.
        cwt_width = np.linspace(Tau, max_cwt_scale + Tau - 1, max_cwt_scale)

        # Gradient scaling factor:
        T = 1e4

        # Get vector of gradients of buffered prices:
        X = np.gradient(X, axis=0) * T

        # Compute continuous wavelet transform using Ricker wavelet;
        # transpose to [time, scale] and shift by one:
        X = signal.cwt(X, signal.ricker, cwt_width).T + 1

        self.state['model_input'] = X[-self.p.state_shape['model_input'].shape[0]:, :]

        return self.state

    def get_reward(self):
        """
        Defines reward as scaled relative account value, plus the result of
        the trade that has just been closed (if any), minus a penalty for failed orders.

        Resets `trade_just_closed` and `order_failed` flags as a side effect.
        NOTE(review): `order_failed` is not set in this class; presumably it is
        maintained by the base strategy -- confirm.
        """
        r = (self.broker.get_value() / self.env.broker.startingcash - 1) * 10

        # Result of the last closed trade:
        if self.trade_just_closed:
            r += self.trade_result
            self.trade_just_closed = False

        # Penalty for failed order:
        r -= self.order_penalty * self.order_failed
        self.order_failed = 0

        return r / 10

    def next(self):
        """
        Extension of default implementation.
        Defines one step environment routine for server 'Episode mode';
        At least, it should handle order execution logic according to action received.
        """
        # Shift both rolling buffers one step back in time:
        self.current_value_embeded = np.roll(self.current_value_embeded, -1)

        self.x_buffer = np.roll(self.x_buffer, -1)

        # Normalized broker value: 0 at drawdown_call level, 1 at target_call level.
        self.current_value_embeded[-1] =\
            (self.broker.get_value() / self.env.broker.startingcash - 1 + self.p.drawdown_call / 100) / \
            (self.p.target_call + self.p.drawdown_call) * 100

        self.x_buffer[-1] = self.data.close[0]

        # Simple action-to-order logic; nothing is submitted while an order is pending:
        if self.action == 'hold' or self.order:
            pass
        elif self.action == 'buy':
            self.order = self.buy()
            self.broker_message = 'New BUY created; ' + self.broker_message
        elif self.action == 'sell':
            self.order = self.sell()
            self.broker_message = 'New SELL created; ' + self.broker_message
        elif self.action == 'close':
            self.order = self.close()
            self.broker_message = 'New CLOSE created; ' + self.broker_message
            
# Backtesting engine parameters: observations are described by two boxes,
# a raw price window and the square wavelet-transform input fed to the model.
time_embed_dim = 30
state_shape = dict(
    raw_state=spaces.Box(low=-1, high=1, shape=(2*time_embed_dim, 4)),
    model_input=spaces.Box(low=-100, high=100, shape=(time_embed_dim, time_embed_dim)),
)

MyCerebro = bt.Cerebro()

MyCerebro.addstrategy(
    MyStrategy,
    state_shape=state_shape,
    portfolio_actions=('hold', 'buy', 'sell'),
    drawdown_call=5, # in percent of initial cash
    target_call=20,
    skip_frame=10,
)

# Leveraged account with fixed-size orders:
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)
MyCerebro.broker.set_shortcash(False)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=10000,)

MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Available data: seven years of 1 minute bars.
# Only the 2016 file is actually passed to the dataset below.
filenames = [
    '../data/DAT_ASCII_EURUSD_M1_2010.csv',
    '../data/DAT_ASCII_EURUSD_M1_2011.csv',
    '../data/DAT_ASCII_EURUSD_M1_2012.csv',
    '../data/DAT_ASCII_EURUSD_M1_2013.csv',
    '../data/DAT_ASCII_EURUSD_M1_2014.csv',
    '../data/DAT_ASCII_EURUSD_M1_2015.csv',
    '../data/DAT_ASCII_EURUSD_M1_2016.csv',
]

MyDataset = BTgymDataset(
    filename='../data/DAT_ASCII_EURUSD_M1_2016.csv',
    start_weekdays=[0, 1, 2, 3, 4],
    episode_len_days=0,
    episode_len_hours=23,
    episode_len_minutes=0,
    start_00=False,
    time_gap_hours=6,
)

# Environment settings: data source, engine, rendering and network ports.
env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': False,
    'render_ylabel': 'CWT transform',
    'render_size_episode': (12,8),
    'render_size_human': (8, 3.5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# Distributed setup: eight workers and one parameter server on localhost.
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing_7',
}

# NOTE(review): `SimpleLSTM` is neither defined nor imported in the visible part
# of this notebook (only `LSTMPolicy` and `LSTMPolicy2` are in scope), so this
# cell looks like it raises NameError on a fresh kernel -- confirm where the
# class is expected to come from.
launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    model_class=SimpleLSTM,
    rollout_length=20,
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=20,
    verbose=1,
)

In [None]:
# Start the BTgym training session configured in the cell above.
launcher.run()

In [None]:
# Dump launcher configuration for inspection: constructor kwargs, environment
# and cluster settings, then every per-worker configuration entry.
print(launcher.kwargs, '\n\n')
for section in (launcher.env_config, launcher.cluster_config, launcher.cluster_spec):
    print(section)

for worker_config in launcher.workers_config_list:
    print('============')
    for key, value in worker_config.items():
        print('{}:\n{}\n'.format(key, value))
    

In [None]:
def func1(max_step):
    """
    Scratch demo of `nonlocal` state shared between a function and its closure.

    Runs 20 iterations; on each one the inner counter is advanced and the line
    "<returned value> <step> <done>" is printed. The counter wraps to zero
    (and `done` is raised) whenever it reaches `max_step`.
    """
    step = 0
    done = False

    def func2(max_step):
        # Advance the shared counter; wrap around and flag completion at the limit.
        nonlocal step, done
        step += 1
        if step != max_step:
            return step
        step = 0
        done = True
        return step

    for _ in range(20):
        done = False
        returned = func2(max_step)
        print(returned, step, done)
        


# Run the closure demo with the counter wrapping at 7.
func1(7)
            

In [None]:
# Scratch check: a dict filled through `update()` is still a plain dict.
a = {}
a.update(b=2, c=4)
type(a) is dict