In [1]:
#%load_ext autoreload
#%autoreload 2

#import sys
#sys.path.insert(0,'..')

import os

import backtrader as bt
import backtrader.indicators as btind
import numpy as np
import scipy.signal as signal
from scipy import stats

from gym import spaces

from btgym import BTgymEnv, BTgymStrategy, BTgymDataset

from launcher import Launcher
from model import LSTMPolicy


In [10]:
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
from tensorflow.contrib.layers import flatten as flatten_on_batch
from tensorflow.python.util.nest import flatten as flatten_nested

class LSTMPolicy2(object):
    """
    Actor-critic policy estimator: convolutional encoder followed by an LSTM cell.

    The graph is built in the current default graph / variable scope and consists of
    four strided convolution layers, one LSTM cell unrolled by `tf.nn.dynamic_rnn`
    and two linear heads (action logits and state value).

    The first axis of the input placeholder is treated as the time axis of a single
    sequence: the flattened conv features are given a leading batch axis of size 1
    before being fed to the RNN.

    Only the first entry of `lstm_sizes` is used to build the network; the stacked
    (`rnn.MultiRNNCell`) variant is left commented out.

    Attributes:
        x:                      input placeholder, shape `[None] + list(ob_space)`;
        lstm:                   RNN cell in use;
        lstm_init_state:        zero-state tensors of the cell for batch size 1;
        lstm_state_pl_flatten:  flat list of RNN state placeholders (feed targets);
        lstm_state_out:         RNN state after the last processed step;
        logits:                 action logits, shape `[steps, ac_space]`;
        vf:                     value estimates, shape `[steps]`;
        sample:                 one-hot action sampled for the first step;
        var_list:               trainable variables of the current variable scope.
    """
    def __init__(self, ob_space, ac_space, lstm_class=rnn.BasicLSTMCell, lstm_sizes=(256,)):
        """
        Defines the policy graph.

        Args:
            ob_space:   observation shape (iterable of ints); the conv layers read the
                        4th axis of the batched input, so a rank-3 shape is expected;
            ac_space:   number of discrete actions (width of logits and of one-hot sample);
            lstm_class: RNN cell class, instantiated as `lstm_class(size, state_is_tuple=True)`;
            lstm_sizes: iterable of cell sizes; a cell is created for every entry,
                        but only the first one is used.
        """
        self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space), name='x_in_pl')

        print('LSTM init started')

        # Conv layers, features here:
        for i in range(4):
            x = tf.nn.elu(self.conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))

        # Flatten to feed to LSTM bank; the added leading axis is a batch of size 1,
        # so the original first axis becomes the time axis:
        x = tf.expand_dims(flatten_on_batch(x), [0])

        print('flatten_x_shape:', x.shape)

        # Define LSTM layers:
        lstm = [lstm_class(size, state_is_tuple=True) for size in lstm_sizes]

        #self.lstm = rnn.MultiRNNCell(lstm, state_is_tuple=True)
        self.lstm = lstm[0]

        # Sequence length has to be taken from the input placeholder: `x` already
        # carries the leading batch axis of size 1, so `tf.shape(x)[:1]` is always [1]
        # and dynamic_rnn would zero out every output past the first time step.
        step_size = tf.shape(self.x)[:1]
        print('step_size:', step_size)

        self.lstm_init_state = self.lstm.zero_state(1, dtype=tf.float32)

        lstm_state_pl = self.rnn_placeholders(self.lstm.zero_state(1, dtype=tf.float32))
        self.lstm_state_pl_flatten = flatten_nested(lstm_state_pl)

        print('lstm_state_pl_flattened :', self.lstm_state_pl_flatten )

        lstm_outputs, self.lstm_state_out = tf.nn.dynamic_rnn(
            self.lstm,
            x,
            initial_state=lstm_state_pl,
            sequence_length=step_size,
            time_major=False
        )

        # Collapse [1, steps, units] to [steps, units]; the width is read from the cell
        # actually in use rather than from the last entry of `lstm_sizes`:
        x = tf.reshape(lstm_outputs, [-1, self.lstm.output_size])

        print('x_shape_before_logits:', x.shape)

        self.logits = self.linear(x, ac_space, "action", self.normalized_columns_initializer(0.01))
        self.vf = tf.reshape(self.linear(x, 1, "value", self.normalized_columns_initializer(1.0)), [-1])
        #self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
        self.sample = self.categorical_sample(self.logits, ac_space)[0, :]
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)

    def get_initial_features(self):
        """
        Returns:
            evaluated zero state of the RNN cell (batch size 1); requires a default session.
        """
        sess = tf.get_default_session()
        return sess.run(self.lstm_init_state)

    def __get_initial_features(self):
        """
        Fixed-value test stub (zeros and ones of shape (1, 8)); not called anywhere in this class.
        """
        return[np.zeros((1,8)), np.ones((1,8))]

    def _state_feed(self, ob, lstm_state):
        """
        Builds the feed dictionary shared by `act()` and `value()`.

        Args:
            ob:         single observation, fed as a one-step sequence;
            lstm_state: RNN state with the same nested structure as the state placeholders.
        """
        feeder = dict(zip(self.lstm_state_pl_flatten, flatten_nested(lstm_state)))
        feeder[self.x] = [ob]
        return feeder

    def act(self, ob, lstm_state):
        """
        Runs one policy step in the default session.

        Args:
            ob:         single observation;
            lstm_state: current RNN state.

        Returns:
            list of: sampled one-hot action, value estimates, new RNN state.
        """
        sess = tf.get_default_session()
        return sess.run([self.sample, self.vf, self.lstm_state_out], self._state_feed(ob, lstm_state))

    def value(self, ob, lstm_state):
        """
        Evaluates the value head for a single observation in the default session.

        Args:
            ob:         single observation;
            lstm_state: current RNN state.

        Returns:
            value estimate of the given observation.
        """
        sess = tf.get_default_session()
        return sess.run(self.vf, self._state_feed(ob, lstm_state))[0]

    def normalized_columns_initializer(self, std=1.0):
        """
        Returns an initializer producing random normal weights whose columns
        are rescaled to have L2 norm equal to `std`.
        """
        def _initializer(shape, dtype=None, partition_info=None):
            out = np.random.randn(*shape).astype(np.float32)
            out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

        return _initializer

    def conv2d(self, x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32, collections=None):
        """
        2D convolution layer with bias, variables are created under scope `name`.

        Args:
            x:              rank-4 input tensor, channels on the last axis;
            num_filters:    number of output feature maps;
            name:           variable scope name;
            filter_size:    filter (height, width);
            stride:         (vertical, horizontal) stride;
            pad:            padding mode passed to `tf.nn.conv2d`;
            dtype:          dtype of the filter weights;
            collections:    collections the variables are added to.

        Returns:
            convolution output plus bias (no activation applied).
        """
        with tf.variable_scope(name):
            stride_shape = [1, stride[0], stride[1], 1]
            filter_shape = [filter_size[0], filter_size[1], int(x.get_shape()[3]), num_filters]

            # there are "num input feature maps * filter height * filter width"
            # inputs to each hidden unit
            fan_in = np.prod(filter_shape[:3])
            # each unit in the lower layer receives a gradient from:
            # "num output feature maps * filter height * filter width" /
            #   pooling size
            fan_out = np.prod(filter_shape[:2]) * num_filters
            # initialize weights with random weights
            w_bound = np.sqrt(6. / (fan_in + fan_out))

            w = tf.get_variable("W", filter_shape, dtype, tf.random_uniform_initializer(-w_bound, w_bound),
                                collections=collections)
            b = tf.get_variable("b", [1, 1, 1, num_filters], initializer=tf.constant_initializer(0.0),
                                collections=collections)
            return tf.nn.conv2d(x, w, stride_shape, pad) + b

    def linear(self, x, size, name, initializer=None, bias_init=0):
        """
        Fully connected layer `x @ w + b` with variables named `name/w` and `name/b`.

        Args:
            x:              rank-2 input with statically known second dimension;
            size:           number of output units;
            name:           variable name prefix;
            initializer:    weights initializer;
            bias_init:      constant the bias is initialized with.
        """
        w = tf.get_variable(name + "/w", [x.get_shape()[1], size], initializer=initializer)
        b = tf.get_variable(name + "/b", [size], initializer=tf.constant_initializer(bias_init))
        return tf.matmul(x, w) + b

    def categorical_sample(self, logits, d):
        """
        Samples one action per row of `logits` and returns it one-hot encoded with depth `d`.
        The row maximum is subtracted from the logits before sampling.
        """
        value = tf.squeeze(tf.multinomial(logits - tf.reduce_max(logits, [1], keep_dims=True), 1), [1])
        return tf.one_hot(value, d)

    def __rnn_placeholders(self, state):
        """
        Converts RNN state tensors to placeholders with the given state as default value.
        Alternative to `rnn_placeholders()`; not called anywhere in this class.
        """
        if isinstance(state, tf.contrib.rnn.LSTMStateTuple):
            c, h = state
            c = tf.placeholder_with_default(c, c.shape, c.op.name + '_c_pl')
            print('c_shape:', c.shape)
            h = tf.placeholder_with_default(h, h.shape, h.op.name + '_h_pl')
            return tf.contrib.rnn.LSTMStateTuple(c, h)
        elif isinstance(state, tf.Tensor):
            h = state
            h = tf.placeholder_with_default(h, h.shape, h.op.name + '_h_pl')
            return h
        else:
            # Recurse into this same variant so nested states keep their defaults too:
            structure = [self.__rnn_placeholders(x) for x in state]
            return tuple(structure)

    def rnn_placeholders(self, state):
        """
        Converts RNN state tensors to plain float32 placeholders of the same shape
        (no default value, so they must always be fed).

        Args:
            state:  `LSTMStateTuple`, single tensor or an iterable of those.

        Returns:
            placeholders arranged in the same structure (iterables become tuples).
        """
        if isinstance(state, tf.contrib.rnn.LSTMStateTuple):
            c, h = state
            c = tf.placeholder(tf.float32, c.shape, c.op.name + '_c_pl')
            print('c_shape:', c.shape)
            h = tf.placeholder(tf.float32, h.shape, h.op.name + '_h_pl')
            return tf.contrib.rnn.LSTMStateTuple(c, h)
        elif isinstance(state, tf.Tensor):
            h = state
            h = tf.placeholder(tf.float32, h.shape, h.op.name + '_h_pl')
            return h
        else:
            structure = [self.rnn_placeholders(x) for x in state]
            return tuple(structure)

In [None]:
import gym
# Register a custom environment under the id 'test-v01'.
# NOTE(review): the entry point presumably refers to attribute `test_env` of a local
# module `test` -- confirm that module is importable before running this cell.
gym.envs.register(id='test-v01', entry_point='test:test_env')


In [11]:
# Launcher set-up for a plain Gym environment (test mode).

# Distributed cluster layout and log location:
cluster_config = {
    'host': '127.0.0.1',
    'port': 22222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing_gym',
}

# Environment to train on, selected by its Gym id
# (alternative id: 'test-v01'):
env_config = {'gym_id': 'Breakout-v0'}

launcher = Launcher(
    cluster_config=cluster_config,
    env_config=env_config,
    model_class=LSTMPolicy2,
    train_steps=500000000,
    opt_learn_rate=1e-4,
    rollout_length=20,
    test_mode=True,
    model_summary_freq=50,
    episode_summary_freq=2,
    env_render_freq=10,
    verbose=2,
)

[2017-08-29 08:10:52,333] ./tmp/a3c_testing_gym created.
[2017-08-29 08:10:52,956] Launcher ready.


In [None]:
# Start training; per the log output below, this spawns the parameter server and the
# workers and keeps running until interrupted with Ctrl-C.
launcher.run()

[2017-08-29 08:10:59,362] worker_0 tf.server started.
[2017-08-29 08:10:59,358] parameters_server started.
[2017-08-29 08:10:59,403] making environment.
[2017-08-29 08:10:59,405] Making new env: Breakout-v0
[2017-08-29 08:10:59,598] worker_0:envronment ok.
[2017-08-29 08:10:59,600] A3C_0: init() started


LSTM init started
flatten_x_shape: (1, ?, 288)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:0/device:CPU:0)
c_shape: (1, 256)
lstm_state_pl_flattened : [<tf.Tensor 'global/global/BasicLSTMCellZeroState_1/zeros_c_pl:0' shape=(1, 256) dtype=float32>, <tf.Tensor 'global/global/BasicLSTMCellZeroState_1/zeros_1_h_pl:0' shape=(1, 256) dtype=float32>]
x_shape_before_logits: (?, 256)
LSTM init started
flatten_x_shape: (1, ?, 288)
step_size: Tensor("local/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:0/device:CPU:0)
c_shape: (1, 256)
lstm_state_pl_flattened : [<tf.Tensor 'local/local/BasicLSTMCellZeroState_1/zeros_c_pl:0' shape=(1, 256) dtype=float32>, <tf.Tensor 'local/local/BasicLSTMCellZeroState_1/zeros_1_h_pl:0' shape=(1, 256) dtype=float32>]
x_shape_before_logits: (?, 256)


[2017-08-29 08:11:01,368] A3C_0: train op defined
[2017-08-29 08:11:01,436] A3C_0: init() done
[2017-08-29 08:11:01,438] worker_0:trainer ok.
[2017-08-29 08:11:02,348] connecting to the parameter server... 
[2017-08-29 08:11:03,025] Initializing all parameters.
[2017-08-29 08:11:04,371] worker_1 tf.server started.
[2017-08-29 08:11:04,376] making environment.
[2017-08-29 08:11:04,376] worker_2 tf.server started.
[2017-08-29 08:11:04,387] Press `Ctrl-C` to stop training and close launcher.
[2017-08-29 08:11:04,387] making environment.
[2017-08-29 08:11:04,380] worker_3 tf.server started.
[2017-08-29 08:11:04,387] Making new env: Breakout-v0
[2017-08-29 08:11:04,393] worker_4 tf.server started.
[2017-08-29 08:11:04,400] worker_6 tf.server started.
[2017-08-29 08:11:04,394] worker_5 tf.server started.
[2017-08-29 08:11:04,399] making environment.
[2017-08-29 08:11:04,403] making environment.
[2017-08-29 08:11:04,399] Making new env: Breakout-v0
[2017-08-29 08:11:04,406] making environment

LSTM init started


[2017-08-29 08:11:04,508] A3C_3: init() started
[2017-08-29 08:11:04,507] worker_4:envronment ok.
[2017-08-29 08:11:04,509] worker_5:envronment ok.


LSTM init started


[2017-08-29 08:11:04,518] A3C_4: init() started


LSTM init started


[2017-08-29 08:11:04,518] A3C_5: init() started


LSTM init started
LSTM init started


[2017-08-29 08:11:04,545] worker_6:envronment ok.
[2017-08-29 08:11:04,553] worker_7:envronment ok.
[2017-08-29 08:11:04,555] A3C_6: init() started
[2017-08-29 08:11:04,559] A3C_7: init() started


LSTM init started
LSTM init started
Press `Ctrl-C` to stop training and close launcher.
flatten_x_shape: (1, ?, 288)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:1/device:CPU:0)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:2/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:3/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:4/device:CPU:0)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:5/device:CPU:0)
flatten_x_shape: (1, ?, 288)
flatten_x_shape: (1, ?, 288)
c_shape: (1, 256)
step_size: Tensor("global/strided_slice:0", shape=(1,), dtype=int32, device=/job:worker/task:7/device:CPU:0)
lstm_state_pl_flattened : [<tf.Tensor

[2017-08-29 08:11:07,420] Starting standard services.


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:11:07,438] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Starting queue runners.
INFO:tensorflow:global/global_step/sec: 0


[2017-08-29 08:11:07,439] Starting queue runners.
[2017-08-29 08:11:07,441] global/global_step/sec: 0
[2017-08-29 08:11:07,469] worker_0: starting training at step: 0
[2017-08-29 08:11:08,308] A3C_4: train op defined
[2017-08-29 08:11:08,328] A3C_1: train op defined
[2017-08-29 08:11:08,351] A3C_2: train op defined
[2017-08-29 08:11:08,397] A3C_3: train op defined
[2017-08-29 08:11:08,417] A3C_5: train op defined
[2017-08-29 08:11:08,413] A3C_7: train op defined
[2017-08-29 08:11:08,437] A3C_6: train op defined
[2017-08-29 08:11:08,481] A3C_4: init() done
[2017-08-29 08:11:08,492] A3C_1: init() done
[2017-08-29 08:11:08,498] worker_4:trainer ok.
[2017-08-29 08:11:08,516] worker_1:trainer ok.
[2017-08-29 08:11:08,526] A3C_2: init() done
[2017-08-29 08:11:08,544] worker_2:trainer ok.
[2017-08-29 08:11:08,569] A3C_3: init() done
[2017-08-29 08:11:08,574] worker_3:trainer ok.
[2017-08-29 08:11:08,594] A3C_7: init() done
[2017-08-29 08:11:08,598] worker_7:trainer ok.
[2017-08-29 08:11:08,60

INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,503] Starting queue runners.
[2017-08-29 08:11:12,566] worker_4: starting training at step: 549


INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,585] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,591] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,607] Starting queue runners.
[2017-08-29 08:11:12,624] worker_6: starting training at step: 549
[2017-08-29 08:11:12,623] worker_1: starting training at step: 549


INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,662] Starting queue runners.
[2017-08-29 08:11:12,667] worker_7: starting training at step: 549
[2017-08-29 08:11:12,697] worker_2: starting training at step: 549


INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,823] Starting queue runners.


INFO:tensorflow:Starting queue runners.


[2017-08-29 08:11:12,854] Starting queue runners.
[2017-08-29 08:11:12,883] worker_3: starting training at step: 589
[2017-08-29 08:11:12,922] worker_5: starting training at step: 589


INFO:tensorflow:global/global_step/sec: 528.702


[2017-08-29 08:13:07,449] global/global_step/sec: 528.702


INFO:tensorflow:global/global_step/sec: 550.811


[2017-08-29 08:15:07,465] global/global_step/sec: 550.811


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:16:07,428] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 540.889


[2017-08-29 08:17:07,455] global/global_step/sec: 540.889


INFO:tensorflow:global/global_step/sec: 538.096


[2017-08-29 08:19:07,454] global/global_step/sec: 538.096


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:21:07,430] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 537.518


[2017-08-29 08:21:07,474] global/global_step/sec: 537.518


INFO:tensorflow:global/global_step/sec: 533.111


[2017-08-29 08:23:07,445] global/global_step/sec: 533.111


INFO:tensorflow:global/global_step/sec: 534.339


[2017-08-29 08:25:07,445] global/global_step/sec: 534.339


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:26:07,426] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:global/global_step/sec: 546.264


[2017-08-29 08:27:07,448] global/global_step/sec: 546.264


INFO:tensorflow:global/global_step/sec: 558.791


[2017-08-29 08:29:07,448] global/global_step/sec: 558.791


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:31:07,426] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:36:07,430] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:41:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:46:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:51:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 08:56:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:01:07,426] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:06:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:11:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:16:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:21:07,432] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:26:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:31:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:36:07,435] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:41:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:46:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:51:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 09:56:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:01:07,428] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:06:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:11:07,430] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:16:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:21:07,428] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:26:07,434] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:31:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:36:07,435] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:41:07,430] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:46:07,435] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:51:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 10:56:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:01:07,426] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:06:07,434] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:11:07,426] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:16:07,432] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:21:07,435] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:26:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:31:07,432] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:36:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:41:07,425] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:46:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:51:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 11:56:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:01:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:06:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:11:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:16:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:21:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:26:07,432] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:31:07,435] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:36:07,429] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:41:07,427] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:46:07,432] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:51:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 12:56:07,431] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 13:01:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 13:06:07,433] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


INFO:tensorflow:Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


[2017-08-29 13:11:07,434] Saving checkpoint to path ./tmp/a3c_testing_gym/train/model.ckpt


In [None]:
class MyStrategy(BTgymStrategy):
    """
    Example BTgymStrategy subclass.

    Observations are a continuous wavelet transform of buffered close-price gradients;
    reward combines relative portfolio value, result of the last closed trade
    and a penalty for failed orders.
    """

    def __init__(self, **kwargs):
        params = self.p
        raw_len = params.state_shape['raw_state'].shape[0]

        self.log = self.env._log

        # Moving average attached to the data feed, excluded from plotting:
        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0], period=5)
        self.data.dim_sma.plotinfo.plot = False

        # Portfolio value corresponding to `target_call` percent of profit:
        self.target_value = self.env.broker.startingcash * (1 + params.target_call / 100)

        # Time-embedded vector of normalized broker values
        # (updated on every step, currently not part of the observation):
        self.current_value_embeded = np.ones(raw_len) * \
            params.target_call / (params.target_call + params.drawdown_call)

        # Reward bookkeeping:
        self.order_penalty = 1
        self.trade_just_closed = False
        self.trade_result = None

        # Not read anywhere in this class:
        self.channel = 3

        # Rolling buffer of close prices and all-zero observation for testing:
        self.x_buffer = np.ones(raw_len)
        self.zero_state = np.zeros(params.state_shape['model_input'].shape)

    def nextstart(self):
        """
        Records the data buffer length and fills the price buffer
        with the current close price.
        """
        embedding = self.data.close.buflen()
        self.inner_embedding = embedding
        self.log.debug('Inner time embedding: {}'.format(embedding))
        self.x_buffer *= self.data.close[0]

    def notify_trade(self, trade):
        """
        Stores the net result of a trade once it gets closed.
        """
        if not trade.isclosed:
            return

        # Set trade flag and result:
        self.trade_just_closed = True
        self.trade_result = trade.pnlcomm

    def sigmoid(self, x):
        """
        Logistic function of `x`.
        """
        return 1 / (np.exp(-x) + 1)

    def get_state(self):
        """
        Computes the observation: continuous wavelet transform (Ricker wavelet)
        of scaled gradients of the buffered close prices, cropped to the last
        rows required by the `model_input` shape.

        Returns:
            state dictionary with updated 'model_input' entry.
        """
        tau = 2
        gradient_scale = 1e4
        input_shape = self.p.state_shape['model_input'].shape

        # One wavelet width per column of the model input:
        num_scales = input_shape[1]
        widths = np.linspace(tau, num_scales + tau - 1, num_scales)

        # Scaled gradients of the buffered prices:
        gradients = np.gradient(self.x_buffer, axis=0) * gradient_scale

        # Transform, put time on the first axis and shift by one:
        transformed = signal.cwt(gradients, signal.ricker, widths).T + 1

        self.state['model_input'] = transformed[-input_shape[0]:, :]

        return self.state

    def __get_state(self):
        "Zero-test state"
        self.state['model_input'] = self.zero_state
        return self.state

    def get_reward(self):
        """
        Computes step reward from relative portfolio value, last closed trade
        result (if any) and failed order penalty; resets both event flags.
        """
        reward = (self.broker.get_value() / self.env.broker.startingcash - 1) * 10

        # Add the result of a trade closed since the last call:
        if self.trade_just_closed:
            reward += self.trade_result
            self.trade_just_closed = False

        # Penalty for failed order:
        reward -= self.order_penalty * self.order_failed
        self.order_failed = 0

        return reward / 10

    def next(self):
        """
        One environment step: shifts the value and price buffers, appends the
        latest values and turns the received action into a broker order.
        """
        params = self.p

        # Normalized time-embedded vector of broker values:
        self.current_value_embeded = np.roll(self.current_value_embeded, -1)
        self.current_value_embeded[-1] = \
            (self.broker.get_value() / self.env.broker.startingcash - 1 + params.drawdown_call / 100) / \
            (params.target_call + params.drawdown_call) * 100

        # Close prices:
        self.x_buffer = np.roll(self.x_buffer, -1)
        self.x_buffer[-1] = self.data.close[0]

        # Nothing to submit when holding or while an order is still pending:
        if self.action == 'hold' or self.order:
            return

        # Simple action-to-order logic:
        if self.action == 'buy':
            self.order = self.buy()
            prefix = 'New BUY created; '
        elif self.action == 'sell':
            self.order = self.sell()
            prefix = 'New SELL created; '
        elif self.action == 'close':
            self.order = self.close()
            prefix = 'New CLOSE created; '
        else:
            return

        self.broker_message = prefix + self.broker_message
            
# Set backtesting engine parameters:
# `time_embed_dim` sizes both observation spaces below.
time_embed_dim = 30
# 'raw_state' has 2*time_embed_dim rows (MyStrategy sizes its price and value buffers
# from that first dimension); 'model_input' is a square time_embed_dim matrix.
state_shape = {
    'raw_state': spaces.Box(low=-1, high=1, shape=(2*time_embed_dim, 4)),
    'model_input': spaces.Box(low=-100, high=100, shape=(time_embed_dim, time_embed_dim))
}

MyCerebro = bt.Cerebro()

# Strategy parameters; MyStrategy reads `target_call` and `drawdown_call` as percentages.
MyCerebro.addstrategy(
    MyStrategy,
    state_shape=state_shape,
    portfolio_actions=('hold','buy', 'sell'),
    drawdown_call=5, # in percent of initial cash
    target_call=20,
    skip_frame=10,
)

# Set leveraged account:
MyCerebro.broker.setcash(2000)
MyCerebro.broker.setcommission(commission=0.0001, leverage=10.0)
MyCerebro.broker.set_shortcash(False)
MyCerebro.addsizer(bt.sizers.SizerFix, stake=10000,)


MyCerebro.addanalyzer(bt.analyzers.DrawDown)

# Data files: seven yearly files of 1 minute EURUSD bars.
# NOTE: this list is currently unused -- the dataset below is built from
# the synthetic sine-wave test file instead.
filenames = [
    '../data/DAT_ASCII_EURUSD_M1_2010.csv',
    '../data/DAT_ASCII_EURUSD_M1_2011.csv',
    '../data/DAT_ASCII_EURUSD_M1_2012.csv',
    '../data/DAT_ASCII_EURUSD_M1_2013.csv',
    '../data/DAT_ASCII_EURUSD_M1_2014.csv',
    '../data/DAT_ASCII_EURUSD_M1_2015.csv',
    '../data/DAT_ASCII_EURUSD_M1_2016.csv',
]

# Dataset definition; commented-out `filename` lines are alternative data sources.
MyDataset = BTgymDataset(
    #filename=filenames,
    filename='../data/test_sine_1min_period256_delta0002.csv',
    #filename='../data/DAT_ASCII_EURUSD_M1_2016.csv',
    start_weekdays=[0, 1, 2, 3, 4],
    episode_len_days=0,
    episode_len_hours=23,
    episode_len_minutes=0,
    start_00=False,
    time_gap_hours=6,
)
# BTgym environment settings: data, engine, rendering and network ports.
env_config = {
    'dataset': MyDataset,
    'engine': MyCerebro,
    'render_modes': ['episode', 'human', 'model_input'],
    'render_state_as_image': False,
    'render_ylabel': 'CWT transform',
    'render_size_episode': (12, 8),
    'render_size_human': (8, 3.5),
    'render_size_state': (10, 5),
    'render_dpi': 75,
    'port': 5000,
    'data_port': 4999,
    'connect_timeout': 60,
    'verbose': 0,
}

# Distributed cluster layout and log location:
cluster_config = {
    'host': '127.0.0.1',
    'port': 12222,
    'num_workers': 8,
    'num_ps': 1,
    'log_dir': './tmp/a3c_testing_7',
}

# Training launcher for the BTgym environment:
launcher = Launcher(
    cluster_config=cluster_config,
    env_class=BTgymEnv,
    env_config=env_config,
    model_class=LSTMPolicy2,
    rollout_length=5,
    test_mode=False,
    train_steps=1000000000,
    model_summary_freq=20,
    episode_summary_freq=1,
    env_render_freq=20,
    verbose=2,
)

In [None]:
# Start training with the most recently created `launcher`.
launcher.run()

In [None]:
# Dump launcher settings for inspection: keyword arguments first,
# then environment / cluster configuration and the cluster specification.
print(launcher.kwargs, '\n\n')
for attr_name in ('env_config', 'cluster_config', 'cluster_spec'):
    print(getattr(launcher, attr_name))

# Per-worker configuration, one separator line per worker:
for worker_config in launcher.workers_config_list:
    print('============')
    for key, value in worker_config.items():
        print('{}:\n{}\n'.format(key, value))
    

In [None]:
def func1(max_step):
    """
    Scratch demo of `nonlocal`: a nested counter wraps around at `max_step`
    and raises a flag while the enclosing loop prints the shared state.

    Prints 20 lines of the form `<returned step> <step> <done>`.

    Args:
        max_step:   counter value at which the counter resets to zero.
    """
    step = 0
    done = False

    def advance(limit):
        # Both names live in the enclosing function's scope.
        nonlocal step, done
        step += 1
        if step == limit:
            step, done = 0, True
        return step

    for _ in range(20):
        # Flag is cleared before every call, so it is only True on wrap-around:
        done = False
        returned = advance(max_step)
        print(returned, step, done)
        


func1(7)
            

In [None]:
# Scratch check: a dict filled through update() is still exactly of type `dict`.
a = {}
a.update(b=2, c=4)
type(a) == dict