In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import pandas as pd
import os
import sys
import datetime
from pathlib import Path
import os
from absl import logging

sys.path.append('Enviroment')
from tf_trade_enviroment import MyTradeEnv

from tf_agents.environments import utils
from tf_agents.environments import tf_py_environment

from tf_agents.agents.dqn import dqn_agent
from tf_agents.networks import q_rnn_network
from tf_agents.utils import common

from learningClass import learningHelper


# Switch on TF2 semantics first: the TensorFlow docs ask for this call to be
# made at the start of the program, before any tensors or ops are created.
# Previously it ran last, after the seed was set and tf.print had executed.
tf.compat.v1.enable_v2_behavior()

# Surface INFO-level absl messages in the cell output.
logging.set_verbosity(logging.INFO)

# Fix TensorFlow's global random seed so repeated runs are comparable.
tf.random.set_seed(12)

# Record the runtime this notebook executed on: visible GPUs and TF version.
tf.print(tf.config.list_physical_devices('GPU'))
tf.print(tf.__version__)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
2.3.1


## Read OCHLV data from the file

In [5]:
# Ticker symbols handed to the training and evaluation environments.
stocks_train = [
    'AAPL', 'C', 'SPY', 'QQQ', 'WYNN',
    'NVDA', 'VNET', 'GOLD', 'UBS',
]
stocks_test = ['MSFT', 'AA', 'GPRO', 'AMD']

# Date windows ('start' / 'end') for the two environments.
# NOTE(review): the test window starts on the same day the training window
# ends; whether that day is shared depends on how MyTradeEnv treats the
# bounds - confirm.
history_range = dict(
    start=datetime.datetime(1990, 1, 1),
    end=datetime.datetime(2020, 10, 1),
)
history_range_test = dict(
    start=datetime.datetime(2020, 10, 1),
    end=datetime.datetime(2020, 11, 21),
)

## Create and validate the environment

In [6]:
# Build a plain Python environment on the training data and let tf_agents
# run it for two episodes to check it against its own declared specs.
environment = MyTradeEnv(stocks_train, history_range)
utils.validate_py_environment(environment, episodes=2)

# Print the action spec followed by each component of the time-step spec.
print('action_spec:', environment.action_spec())

time_step_spec = environment.time_step_spec()
for label, spec in (
    ('time_step_spec.observation:', time_step_spec.observation),
    ('time_step_spec.step_type:', time_step_spec.step_type),
    ('time_step_spec.discount:', time_step_spec.discount),
    ('time_step_spec.reward:', time_step_spec.reward),
):
    print(label, spec)



action_spec: BoundedArraySpec(shape=(), dtype=dtype('int32'), name='action', minimum=0, maximum=3)
time_step_spec.observation: {'price': BoundedArraySpec(shape=(20, 5), dtype=dtype('float32'), name='obs_price', minimum=0.0, maximum=3.4028234663852886e+38), 'pos': BoundedArraySpec(shape=(2,), dtype=dtype('int32'), name='obs_pos', minimum=0, maximum=1)}
time_step_spec.step_type: ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')
time_step_spec.discount: BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0)
time_step_spec.reward: ArraySpec(shape=(), dtype=dtype('float32'), name='reward')


## Create training and validation environments

In [7]:
# Wrap one MyTradeEnv per data split in a TFPyEnvironment. The training
# environment is constructed first, then the evaluation one.
train_env, eval_env = (
    tf_py_environment.TFPyEnvironment(MyTradeEnv(tickers, date_range))
    for tickers, date_range in (
        (stocks_train, history_range),
        (stocks_test, history_range_test),
    )
)

## Define training parameters

In [5]:
# Step size passed to the Adam optimizer when the agent is created.
learning_rate = 0.001

## Create q_network

In [6]:
# The observation is a dict ('price', 'pos'), so every entry gets its own
# preprocessing layer and the results are concatenated along the last axis
# before they reach the network body.
preprocessing_layers = dict(
    price=tf.keras.layers.Flatten(),
    pos=tf.keras.layers.Dense(2),
)
preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

# Layer widths: dense layers before the LSTM, the LSTM itself, and dense
# layers after it.
input_fc_layer_params = (40,)
lstm_size = (20,)
output_fc_layer_params = (20,)

# Recurrent Q-network built from the training environment's specs.
observation_spec = train_env.observation_spec()
action_spec = train_env.action_spec()
q_net = q_rnn_network.QRnnNetwork(
    observation_spec,
    action_spec,
    preprocessing_layers=preprocessing_layers,
    preprocessing_combiner=preprocessing_combiner,
    input_fc_layer_params=input_fc_layer_params,
    lstm_size=lstm_size,
    output_fc_layer_params=output_fc_layer_params,
)

## Create the DQN-agent

In [7]:
# Global step variable used as the agent's train-step counter; it is also
# handed to the learning helper later in the notebook.
global_step = tf.compat.v1.train.get_or_create_global_step()

# Adam optimizer driven by the configured learning rate.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

# DQN agent over the recurrent Q-network, using an element-wise squared
# TD-error loss.
agent = dqn_agent.DqnAgent(
    train_env.time_step_spec(),
    train_env.action_spec(),
    q_network=q_net,
    optimizer=optimizer,
    td_errors_loss_fn=common.element_wise_squared_loss,
    train_step_counter=global_step,
)
agent.initialize()

# (Optional) wrap the train step with tf_agents' common.function so it runs
# as a TF function rather than op by op.
agent.train = common.function(agent.train)

In [8]:
# Settings for the project-local learning helper, gathered in one place.
# collect_policy is deliberately not passed; on the recorded run the helper
# logged that it selected the agent's collect policy.
helper_settings = dict(
    train_env=train_env,
    test_env=eval_env,
    agent=agent,
    global_step=global_step,
    collect_episodes=3,
    collect_steps_per_iteration=10000,
    eval_interval=5,
    replay_buffer_capacity=20000,
    batch_size=500,
    log_interval=10,
    chkpdir='./rnn_chkp/',
    train_sequence_length=5,
)
magent = learningHelper(**helper_settings)

# Try to resume from an earlier checkpoint; the recorded run found none
# under ./rnn_chkp/ and carried on with fresh state.
magent.restore_check_point()

INFO:absl:selected agent collect_policy
INFO:absl:No checkpoint available at ./rnn_chkp/checkpoint
INFO:absl:No checkpoint available at ./rnn_chkp/policy_checkpoint
INFO:absl:Initializing is done


In [9]:
%%time
# Train through the helper and keep whatever train_agent returns in `loss`.
# The argument is presumably the number of epochs (the recorded log prints
# "Epoch: 1") - confirm in learningHelper. The recorded run took about
# five minutes of wall time.
loss = magent.train_agent(1)
# Alternative training entry point, currently disabled:
#magent.train_agent_with_avg_ret_condition(50, 10000, 100)

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
Epoch: 1, global_step 21, epoch train time: 299.3964638710022
CPU times: user 6min 8s, sys: 21.7 s, total: 6min 30s
Wall time: 5min


In [10]:
# How many times to run the evaluation; raise it to repeat the measurement.
num_eval_runs = 1

# A named loop variable is used instead of `_`: in IPython `_` holds the
# last cell output, and assigning to it shadows that for the whole session.
for eval_run in range(num_eval_runs):
    # The argument is presumably the number of evaluation episodes (the
    # recorded log reports NumberOfEpisodes = 1) - confirm in learningHelper.
    magent.evaluate_agent(1)


INFO:absl: 
		 EnvironmentSteps = 10
		 AverageReturn = -0.8629794120788574
		 AverageEpisodeLength = 10.0
		 NumberOfEpisodes = 1


In [11]:
# Export the agent's policy through the helper. On the recorded run the
# assets were written under ./rnn_chkp/policy/.
magent.save_policy()

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: ./rnn_chkp/policy/assets
INFO:tensorflow:Assets written to: ./rnn_chkp/policy/assets


In [37]:
# Save a checkpoint through the helper - presumably into the chkpdir given
# at construction ('./rnn_chkp/'); confirm in learningHelper.
magent.store_check_point()

In [16]:
# Reload the helper's state from its checkpoint.
# NOTE(review): this cell's recorded execution count (16) is lower than the
# store_check_point cell's (37), so the saved notebook was not run top to
# bottom; re-run from a fresh kernel before trusting the outputs.
magent.restore_check_point()