In [12]:
from __future__ import print_function, division

from model.ddpg.actor import ActorNetwork
from model.ddpg.critic import CriticNetwork
from model.ddpg.ddpg import DDPG
from model.ornstein_uhlenbeck import OrnsteinUhlenbeckActionNoise
from model.td3.actor import TD3ActorNetwork
from model.td3.critic import TD3CriticNetwork
from model.td3.td3 import TD3
from stock_trading import StockActor, StockCritic, TD3StockActor, TD3StockCritic, obs_normalizer, get_model_path, get_result_path, test_model, get_variable_scope, test_model_multiple

from environment.portfolio import PortfolioEnv
from utils.data import read_stock_history, normalize

import numpy as np
import tflearn
import tensorflow as tf
import argparse
import pprint
import pandas as pd
import matplotlib.pyplot as plt
from technical_indicators.technical_indicators import moving_average, exponential_moving_average, momentum, rate_of_change, bollinger_bands, trix, standard_deviation

# Sanity check: reaching this line means every import above resolved.
print("Imports Complete!")

Imports Complete!


In [2]:
# Main Configuration
# Selects which agent the training cell builds: 'DDPG' or 'TD3'.
framework = 'DDPG'
# Forwarded to the actor/critic constructors: 'cnn' or 'lstm'.
predictor_type = 'lstm'
# Window length handed to the environment, used in state_dim, and reused as
# the look-back window of the technical indicators.
window_length = 5
# Forwarded to the actor/critic constructors and to the path/scope helpers.
use_batch_norm = True
# Forwarded to the agent (DDPG branch) or to the critic (TD3 branch).
log_return = True
# JSON file handed to the agent as `config_file`.
config_file_path = 'config/stock.json'

# Validation: fail fast on an unsupported value. The message now lists exactly
# the values that are accepted (the old text also advertised 'PG').
assert framework in ['DDPG', 'TD3'], 'Framework must be either DDPG or TD3'
assert predictor_type in ['cnn', 'lstm'], 'Predictor must be either cnn or lstm'

# Print
print("Model: {}-{}-{}".format(framework, predictor_type, window_length))

Model: DDPG-lstm-5


In [3]:
# Stock History
dataset_name = 'DJIA'
dataset_path = 'utils/datasets/{}.h5'.format(dataset_name)
history, assets, date_list = read_stock_history(filepath=dataset_path)

# Keep only the first four entries along the last axis.
# NOTE(review): presumably open/high/low/close, since index 3 is later treated
# as the close price -- confirm against read_stock_history.
history = history[:, :, 0:4]

print("Stock History Shape: {}".format(history.shape))
first_date, last_date = date_list[0], date_list[-1]
print("Full Stock History Date Range: {} -> {}".format(first_date, last_date))

Stock History Shape: (26, 6483, 4)
Full Stock History Date Range: 03/01/1995 -> 30/09/2020


In [4]:
# Training/Testing Date Range
full_length = len(date_list)
train_test_ratio = 6/7

# Position of the first testing date; the date just before it ends training.
split_index = int(full_length * train_test_ratio)

# NOTE(review): training starts `window_length` dates in and testing stops one
# date short of the end -- presumably to leave room for the first observation
# window and the final next-step lookup; confirm against PortfolioEnv.
train_start_date = date_list[window_length]
train_end_date = date_list[split_index - 1]
test_start_date = date_list[split_index]
test_end_date = date_list[full_length - 2]

# Step counts are the distance between the two boundary dates in date_list.
train_steps = date_list.index(train_end_date) - date_list.index(train_start_date)
test_steps = date_list.index(test_end_date) - date_list.index(test_start_date)

print("Training Date Range: {} -> {} ({} Steps)".format(
    train_start_date, train_end_date, int(train_steps)))
print("Testing Date Range: {} -> {} ({} Steps)".format(
    test_start_date, test_end_date, int(test_steps)))

Training Date Range: 10/01/1995 -> 25/01/2017 (5550 Steps)
Testing Date Range: 26/01/2017 -> 29/09/2020 (925 Steps)


In [5]:
# Episode steps
# Passed to PortfolioEnv as `steps` when the environment is created below.
steps = 1000
print("Episode Steps: {}".format(steps))

Episode Steps: 1000


In [16]:
# Generate Technical Indicators
# Index 3 of the last axis is used as the close-price series.
history_close = history[:, :, 3]
print('history_close.shape: {}'.format(history_close.shape))

# round() returns a float on Python 2 (this notebook imports from __future__),
# so cast explicitly: the short window must be an integer.
half_window = int(round(window_length / 2))

technical_indicators = []
for i in range(history_close.shape[0]):
    close = history_close[i]

    # Each indicator is wrapped in a one-element list so that np.vstack lays
    # them out as one row per indicator.
    ma_long = [moving_average(close, window_length)]
    ma_short = [moving_average(close, half_window)]
    # NOTE: the momentum indicator was disabled in the original cell and is
    # still left out here.
    roc = [rate_of_change(close, window_length)]
    upper, lower = bollinger_bands(close, window_length)
    std = [standard_deviation(close, window_length)]

    technical_indicators.append(
        np.vstack([ma_long, ma_short, roc, [upper], [lower], std])
    )

# A Python list has no `.shape` attribute, so the original last line raised
# AttributeError. Stack the per-row blocks into a single array first.
# NOTE(review): assumes every helper returns a series of the same length for
# every row, so all blocks share one shape -- confirm against the
# technical_indicators module.
technical_indicators = np.stack(technical_indicators)
technical_indicators.shape

history_close.shape: (26, 6483)


NameError: name 'window' is not defined

In [14]:
# Display the close-price matrix built in the indicators cell for a quick
# visual check (numpy elides the middle of a large array in its repr).
history_close

array([[  5.390625  ,   5.328125  ,   5.34375   , ..., 303.23001099,
        304.1499939 , 311.76998901],
       [ 10.08333302,  10.02777767,   9.97222233, ..., 272.32998657,
        272.10998535, 277.70999146],
       [ 23.375     ,  23.3125    ,  23.375     , ..., 166.08000183,
        163.6000061 , 165.25999451],
       ...,
       [  3.984375  ,   3.9765625 ,   4.0078125 , ...,  51.43000031,
         51.18999863,  51.77999878],
       [  1.89583337,   1.90972221,   1.89583337, ...,  39.13000107,
         39.06000137,  39.38999939],
       [  5.59375   ,   5.59375   ,   5.59375   , ...,  36.06999969,
         35.38000107,  35.91999817]])

In [None]:
# setup environment
# Hyper-parameters consumed by the actor/critic constructors further down.
batch_size = 64
action_bound = 1.
tau = 1e-3

# One class per asset plus one extra slot.
# NOTE(review): the extra slot is presumably the cash position -- confirm
# against PortfolioEnv.
nb_classes = 1 + len(assets)
action_dim = [nb_classes]
state_dim = [nb_classes, window_length]

# Training environment: the history is cut off at the last training date.
env = PortfolioEnv(
    history,
    assets,
    date_list,
    steps=steps,
    window_length=window_length,
    end_date=train_end_date,
)

In [None]:
# Zero-mean Ornstein-Uhlenbeck noise with one component per action dimension;
# it is handed to the agent as `actor_noise` in the training cell.
actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

# The three helpers take the same identifying settings in the same order, so
# build the argument tuple once and reuse it.
run_settings = (framework, window_length, predictor_type, use_batch_norm)
model_save_path = get_model_path(*run_settings)
summary_path = get_result_path(*run_settings)
variable_scope = get_variable_scope(*run_settings)

In [None]:
# Build the selected agent inside its own variable scope, then train it.
#
# Changes relative to the original cell:
#   * an unsupported `framework` now fails loudly *before* a session is
#     created, instead of leaving `model` undefined for the cells below;
#   * the critic receives the notebook-level `tau` (same value, 1e-3) so the
#     actor and critic can no longer drift apart when `tau` is tuned;
#   * the initialize / print / train tail, which was duplicated verbatim in
#     both branches, is written once.
if framework not in ('DDPG', 'TD3'):
    raise ValueError('Unsupported framework: {!r}'.format(framework))

with tf.variable_scope(variable_scope):
    # The session is deliberately left open: `model` is built around it and is
    # still used by later cells.
    sess = tf.Session()

    if framework == 'DDPG':
        # NOTE(review): the positional 1e-4 is presumably the actor learning
        # rate -- confirm against the StockActor signature.
        actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size,
                           predictor_type, use_batch_norm)
        critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=tau,
                             learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(),
                             predictor_type=predictor_type, use_batch_norm=use_batch_norm)
        model = DDPG(env, sess, actor, critic, actor_noise, obs_normalizer=obs_normalizer,
                     log_return=log_return, config_file=config_file_path,
                     model_save_path=model_save_path, summary_path=summary_path)
    else:  # framework == 'TD3' (guaranteed by the check above)
        actor = TD3StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size,
                              predictor_type, use_batch_norm)
        # NOTE(review): here `log_return` goes to the critic, whereas the DDPG
        # branch passes it to the agent itself -- confirm this asymmetry is
        # intended by the TD3 / TD3StockCritic signatures.
        critic = TD3StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=tau,
                                learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(),
                                predictor_type=predictor_type, use_batch_norm=use_batch_norm,
                                log_return=log_return, inp_actions=actor.scaled_out)
        model = TD3(env, sess, actor, critic, actor_noise, obs_normalizer=obs_normalizer,
                    config_file=config_file_path, model_save_path=model_save_path,
                    summary_path=summary_path)

    # Start from fresh weights, then run the training loop.
    model.initialize(load_weights=False)
    print('calling {} train'.format(framework))
    model.train()
        

In [None]:
def movingaverage(interval, window_size):
    """Smooth a 1-D sequence with a centred, zero-padded moving average.

    Parameters
    ----------
    interval : array-like
        The 1-D values to smooth.
    window_size : int or float
        Number of samples per average. It is truncated with ``int()``; the
        truncated value is used both for the number of taps and for the
        normalisation, so the weights always sum to 1.

    Returns
    -------
    numpy.ndarray
        Array with exactly as many entries as ``interval``. Near both ends the
        window hangs over the data and the missing samples count as zero, so
        edge values are pulled towards zero.

    Raises
    ------
    ValueError
        If ``window_size`` truncates to less than 1.
    """
    taps = int(window_size)
    if taps < 1:
        raise ValueError("window_size must be at least 1, got {!r}".format(window_size))

    values = np.atleast_1d(interval)
    if values.size == 0:
        # np.convolve rejects empty input; an empty series smooths to empty.
        return np.zeros(0, dtype=float)

    window = np.ones(taps) / float(taps)

    # mode='same' returns max(len(values), taps) samples, i.e. MORE than the
    # input when the window is longer than the data. Taking the centred slice
    # of the full convolution is identical to 'same' whenever
    # len(values) >= taps and keeps the output length equal to the input
    # length otherwise.
    full = np.convolve(values, window, 'full')
    start = (taps - 1) // 2
    return full[start:start + values.shape[0]]

In [None]:
# Plot showing the Rewards
episode_rewards = model.ep_rewards

# One row per episode, indexed by the episode number.
df = pd.DataFrame({"Episode": range(len(episode_rewards))}).set_index('Episode')
df['Rewards'] = episode_rewards

fig, ax = plt.subplots(figsize=(10, 6), dpi=100)

# Raw per-episode rewards plus a 20-episode moving average on the same axes.
ax.plot(df['Rewards'], label='Rewards')
ax.plot(movingaverage(episode_rewards, 20), label='Rewards_ma_20')

ax.set_title('Episode Rewards')
ax.set_xlabel('Episode')
ax.set_ylabel('Rewards')
ax.legend()
plt.show()