** Import Dependencies**

In [1]:
from google.colab import drive
from datetime import timedelta, datetime
from matplotlib import pyplot as ply
import pandas as pd
import numpy as np

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [9]:
old_history = pd.read_csv('/content/gdrive/MyDrive/old_btc.csv')
old_history['date'] = pd.to_datetime(old_history['Timestamp'])
old_history['Timestamp'] = pd.to_datetime(old_history['Timestamp'])
history = old_history.set_index('Timestamp').resample('1H').agg({'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum', 'date': 'first'})
history['Open'] = history['Open'] / 100
history['High'] = history['High'] / 100
history['Low'] = history['Low'] / 100
history['Close'] = history['Close'] / 100

print(history.columns)
history

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'date'], dtype='object')


Unnamed: 0_level_0,Open,High,Low,Close,Volume,date
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-12-31 07:00:00,0.0439,0.0439,0.0439,0.0439,0.455581,2011-12-31 07:52:00
2011-12-31 08:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 08:00:00
2011-12-31 09:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 09:00:00
2011-12-31 10:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 10:00:00
2011-12-31 11:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 11:00:00
...,...,...,...,...,...,...
2017-08-19 19:00:00,40.4624,40.5746,40.1601,40.5440,162.552856,2017-08-19 19:00:00
2017-08-19 20:00:00,40.5357,40.9879,40.2014,40.8796,438.344254,2017-08-19 20:00:00
2017-08-19 21:00:00,40.8797,41.1002,40.6487,40.7720,241.957460,2017-08-19 21:00:00
2017-08-19 22:00:00,40.7730,41.2595,40.7322,40.9999,212.053965,2017-08-19 22:00:00


In [10]:
end = pd.to_datetime('2017-08-19')
index = pd.date_range(end, periods=25, freq='H')
columns = history.columns
future_null = pd.DataFrame(index=index, columns=columns)
future_null

Unnamed: 0,Open,High,Low,Close,Volume,date
2017-08-19 00:00:00,,,,,,
2017-08-19 01:00:00,,,,,,
2017-08-19 02:00:00,,,,,,
2017-08-19 03:00:00,,,,,,
2017-08-19 04:00:00,,,,,,
2017-08-19 05:00:00,,,,,,
2017-08-19 06:00:00,,,,,,
2017-08-19 07:00:00,,,,,,
2017-08-19 08:00:00,,,,,,
2017-08-19 09:00:00,,,,,,


In [11]:
df = pd.concat([history,future_null])
df

Unnamed: 0,Open,High,Low,Close,Volume,date
2011-12-31 07:00:00,0.0439,0.0439,0.0439,0.0439,0.455581,2011-12-31 07:52:00
2011-12-31 08:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 08:00:00
2011-12-31 09:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 09:00:00
2011-12-31 10:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 10:00:00
2011-12-31 11:00:00,0.0439,0.0439,0.0439,0.0439,0.000000,2011-12-31 11:00:00
...,...,...,...,...,...,...
2017-08-19 20:00:00,,,,,,NaT
2017-08-19 21:00:00,,,,,,NaT
2017-08-19 22:00:00,,,,,,NaT
2017-08-19 23:00:00,,,,,,NaT


*Formulas* for Ichimoku

PH -> period high / PL -> period low

* Conversion Line = 9-PH + 9-PL / 2
* Base Line = 26-PH + 26-PL / 2
* Leading Span A = Conversion Line + Base Line / 2
* Leading Span B = 52-PH + 52-PL / 2
* Lagging Span =  closed plotted 26 periods in the past

Price to BaseLine in %

Price to ConversionLine in %

Price to leading_span_a in %

price to leading_span_b in %

Baselin to conversionline in %

future_leading_span_a to price

future_leading_span_b to price

future_leading_span_a to future_leading_span_b

leading_span_a to leading_span_b


baseline to leading_span_a

baseline to leading_span_b

conversionline to leading_span_a

conversionline to leading_span_b

Stoch RSI , 8 5 3 3 

In [5]:
!pip install finta
from finta import TA

Collecting finta
  Downloading finta-1.3-py3-none-any.whl (29 kB)
Installing collected packages: finta
Successfully installed finta-1.3


In [12]:
# conversion line
nine_period_high = df['High'].rolling(window=9).max()
nine_period_low = df['Low'].rolling(window=9).min()
df['conversion_line'] = (nine_period_high + nine_period_low) / 2
df['close_conversion_line'] = round((df['Close'] - df['conversion_line']) / df['conversion_line'], 3)

# base line
twenty_six_period_high = df['High'].rolling(window=26).max()
twenty_six_period_low = df['Low'].rolling(window=26).min()
df['base_line'] = (twenty_six_period_high + twenty_six_period_low) / 2
df['close_base_line'] = round((df['Close'] - df['base_line']) / df['base_line'], 3)

# leading span A
df['leading_span_a'] = ((df['conversion_line'] + df['base_line']) / 2).shift(26)
df['close_leading_span_a'] = round((df['Close'] - df['leading_span_a']) / df['leading_span_a'], 3)

# leading span B
fifty_two_period_high = df['High'].rolling(window=52).max()
fifty_two_period_low = df['Low'].rolling(window=52).min()
df['leading_span_b'] = ((fifty_two_period_high + fifty_two_period_low) / 2).shift(26)
df['close_leading_span_b'] = round((df['Close'] - df['leading_span_b']) / df['leading_span_b'], 3)

# lagging span
# df['lagging_span'] = df['Close'].shift(-26)
df['lagging_span'] = df['Close'].shift(26)

# lagging span direction
# df['lagging_span_direction'] = np.where(df['lagging_span'] > df['Close'], 1, -1)

# base line to convertion line
# df['base_line_to_conversion_line'] = round((df['base_line'] - df['conversion_line']) / df['conversion_line'], 3) # not a good result
df['stochastic_rsi'] = TA.STOCHRSI(df)
df['tic'] = 'BTC'
df['day'] = 1
# df['date'] = pd.date_range(start='09/01/2012', periods=len(df), freq='1H')

# drop null
df.dropna(inplace=True)

# final result
df.head(100)

Unnamed: 0,Open,High,Low,Close,Volume,date,conversion_line,close_conversion_line,base_line,close_base_line,leading_span_a,close_leading_span_a,leading_span_b,close_leading_span_b,lagging_span,stochastic_rsi,tic,day
2012-01-03 12:00:00,0.0532,0.0532,0.0532,0.0532,0.000000,2012-01-03 12:00:00,0.0516,0.031,0.0516,0.031,0.048950,0.087,0.04695,0.133,0.0500,1.000000,BTC,1
2012-01-03 13:00:00,0.0532,0.0532,0.0532,0.0532,0.000000,2012-01-03 13:00:00,0.0516,0.031,0.0516,0.031,0.048950,0.087,0.04695,0.133,0.0500,1.000000,BTC,1
2012-01-03 14:00:00,0.0532,0.0532,0.0514,0.0526,29.999392,2012-01-03 14:00:00,0.0516,0.019,0.0516,0.019,0.048950,0.075,0.04695,0.120,0.0500,0.986530,BTC,1
2012-01-03 15:00:00,0.0526,0.0529,0.0526,0.0529,29.302457,2012-01-03 15:00:00,0.0516,0.025,0.0516,0.025,0.048950,0.081,0.04695,0.127,0.0500,0.974249,BTC,1
2012-01-03 16:00:00,0.0529,0.0529,0.0529,0.0529,0.000000,2012-01-03 16:00:00,0.0516,0.025,0.0516,0.025,0.048950,0.081,0.04695,0.127,0.0500,0.961968,BTC,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-01-07 11:00:00,0.0600,0.0600,0.0600,0.0600,0.000000,2012-01-07 11:00:00,0.0600,0.000,0.0645,-0.070,0.064650,-0.072,0.05830,0.029,0.0673,0.213269,BTC,1
2012-01-07 12:00:00,0.0600,0.0600,0.0600,0.0600,0.000000,2012-01-07 12:00:00,0.0600,0.000,0.0645,-0.070,0.065275,-0.081,0.05830,0.029,0.0673,0.213269,BTC,1
2012-01-07 13:00:00,0.0600,0.0600,0.0600,0.0600,0.000000,2012-01-07 13:00:00,0.0600,0.000,0.0645,-0.070,0.065650,-0.086,0.05830,0.029,0.0673,0.213269,BTC,1
2012-01-07 14:00:00,0.0600,0.0600,0.0600,0.0600,0.000000,2012-01-07 14:00:00,0.0600,0.000,0.0645,-0.070,0.065850,-0.089,0.05830,0.029,0.0673,0.213269,BTC,1


# [**Use OpenAI gym for training our model**](https://)

In [13]:
!pip install tensorflow-gpu==1.15.0 tensorflow==1.15.0 stable-baselines gym
# !pip install stable-baselines gym
# install finrl library
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

Collecting tensorflow-gpu==1.15.0
  Downloading tensorflow_gpu-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl (411.5 MB)
[K     |████████████████████████████████| 411.5 MB 5.0 kB/s 
[?25hCollecting tensorflow==1.15.0
  Downloading tensorflow-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl (412.3 MB)
[K     |████████████████████████████████| 412.3 MB 24 kB/s 
[?25hCollecting stable-baselines
  Downloading stable_baselines-2.10.2-py3-none-any.whl (240 kB)
[K     |████████████████████████████████| 240 kB 36.3 MB/s 
Collecting tensorflow-estimator==1.15.1
  Downloading tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503 kB)
[K     |████████████████████████████████| 503 kB 33.0 MB/s 
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 6.2 MB/s 
Collecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Collecting tensorboard<1.16.0,>=1.15.0
  Downloading tensorboard-1.15.0-py3-none-any.whl (

In [18]:
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

%matplotlib inline
from finrl.apps import config
from finrl.neo_finrl.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.drl_agents.stablebaselines3.models import DRLAgent, DRLEnsembleAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

from pprint import pprint

import sys
sys.path.append("../FinRL-Library")

import itertools

In [19]:
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pickle
from stable_baselines3.common.vec_env import DummyVecEnv
#from stable_baselines3.common import logger

class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, 
                df, 
                stock_dim,
                hmax,                
                initial_amount,
                buy_cost_pct,
                sell_cost_pct,
                reward_scaling,
                state_space,
                action_space,
                tech_indicator_list,
                turbulence_threshold=None,
                risk_indicator_col='turbulence',
                make_plots = False, 
                print_verbosity = 10,
                day = 0, 
                initial=True,
                previous_state=[],
                model_name = '',
                mode='',
                iteration=''):
        self.day = day
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list
        self.action_space = spaces.Box(low = -1, high = 1,shape = (self.action_space,)) 
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape = (self.state_space,))
        self.data = self.df.iloc[self.day,:]
        self.terminal = False     
        self.make_plots = make_plots
        self.print_verbosity = print_verbosity
        self.turbulence_threshold = turbulence_threshold
        self.risk_indicator_col = risk_indicator_col
        self.initial = initial
        self.previous_state = previous_state
        self.model_name=model_name
        self.mode=mode 
        self.iteration=iteration
        # initalize state
        self.state = self._initiate_state()
        
        # initialize reward
        self.reward = 0
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.episode = 0
        # memorize all the total balance change
        self.asset_memory = [self.initial_amount]
        self.rewards_memory = []
        self.actions_memory=[]
        self.date_memory=[self._get_date()]
        #self.reset()
        self._seed()


    def _sell_stock(self, index, action):
        def _do_sell_normal():
            if self.state[index+1]>0: 
                # Sell only if the price is > 0 (no missing data in this particular date)
                # perform sell action based on the sign of the action
                if self.state[index+self.stock_dim+1] > 0:
                    # Sell only if current asset is > 0
                    sell_num_shares = min(abs(action),self.state[index+self.stock_dim+1])
                    sell_amount = self.state[index+1] * sell_num_shares * (1- self.sell_cost_pct)
                    #update balance
                    self.state[0] += sell_amount

                    self.state[index+self.stock_dim+1] -= sell_num_shares
                    self.cost +=self.state[index+1] * sell_num_shares * self.sell_cost_pct
                    self.trades+=1
                else:
                    sell_num_shares = 0
            else:
                sell_num_shares = 0

            return sell_num_shares
            
        # perform sell action based on the sign of the action
        if self.turbulence_threshold is not None:
            if self.turbulence>=self.turbulence_threshold:
                if self.state[index+1]>0: 
                    # Sell only if the price is > 0 (no missing data in this particular date)
                    # if turbulence goes over threshold, just clear out all positions 
                    if self.state[index+self.stock_dim+1] > 0:
                        # Sell only if current asset is > 0
                        sell_num_shares = self.state[index+self.stock_dim+1]
                        sell_amount = self.state[index+1]*sell_num_shares* (1- self.sell_cost_pct)
                        #update balance
                        self.state[0] += sell_amount
                        self.state[index+self.stock_dim+1] =0
                        self.cost += self.state[index+1]*sell_num_shares* \
                                    self.sell_cost_pct
                        self.trades+=1
                    else:
                        sell_num_shares = 0
                else:
                    sell_num_shares = 0
            else:
                sell_num_shares = _do_sell_normal()
        else:
            sell_num_shares = _do_sell_normal()

        return sell_num_shares

    
    def _buy_stock(self, index, action):
        def _do_buy():
            if self.state[index+1]>0: 
                #Buy only if the price is > 0 (no missing data in this particular date)       
                available_amount = self.state[0] // self.state[index+1]
                # print('available_amount:{}'.format(available_amount))
                
                #update balance
                buy_num_shares = min(available_amount, action)
                buy_amount = self.state[index+1] * buy_num_shares * (1+ self.buy_cost_pct)
                self.state[0] -= buy_amount

                self.state[index+self.stock_dim+1] += buy_num_shares
                
                self.cost+=self.state[index+1] * buy_num_shares * self.buy_cost_pct
                self.trades+=1
            else:
                buy_num_shares = 0

            return buy_num_shares

        # perform buy action based on the sign of the action
        if self.turbulence_threshold is None:
            buy_num_shares = _do_buy()
        else:
            if self.turbulence< self.turbulence_threshold:
                buy_num_shares = _do_buy()
            else:
                buy_num_shares = 0
                pass

        return buy_num_shares

    def _make_plot(self):
        plt.plot(self.asset_memory,'r')
        plt.savefig('results/account_value_trade_{}.png'.format(self.episode))
        plt.close()

    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique())-1
        if self.terminal:
            # print(f"Episode: {self.episode}")
            if self.make_plots:
                self._make_plot()            
            end_total_asset = self.state[0]+ \
                sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            df_total_value = pd.DataFrame(self.asset_memory)
            tot_reward = self.state[0]+sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))- self.initial_amount 
            df_total_value.columns = ['account_value']
            df_total_value['date'] = self.date_memory
            df_total_value['daily_return']=df_total_value['account_value'].pct_change(1)
            if df_total_value['daily_return'].std() !=0:
                sharpe = (252**0.5)*df_total_value['daily_return'].mean()/ \
                      df_total_value['daily_return'].std()
            df_rewards = pd.DataFrame(self.rewards_memory)
            df_rewards.columns = ['account_rewards']
            df_rewards['date'] = self.date_memory[:-1]
            if self.episode % self.print_verbosity == 0:
                print(f"day: {self.day}, episode: {self.episode}")
                print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
                print(f"end_total_asset: {end_total_asset:0.2f}")
                print(f"total_reward: {tot_reward:0.2f}")
                print(f"total_cost: {self.cost:0.2f}")
                print(f"total_trades: {self.trades}")
                if df_total_value['daily_return'].std() != 0:
                    print(f"Sharpe: {sharpe:0.3f}")
                print("=================================")

            if (self.model_name!='') and (self.mode!=''):
                df_actions = self.save_action_memory()
                df_actions.to_csv('results/actions_{}_{}_{}.csv'.format(self.mode,self.model_name, self.iteration))
                df_total_value.to_csv('results/account_value_{}_{}_{}.csv'.format(self.mode,self.model_name, self.iteration),index=False)
                df_rewards.to_csv('results/account_rewards_{}_{}_{}.csv'.format(self.mode,self.model_name, self.iteration),index=False)
                plt.plot(self.asset_memory,'r')
                plt.savefig('results/account_value_{}_{}_{}.png'.format(self.mode,self.model_name, self.iteration),index=False)
                plt.close()

            # Add outputs to logger interface
            #logger.record("environment/portfolio_value", end_total_asset)
            #logger.record("environment/total_reward", tot_reward)
            #logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100)
            #logger.record("environment/total_cost", self.cost)
            #logger.record("environment/total_trades", self.trades)

            return self.state, self.reward, self.terminal, {}

        else:

            actions = actions * self.hmax #actions initially is scaled between 0 to 1
            actions = (actions.astype(int)) #convert into integer because we can't by fraction of shares
            if self.turbulence_threshold is not None:
                if self.turbulence>=self.turbulence_threshold:
                    actions=np.array([-self.hmax]*self.stock_dim)
            begin_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            #print("begin_total_asset:{}".format(begin_total_asset))
            
            argsort_actions = np.argsort(actions)
            
            sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]]
            buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]]

            for index in sell_index:
                # print(f"Num shares before: {self.state[index+self.stock_dim+1]}")
                # print(f'take sell action before : {actions[index]}')
                actions[index] = self._sell_stock(index, actions[index]) * (-1)
                # print(f'take sell action after : {actions[index]}')
                # print(f"Num shares after: {self.state[index+self.stock_dim+1]}")

            for index in buy_index:
                # print('take buy action: {}'.format(actions[index]))
                actions[index] = self._buy_stock(index, actions[index])

            self.actions_memory.append(actions)
            
            #state: s -> s+1
            self.day += 1
            self.data = self.df.iloc[self.day,:]    
            if self.turbulence_threshold is not None:     
                self.turbulence = self.data[self.risk_indicator_col].values[0]
            self.state =  self._update_state()
                           
            end_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            self.asset_memory.append(end_total_asset)
            self.date_memory.append(self._get_date())
            self.reward = end_total_asset - begin_total_asset            
            self.rewards_memory.append(self.reward)
            self.reward = self.reward*self.reward_scaling

        return self.state, self.reward, self.terminal, {}

    def reset(self):  
        #initiate state
        self.state = self._initiate_state()
        
        if self.initial:
            self.asset_memory = [self.initial_amount]
        else:
            previous_total_asset = self.previous_state[0]+ \
            sum(np.array(self.state[1:(self.stock_dim+1)])*np.array(self.previous_state[(self.stock_dim+1):(self.stock_dim*2+1)]))
            self.asset_memory = [previous_total_asset]

        self.day = 0
        self.data = self.df.iloc[self.day,:]
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False 
        # self.iteration=self.iteration
        self.rewards_memory = []
        self.actions_memory=[]
        self.date_memory=[self._get_date()]
        
        self.episode+=1

        return self.state
    
    def render(self, mode='human',close=False):
        return self.state

    def _initiate_state(self):
        if self.initial:
            # For Initial State
            if len(self.df.tic.unique())>1:
                # for multiple stock
                state = [self.initial_amount] + \
                         self.data.Close.values.tolist() + \
                         [0]*self.stock_dim  + \
                         sum([self.data[tech].values.tolist() for tech in self.tech_indicator_list ], [])
            else:
                # for single stock
                state = [self.initial_amount] + \
                        [self.data.Close] + \
                        [0]*self.stock_dim  + \
                        sum([[self.data[tech]] for tech in self.tech_indicator_list ], [])
        else:
            #Using Previous State
            if len(self.df.tic.unique())>1:
                # for multiple stock
                state = [self.previous_state[0]] + \
                         self.data.Close.values.tolist() + \
                         self.previous_state[(self.stock_dim+1):(self.stock_dim*2+1)]  + \
                         sum([self.data[tech].values.tolist() for tech in self.tech_indicator_list ], [])
            else:
                # for single stock
                state = [self.previous_state[0]] + \
                        [self.data.Close] + \
                        self.previous_state[(self.stock_dim+1):(self.stock_dim*2+1)]  + \
                        sum([[self.data[tech]] for tech in self.tech_indicator_list ], [])
        return state

    def _update_state(self):
        if len(self.df.tic.unique())>1:
            # for multiple stock
            state =  [self.state[0]] + \
                      self.data.Close.values.tolist() + \
                      list(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]) + \
                      sum([self.data[tech].values.tolist() for tech in self.tech_indicator_list ], [])

        else:
            # for single stock
            state =  [self.state[0]] + \
                     [self.data.Close] + \
                     list(self.state[(self.stock_dim+1):(self.stock_dim*2+1)]) + \
                     sum([[self.data[tech]] for tech in self.tech_indicator_list ], [])
                          
        return state

    def _get_date(self):
        if len(self.df.tic.unique())>1:
            date = self.data.date.unique()[0]
        else:
            date = self.data.date
        return date

    def save_asset_memory(self):
        date_list = self.date_memory
        asset_list = self.asset_memory
        #print(len(date_list))
        #print(len(asset_list))
        df_account_value = pd.DataFrame({'date':date_list,'account_value':asset_list})
        return df_account_value

    def save_action_memory(self):
        if len(self.df.tic.unique())>1:
            # date and close price length must match actions length
            date_list = self.date_memory[:-1]
            df_date = pd.DataFrame(date_list)
            df_date.columns = ['date']
            
            action_list = self.actions_memory
            df_actions = pd.DataFrame(action_list)
            df_actions.columns = self.data.tic.values
            df_actions.index = df_date.date
            #df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
        else:
            date_list = self.date_memory[:-1]
            action_list = self.actions_memory
            df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


    def get_sb_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

In [16]:
import os
if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

In [20]:
information_cols = ['Close', 'Volume', 'close_base_line', 'close_conversion_line', 'stochastic_rsi']
stock_dimension = 1
state_space = 1 + 2*stock_dimension + len(information_cols)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 1, State Space: 8


In [34]:
# train our model
train_frame_size = round(df.shape[0] * 80 / 100)
train = df[0:train_frame_size]
env_kwargs = {
    "hmax": 100, 
    "initial_amount": 1000000, 
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": information_cols, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
}

In [35]:
import multiprocessing

n_cores = multiprocessing.cpu_count() - 2
n_cores = 12
print(f"using {n_cores} cores")

e_train_gym = StockTradingEnv(df = train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

using 12 cores
<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [46]:
agent = DRLAgent(env = env_train)

# a2c algorithm

def train_with_A2C(timesteps=100000):
  model_a2c = agent.get_model("a2c")
  trained_a2c = agent.train_model(model=model_a2c, 
                             tb_log_name='a2c',
                             total_timesteps=timesteps)
  return trained_a2c

def train_with_PPO(timesteps=100000):
  PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
  }
  model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)
  trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=timesteps)
  return trained_ppo

def train_with_DDPG(timesteps=100000):
  model_ddpg = agent.get_model("ddpg")
  trained_ddpg = agent.train_model(model=model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=timesteps)

  return trained_ddpg

def train_with_TD3(timesteps=100000):
  TD3_PARAMS = {"batch_size": 100, 
              "buffer_size": 1000000, 
              "learning_rate": 0.001}

  model_td3 = agent.get_model("td3",model_kwargs = TD3_PARAMS)
  trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=50000) 

  return trained_td3

In [48]:
learned_model = train_with_PPO()

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_1
-----------------------------
| time/              |      |
|    fps             | 125  |
|    iterations      | 1    |
|    time_elapsed    | 16   |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 124          |
|    iterations           | 2            |
|    time_elapsed         | 32           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0002455087 |
|    clip_fraction        | 4.88e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0173      |
|    n_updates            | 10           |
|    polic

In [49]:
test = df[train_frame_size:df.shape[0] - 1]
e_test_gym = StockTradingEnv(df = test, **env_kwargs)

df_account_value, df_actions = DRLAgent.DRL_prediction(model=learned_model, environment = e_test_gym)

hit end!


In [50]:
# show result
print(df_account_value.shape)
df_account_value

(9829, 2)


Unnamed: 0,date,account_value
0,2016-07-06 10:00:00,1.000000e+06
1,2016-07-06 11:00:00,1.000000e+06
2,2016-07-06 12:00:00,1.000000e+06
3,2016-07-06 13:00:00,9.999977e+05
4,2016-07-06 14:00:00,9.999972e+05
...,...,...
9824,2017-08-19 18:00:00,1.015120e+06
9825,2017-08-19 19:00:00,1.015174e+06
9826,2017-08-19 20:00:00,1.015419e+06
9827,2017-08-19 21:00:00,1.015325e+06


In [51]:
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.000395
Cumulative returns     0.015526
Annual volatility      0.002426
Sharpe ratio           0.164080
Calmar ratio           0.024466
Stability              0.599127
Max drawdown          -0.016149
Omega ratio            1.069057
Sortino ratio          0.215231
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.222973
Daily value at risk   -0.000304
dtype: float64
