<a href="https://colab.research.google.com/github/NicoleRichards1998/FinRL/blob/master/Optimisation_DayTrading_FinRL_JSE_Singe_Stock.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gym
import numpy as np
from numpy import random as rd

class NoIndicatorsStockTradingEnv(gym.Env):
    def __init__(
        self,
        config,
        initial_account=1e6,
        gamma=0.99,
        turbulence_thresh=50000,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        reward_scaling=2 ** -11,
        initial_stocks=None,
    ):
        price_ary = config["price_array"]
        #tech_ary = config["tech_array"]
        turbulence_ary = config["turbulence_array"]
        if_train = config["if_train"]
        self.price_ary = price_ary.astype(np.float32)
        #self.tech_ary = tech_ary.astype(np.float32)
        self.turbulence_ary = turbulence_ary

        #self.tech_ary = self.tech_ary * 2 ** -7
        self.turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        self.turbulence_ary = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2 ** -5
        ).astype(np.float32)

        stock_dim = self.price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.min_stock_rate = min_stock_rate
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else initial_stocks
        )

        # reset()
        self.day = None
        self.amount = None
        self.stocks = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None
        
        self.sell_index = False

        # environment information
        self.env_name = "StockEnv"
        # self.state_dim = 1 + 2 + 2 * stock_dim + self.tech_ary.shape[1]
        # # amount + (turbulence, turbulence_bool) + (price, stock) * stock_dim + tech_dim
        self.state_dim = 1 + 2 + 3 * stock_dim
        # amount + (turbulence, turbulence_bool) + (price, stock) * stock_dim + tech_dim
        self.stocks_cd = None
        self.action_dim = stock_dim
        self.max_step = self.price_ary.shape[0] - 1
        self.if_train = if_train
        self.if_discrete = False
        self.target_return = 10.0
        self.episode_return = 0.0

        self.observation_space = gym.spaces.Box(
            low=-3000, high=3000, shape=(self.state_dim,), dtype=np.float32
        )
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.action_dim,), dtype=np.float32
        )

    def reset(self):
        self.day = 0
        price = self.price_ary[self.day]

        if self.if_train:
            self.stocks = (
                self.initial_stocks + rd.randint(0, 64, size=self.initial_stocks.shape)
            ).astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = (
                self.initial_capital * rd.uniform(0.95, 1.05)
                - (self.stocks * price).sum()
            )
        else:
            self.stocks = self.initial_stocks.astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = self.initial_capital

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0
        return self.get_state(price)  # state

    def step(self, actions):
        actions = (actions * self.max_stock).astype(int)

        self.day += 1
        price = self.price_ary[self.day]
        self.stocks_cool_down += 1
        sell_index = self.sell_index

        if self.turbulence_bool[self.day] == 0:
            min_action = int(self.max_stock * self.min_stock_rate)  # stock_cd
            for index in np.where(actions < -min_action)[0]:  # sell_index:
                if price[index] > 0:  # Sell only if current asset is > 0
                    sell_num_shares = min(self.stocks[index], -actions[index])
                    self.stocks[index] -= sell_num_shares
                    self.amount += (
                        price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0
                    sell_index = True
            for index in np.where(actions > min_action)[0]:  # buy_index:
                if (
                    price[index] > 0
                ):  # Buy only if the price is > 0 (no missing data in this particular date)
                    buy_num_shares = min(self.amount // price[index], actions[index])
                    self.stocks[index] += buy_num_shares
                    self.amount -= (
                        price[index] * buy_num_shares * (1 + self.buy_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0

        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cool_down[:] = 0

        state = self.get_state(price)
        total_asset = self.amount + (self.stocks * price).sum()
        real_reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset

        # for my reward function
        
        #if sell_index == True:
        #  reward = real_reward
        #  sell_index = False
        #else:
        #  reward = int(0)
        #print('reward', reward)

        # for reward at end

        #reward = int(0)

        # for OG reward function

        reward = (total_asset - self.total_asset) * self.reward_scaling

        # the rest

        self.gamma_reward = self.gamma_reward * self.gamma + real_reward
        done = self.day == self.max_step
        if done:
            reward = self.gamma_reward
            self.episode_return = total_asset / self.initial_total_asset

        return state, reward, done, dict()

    def get_state(self, price):
        amount = np.array(self.amount * (2 ** -12), dtype=np.float32)
        scale = np.array(2 ** -6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                self.turbulence_ary[self.day],
                self.turbulence_bool[self.day],
                price * scale,
                self.stocks * scale,
                self.stocks_cool_down,
            )
        )  # state.astype(np.float32)

    @staticmethod
    def sigmoid_sign(ary, thresh):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh

In [63]:
import gym
import numpy as np
from numpy import random as rd

class RandomEnv(gym.Env):
    def __init__(
        self,
        config,
        initial_account=1e6,
        gamma=0.99,
        turbulence_thresh=50000,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        reward_scaling=2 ** -11,
        initial_stocks=None,
    ):
        price_ary = config["price_array"]
        #tech_ary = config["tech_array"]
        turbulence_ary = config["turbulence_array"]
        if_train = config["if_train"]
        self.price_ary = price_ary.astype(np.float32)
        #self.tech_ary = tech_ary.astype(np.float32)
        self.turbulence_ary = turbulence_ary

        #self.tech_ary = self.tech_ary * 2 ** -7
        self.turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        self.turbulence_ary = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2 ** -5
        ).astype(np.float32)

        stock_dim = self.price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.min_stock_rate = min_stock_rate
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else initial_stocks
        )

        # reset()
        self.day = None
        self.amount = None
        self.stocks = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None
        
        self.sell_index = False

        # environment information
        self.env_name = "StockEnv"
        # self.state_dim = 1 + 2 + 2 * stock_dim + self.tech_ary.shape[1]
        # # amount + (turbulence, turbulence_bool) + (price, stock) * stock_dim + tech_dim
        self.state_dim = 1 + 2 + 3 * stock_dim
        # amount + (turbulence, turbulence_bool) + (price, stock) * stock_dim + tech_dim
        self.stocks_cd = None
        self.action_dim = stock_dim
        self.max_step = self.price_ary.shape[0] - 1
        self.if_train = if_train
        self.if_discrete = False
        self.target_return = 10.0
        self.episode_return = 0.0

        self.observation_space = gym.spaces.Box(
            low=-3000, high=3000, shape=(self.state_dim,), dtype=np.float32
        )
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.action_dim,), dtype=np.float32
        )

    def reset(self):
        self.day = 0
        price = self.price_ary[self.day]

        if self.if_train:
            self.stocks = (
                self.initial_stocks + rd.randint(0, 64, size=self.initial_stocks.shape)
            ).astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = (
                self.initial_capital * rd.uniform(0.95, 1.05)
                - (self.stocks * price).sum()
            )
        else:
            self.stocks = self.initial_stocks.astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = self.initial_capital

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0
        return self.get_state(price)  # state

    def step(self, actions):
        #actions = np.array(rd.uniform(-1, 1), dtype=np.float32)
        #actions = (actions * self.max_stock).astype(int)

        actions = np.array([rd.uniform(-1, 1)])
        actions = (actions * self.max_stock)

        self.day += 1
        price = self.price_ary[self.day]
        self.stocks_cool_down += 1
        sell_index = self.sell_index

        if self.turbulence_bool[self.day] == 0:
            min_action = int(self.max_stock * self.min_stock_rate)  # stock_cd
            for index in np.where(actions < -min_action)[0]:  # sell_index:
                if price[index] > 0:  # Sell only if current asset is > 0
                    sell_num_shares = min(self.stocks[index], -actions[index])
                    self.stocks[index] -= sell_num_shares
                    self.amount += (
                        price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0
                    sell_index = True
            for index in np.where(actions > min_action)[0]:  # buy_index:
                if (
                    price[index] > 0
                ):  # Buy only if the price is > 0 (no missing data in this particular date)
                    buy_num_shares = min(self.amount // price[index], actions[index])
                    self.stocks[index] += buy_num_shares
                    self.amount -= (
                        price[index] * buy_num_shares * (1 + self.buy_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0

        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cool_down[:] = 0

        state = self.get_state(price)
        total_asset = self.amount + (self.stocks * price).sum()
        real_reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset

        # for my reward function
        
        #if sell_index == True:
        #  reward = real_reward
        #  sell_index = False
        #else:
        #  reward = int(0)
        #print('reward', reward)

        # for reward at end

        #reward = int(0)

        # for OG reward function

        reward = (total_asset - self.total_asset) * self.reward_scaling

        # the rest

        self.gamma_reward = self.gamma_reward * self.gamma + real_reward
        done = self.day == self.max_step
        if done:
            reward = self.gamma_reward
            self.episode_return = total_asset / self.initial_total_asset

        return state, reward, done, dict()

    def get_state(self, price):
        amount = np.array(self.amount * (2 ** -12), dtype=np.float32)
        scale = np.array(2 ** -6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                self.turbulence_ary[self.day],
                self.turbulence_bool[self.day],
                price * scale,
                self.stocks * scale,
                self.stocks_cool_down,
            )
        )  # state.astype(np.float32)

    @staticmethod
    def sigmoid_sign(ary, thresh):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh

In [62]:
import numpy as np
from numpy import random as rd
actions = np.array([rd.uniform(-1, 1)])
actions = (actions * 100)
actions[0]


-4.915787548913642

In [55]:
actions = rd.uniform(-1, 1)
actions = [actions * 0.1]
actions[0]

0.03540906386662768

# Part 1: Install FinRL

In [1]:
!pip install git+https://github.com/NicoleRichards1998/FinRL.git
!pip install 'HEBO>=0.2.0'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/NicoleRichards1998/FinRL.git
  Cloning https://github.com/NicoleRichards1998/FinRL.git to /tmp/pip-req-build-bgbzvr7w
  Running command git clone -q https://github.com/NicoleRichards1998/FinRL.git /tmp/pip-req-build-bgbzvr7w
Collecting pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2
  Cloning https://github.com/quantopian/pyfolio.git to /tmp/pip-install-p8v8mza0/pyfolio_248e61405553440d8e10adc538456783
  Running command git clone -q https://github.com/quantopian/pyfolio.git /tmp/pip-install-p8v8mza0/pyfolio_248e61405553440d8e10adc538456783
Collecting elegantrl@ git+https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl
  Cloning https://github.com/AI4Finance-Foundation/ElegantRL.git to /tmp/pip-install-p8v8mza0/elegantrl_33e5d82851c74190b34891d1c6bfbdda
  Running command git clone -q https://github.com/AI4Finance-Fou

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting HEBO>=0.2.0
  Downloading HEBO-0.3.2-py3-none-any.whl (106 kB)
[K     |████████████████████████████████| 106 kB 15.2 MB/s 
[?25hCollecting pymoo>=0.5.0
  Downloading pymoo-0.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.5 MB)
[K     |████████████████████████████████| 2.5 MB 65.4 MB/s 
[?25hCollecting catboost>=0.24.4
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.2 MB/s 
[?25hCollecting GPy>=1.9.9
  Downloading GPy-1.10.0.tar.gz (959 kB)
[K     |████████████████████████████████| 959 kB 55.4 MB/s 
Collecting gpytorch>=1.4.0
  Downloading gpytorch-1.7.0-py2.py3-none-any.whl (353 kB)
[K     |████████████████████████████████| 353 kB 69.8 MB/s 
Collecting paramz>=0.9.0
  Downloading paramz-0.9.5.tar.gz (71 kB)
[K     |████████████████████████

In [None]:
print("Setting up colab environment")
!pip uninstall -y -q pyarrow
!pip install -q -U ray[tune]
!pip install -q ray[debug]

# A hack to force the runtime to restart, needed to include the above dependencies.
print("Done installing! Restarting via forced crash (this is not an issue).")
import os
os._exit(0)

Setting up colab environment


## Import related modules

In [1]:
from finrl.finrl_meta.env_stock_trading.env_stocktrading_np import EndRewardStockTradingEnv as StockTradingEnv1
from finrl.finrl_meta.env_stock_trading.env_stocktrading_np import StockTradingEnv
from finrl.finrl_meta.env_stock_trading.env_stocktrading_np import ChangedStockTradingEnv as StockTradingEnv2

from finrl.finrl_meta.env_stock_trading.env_stock_papertrading import AlpacaPaperTrading
from finrl.finrl_meta.data_processor import DataProcessor
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from finrl.agents.rllib.models import DRLAgent as DRLAgent_rllib

import numpy as np
import pandas as pd
import ray

from ray.rllib.agents.a3c import a2c
from ray.rllib.agents.ddpg import ddpg, td3
from ray.rllib.agents.ppo import ppo
from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ddpg import DDPGTrainer
from ray.rllib.agents.a3c import A2CTrainer
from ray.rllib.agents.sac import sac
from ray.tune.logger import (
    CSVLoggerCallback,
    JsonLoggerCallback,
    JsonLogger,
    CSVLogger,
    TBXLoggerCallback,
    TBXLogger,
    UnifiedLogger
)
from ray.tune.logger import pretty_print

from ray.tune.logger import pretty_print
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler, PopulationBasedTraining
from ray.tune.suggest.hebo import HEBOSearch
from ray.tune.suggest.optuna import OptunaSearch

from google.colab import files
from datetime import date, timedelta, datetime

from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.core.arrays import string_

import pytz
import exchange_calendars as tc

from stockstats import StockDataFrame as Sdf

import psutil
import ray
ray._private.utils.get_system_memory = lambda: psutil.virtual_memory().total

  'Module "zipline.assets" not found; multipliers will not be applied'


## Set variables

In [14]:
model_algorithm = 'ppo'
starting_capital = 1e6

Number_Train_Days = 10

Number_Test_Days = 9

In [3]:
JSEIndexes = [ 'ACL' ]

In [18]:
JSEIndexes = [ 'ACL', 'WHL' ]

In [None]:
JSEIndexes = [ 'ACL', 'SSW' ]

In [None]:
JSEIndexes = [ 'ACL', 'GFI' ]

In [None]:
JSEIndexes = [ 'ACL', 'HAR' ]

In [None]:
JSEIndexes = [ 'ACL', 'WHL' ]

In [None]:
JSEIndexes = 
['ACL',0
 'AVI',1
 'BLU',2
 'CML',3
 'EXX',4
 'FSR',5
 'GFI',6
 'HAR',7
 'LAB',8
 'LHC',9
 'MRF',10
 'MTN',11
 'NED',12
 'NPK',13
 'OMU',14
 'PPC',15
 'PPE',16
 'PPH',17
 'RMH',18
 'SBK',19
 'SNH',20
 'SOL',21
 'SPG',22
 'SSW',23
 'TCP',24
 'TGA',25
 'TRU',26
 'TSG',27
 'WHL',28
 'WSL']29

In [19]:
ticker_list = JSEIndexes
action_dim = len(ticker_list)

INDICATORS = ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30', 'close_30_sma', 'close_60_sma']
tech_indicator_list = INDICATORS
env = StockTradingEnv2

# amount + (turbulence, turbulence_bool) + (price, shares, cd (holding time)) * stock_dim + tech_dim
state_dim = 1 + 2 + 3 * action_dim + len(INDICATORS) * action_dim
#state_dim = 1 + 2 + 3 * action_dim

episodes_per_day = 150
tr_batch_size = episodes_per_day*480

rllib_params = {"lr": 5e-5, "train_batch_size": tr_batch_size, "gamma": 0.99}

MODELS = {"a2c": a2c, "ddpg": ddpg, "td3": td3, "sac": sac, "ppo": ppo}

# Download data

In [20]:
#from google.colab import drive
#drive.mount('/content/drive')

%cd /content/drive/MyDrive/csvfiles/

data_df = pd.read_csv("JSE_Minute_Data_With_Turbulance_Indicator.csv")

df = data_df.copy()
df = df.sort_values(by=["tic", "date"])
x = 9620
dic_tickers = {}

for i in range(30):

  dic_tickers[i] = df.iloc[(i*x):((i+1)*x),]

trading_days = pd.date_range(start = '02/28/22', end = '03/25/22')

trading_days_List = trading_days[0:].strftime('%m/%d/%y').to_numpy().tolist()
count = 0
delete_number = 0
while count < 26:
    if (trading_days[count].weekday()>=5):
      del trading_days_List[delete_number]
      delete_number = delete_number - 1
    count = count + 1
    delete_number = delete_number + 1 

/content/drive/MyDrive/csvfiles


Single ticker:

In [6]:
x = 481
df = dic_tickers[0].copy()
df = df.reset_index(drop=True)
dic = {}

for j in range(len(df)):
    df['date'][j] = datetime.strptime(df['date'][j], '%m/%d/%y %H:%M')
df = df.sort_values(by=["date"])

for i in range(20):
  
  dic[i] = df.iloc[(i*x):((i+1)*x),]
  dic[i] = dic[i].sort_values(by=["tic", "date"])
  dic[i] = dic[i].reset_index(drop=True)
  #for j in range(len(dic[i])):
  #  dic[i]['date'][j] = datetime.strptime(dic[i]['date'][j], '%m/%d/%y %H:%M')
  dic[i] = dic[i].sort_values(by=["date"])

del dic[15]
dic = dict(enumerate(dic[x] for x in sorted(dic)))

Two tickers:

In [21]:
x = 962
df1 = dic_tickers[0].copy()
df2 = dic_tickers[7].copy()
df3 = df1.append(df2, ignore_index=True)
df = df3.reset_index(drop=True)

dic = {}

for j in range(len(df)):
    df['date'][j] = datetime.strptime(df['date'][j], '%m/%d/%y %H:%M')
df = df.sort_values(by=["date"])

for i in range(20):
  
  dic[i] = df.iloc[(i*x):((i+1)*x),]
  dic[i] = dic[i].sort_values(by=["tic", "date"])
  dic[i] = dic[i].reset_index(drop=True)
  #for j in range(len(dic[i])):
  #  dic[i]['date'][j] = datetime.strptime(dic[i]['date'][j], '%m/%d/%y %H:%M')
  dic[i] = dic[i].sort_values(by=["date"])

del dic[15]
dic = dict(enumerate(dic[x] for x in sorted(dic)))

Multiple tickers:

In [None]:
x = 14430
df = data_df.copy()
dic = {}

for i in range(20):

  dic[i] = df.iloc[(i*x):((i+1)*x),]


In [7]:
def df_to_array(df, if_vix):
        df = df.copy()
        unique_ticker = df.tic.unique()
        if_first_time = True
        for tic in unique_ticker:
            if if_first_time:
                price_array = df[df.tic == tic][["close"]].values
                tech_array = df[df.tic == tic][tech_indicator_list].values
                if if_vix:
                    turbulence_array = df[df.tic == tic]["VIXY"].values
                else:
                    turbulence_array = df[df.tic == tic]["turbulence"].values
                if_first_time = False
            else:
                price_array = np.hstack(
                    [price_array, df[df.tic == tic][["close"]].values]
                )
                tech_array = np.hstack(
                    [tech_array, df[df.tic == tic][tech_indicator_list].values]
                )
        print("Successfully transformed into array")
        return price_array, tech_array, turbulence_array

In [8]:
def get_config_Train(dic, Number_Days):  

    env_config = []

    for index in range(Number_Days):

      print(index)

      price_array, tech_array, turbulence_array = df_to_array(dic[index], False)

      config = {
          "price_array": price_array,
          "tech_array": tech_array,
          "turbulence_array": turbulence_array,
          "if_train": True,
      }

      env_config.append(config)

    return env_config

In [22]:
train_env_config = get_config_Train(dic, Number_Train_Days)

0
Successfully transformed into array
1
Successfully transformed into array
2
Successfully transformed into array
3
Successfully transformed into array
4
Successfully transformed into array
5
Successfully transformed into array
6
Successfully transformed into array
7
Successfully transformed into array
8
Successfully transformed into array
9
Successfully transformed into array


In [10]:
from typing import Dict

import json

import os
import tempfile

from ray.tune.utils.util import SafeFallbackEncoder
from ray.tune.logger import Logger

EXPR_TRACK_FILE = "episode_tracking.json"
CUSTOM_RESULTS_DIR = '/content/drive/MyDrive/TrainingData'
DEFAULT_LOGGERS = (JsonLogger, CSVLogger, TBXLogger)

class CustomLogger(Logger):
    """Logs custom results in json format."""

    def _init(self):
        self.update_config(self.config)
        logger_config = self.config.get("logger_config")
        filename = EXPR_TRACK_FILE
        if logger_config:
            filename = logger_config.get("filename") or filename
        local_file = os.path.join(self.logdir, filename)
        self.local_out = open(local_file, "a")

    def on_result(self, result: Dict):
        tracking_data = result["episode_media"]
        if not tracking_data:
            return
        json.dump(tracking_data, self, cls=SafeFallbackEncoder)
        self.write("\n")
        self.local_out.flush()

    def write(self, b):
        self.local_out.write(b)

    def flush(self):
        if not self.local_out.closed:
            self.local_out.flush()

    def close(self):
        self.local_out.close()

    def update_config(self, config: Dict):
        self.config = config

def custom_logger_creator(config):
    """Creates a Unified logger with a default logdir prefix
    containing the agent name and the env id
    """
    timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = "{}_{}_{}".format("PPO", "HSP", timestr)
    if not os.path.exists(CUSTOM_RESULTS_DIR):
        os.makedirs(CUSTOM_RESULTS_DIR)
    logdir = tempfile.mkdtemp(
        prefix=logdir_prefix, dir=CUSTOM_RESULTS_DIR)
    loggers = list(DEFAULT_LOGGERS)
    loggers.append(CustomLogger)
    return UnifiedLogger(config, logdir, loggers=loggers)

# Tune the agent

In [11]:
day = 0

def sample_ppo_params():
  return {
      "clip_param": tune.choice([ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6 ]),
      "lambda": tune.loguniform(0.9, 1),
      "lr": tune.loguniform(5e-6, 0.003),
      "gamma": tune.loguniform(0.9, 0.9997),
      "train_batch_size": 2400
  }

def sample_ddpg_params():
  
  return {
  "critic_lr": tune.loguniform(1e-3, 1e-4),
  "actor_lr": tune.loguniform(1e-3, 1e-5),
  "tau": tune.loguniform(1e-5, 1e-3),
  "gamma": tune.loguniform(0.9, 0.9997),
  'obs_batch' : 128,
  "train_batch_size": 960
  }

def sample_a2c_params():
  
  return{
      "lambda": tune.choice([0.1,0.3,0.5,0.7,0.9,1.0]),
      "lr": tune.loguniform(1e-4, 1e-2),
      "gamma": tune.loguniform(0.9, 0.9997),
      "train_batch_size": 960  
  } 

from ray.tune.registry import register_env
from ray.tune.integration.comet import CometLoggerCallback
from ray.tune.logger import (
    CSVLoggerCallback,
    JsonLoggerCallback,
    JsonLogger,
    CSVLogger,
    TBXLoggerCallback,
    TBXLogger,
)

env_name = 'StockTrading_train_env'
register_env(env_name, lambda config: env(train_env_config[day]))

MODEL_TRAINER = {'a2c':A2CTrainer,'ppo':PPOTrainer,'ddpg':DDPGTrainer}

if model_algorithm == "ddpg":
    sample_hyperparameters = sample_ddpg_params()
elif model_algorithm == "ppo":
  sample_hyperparameters = sample_ppo_params()
elif model_algorithm == "a2c":
  sample_hyperparameters = sample_a2c_params()
  
def run_tune():

  # maximum number of concurrent trials
  max_concurrent = 8

  algo = HEBOSearch(
        # space = space, # If you want to set the space
        #points_to_evaluate=previously_run_params,
        #evaluated_rewards=known_rewards,
        random_state_seed=123,  # for reproducibility
        max_concurrent=max_concurrent,
    )

  scheduler = AsyncHyperBandScheduler(grace_period=4)

  training_iterations = 100
  num_samples = 25

  analysis = tune.run(
        MODEL_TRAINER[model_algorithm],
        metric="episode_reward_mean", #The metric to optimize for tuning
        mode="max", #Maximize the metric
        #name='B1',
        search_alg=algo,
        scheduler=scheduler,
        num_samples=num_samples,
        keep_checkpoints_num = num_samples,
        stop = {'training_iteration':training_iterations},
        checkpoint_score_attr ='episode_reward_mean',#Only store keep_checkpoints_num trials based on this score
        checkpoint_freq=training_iterations,
        verbose=1,
        local_dir="./tuned_models",#Saving tensorboard plots
        callbacks=[
          TBXLoggerCallback()
        ],
        raise_on_failed_trial=False,
        config = {
                **sample_hyperparameters,
                'env':'StockTrading_train_env',
                'framework':'tf2',
                "eager_tracing" : False,
                "num_workers": 1,
                # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers)
                #"num_gpus_per_worker": 0.25,
                # this corresponds to the number of learner GPUs used,
                # not the total used for the environments/rollouts
                "num_gpus": 1,
                "num_envs_per_worker": 1,
                #"callbacks": LoggingCallbacks,
                "model":{
                    "use_lstm": True,
                    "lstm_cell_size": 256,
                    "lstm_use_prev_action": True,
                    "lstm_use_prev_reward": True,
                  }
                },
  )


  print("Best hyperparameter: ", analysis.best_config)
  #print("Training: ", analysis.trails)
  return analysis

In [None]:
ray.shutdown()

In [13]:
analysis = run_tune()

2022-06-29 10:32:07,782	ERROR tune.py:743 -- Trials did not complete: [DDPGTrainer_StockTrading_train_env_b5a0634c]


Best hyperparameter:  {'critic_lr': 0.001000000000000001, 'actor_lr': 0.001000000000000001, 'tau': 1e-05, 'gamma': 0.9, 'obs_batch': 128, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': False, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}


In [79]:
!pip install 'HEBO>=0.2.0'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting HEBO>=0.2.0
  Downloading HEBO-0.3.2-py3-none-any.whl (106 kB)
[K     |████████████████████████████████| 106 kB 11.5 MB/s 
Collecting pymoo>=0.5.0
  Downloading pymoo-0.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.5 MB)
[K     |████████████████████████████████| 2.5 MB 60.2 MB/s 
Collecting GPy>=1.9.9
  Downloading GPy-1.10.0.tar.gz (959 kB)
[K     |████████████████████████████████| 959 kB 55.7 MB/s 
[?25hCollecting catboost>=0.24.4
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.4 MB/s 
[?25hCollecting gpytorch>=1.4.0
  Downloading gpytorch-1.7.0-py2.py3-none-any.whl (353 kB)
[K     |████████████████████████████████| 353 kB 67.1 MB/s 
Collecting paramz>=0.9.0
  Downloading paramz-0.9.5.tar.gz (71 kB)
[K     |██████████████████████████████

In [None]:
analysis.best_config

{'clip_param': 0.1,
 'eager_tracing': False,
 'env': 'StockTrading_train_env',
 'framework': 'tf2',
 'gamma': 0.9037485846525093,
 'lambda': 0.9386668141475626,
 'lr': 5.000197729565842e-06,
 'model': {'lstm_cell_size': 256,
  'lstm_use_prev_action': True,
  'lstm_use_prev_reward': True,
  'use_lstm': True},
 'num_envs_per_worker': 1,
 'num_gpus': 1,
 'num_workers': 1,
 'train_batch_size': 2400}

# Train the agent

In [31]:
#B1
best_config = {}
best_config[0] = {'clip_param': 0.2, 'lambda': 0.9740037464252967, 'lr': 2.4746160019198826e-05, 'gamma': 0.9239517770930289, 'train_batch_size': 2400, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#B2
best_config = {}
best_config[0] = {'clip_param': 0.1, 'lambda': 0.9, 'lr': 4.9999999999999996e-06, 'gamma': 0.9, 'train_batch_size': 2400, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#B3
best_config = {}
best_config[0] = {'clip_param': 0.4, 'lambda': 0.9612601554586079, 'lr': 1.1123434725658893e-05, 'gamma': 0.9866572109841178, 'train_batch_size': 2400, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#B4
best_config = {}
best_config[0] = {'clip_param': 0.1, 'lambda': 0.9009927357612731, 'lr': 0.00010502434259151515, 'gamma': 0.9538296116220573, 'train_batch_size': 2400, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#B5
best_config = {}
best_config[0] = {'clip_param': 0.1, 'lambda': 0.9700218400062812, 'lr': 0.00014620699838982138, 'gamma': 0.9951363917420465, 'train_batch_size': 2400, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#B6
best_config = {}
best_config[0] = {'clip_param': 0.5, 'lambda': 0.9070509140062014, 'lr': 0.0003133465721916071, 'gamma': 0.939069703010826, 'train_batch_size': 2400, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#C1
best_config = {}
best_config[0] = {'lambda': 0.9, 'lr': 0.0001519697811702668, 'gamma': 0.9748814421424221, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#C3
best_config = {}
best_config[0] = {'lambda': 0.3, 'lr': 0.00039720306727542226, 'gamma': 0.958235141786408, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#C5
best_config = {}
best_config[0] = {'lambda': 0.3, 'lr': 0.005623413251903496, 'gamma': 0.9866572109841178, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#C7
best_config = {}
best_config[0] =  {'lambda': 0.1, 'lr': 0.00010000000000000021, 'gamma': 0.9, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#C9
best_config = {}
best_config[0] =  {'lambda': 0.5, 'lr': 0.00017782794100389265, 'gamma': 0.9610799090397744, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#C11
best_config = {}
best_config[0] =  {'lambda': 0.3, 'lr': 0.0003484985143235064, 'gamma': 0.9433569781145408, 'train_batch_size': 960, 'env': 'StockTrading_train_env', 'framework': 'tf2', 'eager_tracing': True, 'num_workers': 1, 'num_gpus': 1, 'num_envs_per_worker': 1, 'model': {'use_lstm': True, 'lstm_cell_size': 256, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': True}}

In [None]:
#D2
best_config = {}
best_config[0] =  {'clip_param': 0.1,
 'eager_tracing': False,
 'env': 'StockTrading_train_env',
 'framework': 'tf2',
 'gamma': 0.9037485846525093,
 'lambda': 0.9386668141475626,
 'lr': 5.000197729565842e-06,
 'model': {'lstm_cell_size': 256,
  'lstm_use_prev_action': True,
  'lstm_use_prev_reward': True,
  'use_lstm': True},
 'num_envs_per_worker': 1,
 'num_gpus': 1,
 'num_workers': 1,
 'train_batch_size': 2400}


In [32]:
def training(
        env_config,
        drl_lib,
        env,
        model_name,
        init_ray=True,
        **kwargs
):
   
    env_instance = env(config=env_config)

    print(env_instance)

    rllib_params = kwargs.get("rllib_params")
    
    agent_rllib = DRLAgent_rllib(
            env=env,
            price_array=env_config['price_array'],
            tech_array=env_config['tech_array'],
            turbulence_array=env_config['turbulence_array'],
        )
    
    print(agent_rllib)

    model, model_config = agent_rllib.get_model(model_name)

    print('got model_config')

    model_config["lr"] = best_config[0]["lr"]
    model_config["train_batch_size"] = rllib_params["train_batch_size"]
    model_config["gamma"] = best_config[0]["gamma"]
    model_config['clip_param'] = best_config[0]['clip_param']

    #model_config["critic_lr"] = best_config[0]["critic_lr"]
    #model_config["actor_lr"] = best_config[0]["actor_lr"]
    #model_config["tau"] = best_config[0]["tau"]

    #model_config["lambda"] = best_config[0]["lambda"]

    #model_config['obs_batch'] = 100

    model_config['framework'] = "tf2"
    model_config['num_workers'] = 4
    model_config['num_cpus_per_worker'] = 0 
    model_config['num_gpus'] = 1
    model_config["eager_tracing"] = True

    model_config["model"]["use_lstm"] = True
    model_config["model"]["lstm_cell_size"] = 256
    model_config["model"]["lstm_use_prev_action"] = True
    model_config["model"]["lstm_use_prev_reward"] = True

    if model_name not in MODELS:
            raise NotImplementedError("NotImplementedError")
    if init_ray:
            ray.init(
                ignore_reinit_error=True
            )  # Other Ray APIs will not work until `ray.init()` is called.
    print('ray initialised')

    if model_name == "ppo":
            trainer = model.PPOTrainer(env=env, logger_creator=custom_logger_creator, config=model_config)
    elif model_name == "a2c":
            trainer = model.A2CTrainer(env=env, config=model_config)
    elif model_name == "ddpg":
            trainer = model.DDPGTrainer(env=env, config=model_config)
    elif model_name == "td3":
            trainer = model.TD3Trainer(env=env, config=model_config)
    elif model_name == "sac":
            trainer = model.SACTrainer(env=env, config=model_config)

    print('got trainer')
    return(trainer)

In [33]:
def train_model(
        total_episodes,
        env_config_list,
        drl_lib,
        env,
        model_name,
        init_ray=True,
        **kwargs
):

        s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} saved {}"

        trainer = training(
                        env_config = env_config_list[0],
                        drl_lib='rllib', 
                        env=env,
                        model_name = model_algorithm, 
                        init_ray=True,
                        rllib_params = rllib_params)
        #trainer.restore('/tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10')
        result = trainer.train()
        print(pretty_print(result))
            
        file_name = trainer.save("/tmp/rllib_checkpoint")

        print(s.format(
              1,
              result["episode_reward_min"],
              result["episode_reward_mean"],
              result["episode_reward_max"],
              result["episode_len_mean"],
              file_name
            ))

        ray.shutdown()

        for episode in range(total_episodes-1):
            trainer = training(
                        env_config = env_config_list[episode+1],
                        drl_lib='rllib',
                        env=env,
                        model_name = model_algorithm, 
                        init_ray=True,
                        rllib_params = rllib_params)
                        
            trainer.restore(file_name)
            result = trainer.train()
            print(pretty_print(result))
            
            file_name = trainer.save("/tmp/rllib_checkpoint")

            print(s.format(
              episode + 2,
              result["episode_reward_min"],
              result["episode_reward_mean"],
              result["episode_reward_max"],
              result["episode_len_mean"],
              file_name
            ))

            ray.shutdown()
  
        return file_name

In [65]:
ray.shutdown()

In [None]:
checkpoint_path = train_model(
      total_episodes = len(train_env_config),
      env_config_list = train_env_config,
      drl_lib='rllib', 
      env=env,
      model_name = model_algorithm, 
      rllib_params = rllib_params)

<ChangedStockTradingEnv instance>
<finrl.agents.rllib.models.DRLAgent object at 0x7f1c565719d0>
got model_config
ray initialised




got trainer


## Test

In [24]:
def get_config_Test(dic, Number_Days):  

    env_config = []

    for index in range(Number_Days):

      print(index)

      price_array, tech_array, turbulence_array = df_to_array(dic[index+Number_Train_Days], False)

      config = {
          "price_array": price_array,
          "tech_array": tech_array,
          "turbulence_array": turbulence_array,
          "if_train": False,
      }

      env_config.append(config)

    return env_config

In [25]:
test_env_config = get_config_Test(dic, Number_Test_Days)

0
Successfully transformed into array
1
Successfully transformed into array
2
Successfully transformed into array
3
Successfully transformed into array
4
Successfully transformed into array
5
Successfully transformed into array
6
Successfully transformed into array
7
Successfully transformed into array
8
Successfully transformed into array


In [27]:
def DRL_prediction(
            model_name,
            env,
            env_instance,
            price_array,
            tech_array,
            turbulence_array,
            agent_path,
    ):
        if model_name not in MODELS:
            raise NotImplementedError("NotImplementedError")

        if model_name == "a2c":
            model_config = MODELS[model_name].A2C_DEFAULT_CONFIG.copy()
        elif model_name == "td3":
            model_config = MODELS[model_name].TD3_DEFAULT_CONFIG.copy()
        else:
            model_config = MODELS[model_name].DEFAULT_CONFIG.copy()
        model_config["env"] = env
        model_config["log_level"] = "WARN"
        model_config["env_config"] = {
            "price_array": price_array,
            "tech_array": tech_array,
            "turbulence_array": turbulence_array,
            "if_train": False}

        #model_config["lr"] = best_config[0]["lr"]
        model_config["train_batch_size"] = rllib_params["train_batch_size"]
        #model_config["gamma"] = best_config[0]["gamma"]
        #model_config['clip_param'] = best_config[0]['clip_param']

        #model_config["critic_lr"] = best_config[0]["critic_lr"]
        #model_config["actor_lr"] = best_config[0]["actor_lr"]
        #model_config["tau"] = best_config[0]["tau"]

        #model_config["lambda"] = best_config[0]["lambda"]

        #model_config["output"] = "/content/drive/MyDrive/csvfiles" 
        #model_config["local_dir"] = "./trained_models"
        #model_config["reuse_actors"] = True
        #model_config["callbacks"] = [TBXLoggerCallback()]

        model_config['framework'] = "tf2"
        model_config['num_workers'] = 4
        model_config['num_cpus_per_worker'] = 0 
        model_config['num_gpus'] = 1
        model_config["eager_tracing"] = True

        model_config["model"]["use_lstm"] = True
        model_config["model"]["lstm_cell_size"] = 256
        model_config["model"]["lstm_use_prev_action"] = True
        model_config["model"]["lstm_use_prev_reward"] = True

        env_config = {
            "price_array": price_array,
            "tech_array": tech_array,
            "turbulence_array": turbulence_array,
            "if_train": False,
        }
        env_instance = env(config=env_config)

        # ray.init() # Other Ray APIs will not work until `ray.init()` is called.
        if model_name == "ppo":
            trainer = MODELS[model_name].PPOTrainer(env=env, config=model_config)
        elif model_name == "a2c":
            trainer = MODELS[model_name].A2CTrainer(env=env, config=model_config)
        elif model_name == "ddpg":
            trainer = MODELS[model_name].DDPGTrainer(env=env, config=model_config)
        elif model_name == "td3":
            trainer = MODELS[model_name].TD3Trainer(env=env, config=model_config)
        elif model_name == "sac":
            trainer = MODELS[model_name].SACTrainer(env=env, config=model_config)

        print('got trainer')
        
        #try:
         #   trainer.restore(agent_path)
          #  print("Restoring from checkpoint path", agent_path)
        #except BaseException:
         #   raise ValueError("Fail to load agent!")
        
        trainer.restore(agent_path)
        print("restored agent")
        print("Restoring from checkpoint path", agent_path)

        # test on the testing env
        episode_returns = []  # the cumulative_return / initial_account
        episode_total_assets = [env_instance.initial_total_asset]
        obs = env_instance.reset()
        state = [np.zeros([256], np.float32) for _ in range(2)]
        prev_a = [0, 0]
        prev_r = 0.0
        done = False
        total_reward = 0.0

        print("starting the loop")

        while not done:
            action, state, _ = trainer.compute_single_action(obs, state, prev_action=prev_a, prev_reward=prev_r)
            obs, reward, done, _ = env_instance.step(action)
            

            prev_a = action
            prev_r = reward

            total_reward += reward

            total_asset = (
                    env_instance.amount
                    + (env_instance.price_ary[env_instance.day] * env_instance.stocks).sum()
            )
            episode_total_assets.append(total_asset)
            episode_return = total_asset / env_instance.initial_total_asset
            episode_returns.append(episode_return)
        ray.shutdown()
        print("episode return: " + str(episode_return))
        print("Test Finished!")
        return episode_total_assets

In [None]:
def DRL_prediction(
            model_name,
            env,
            env_instance,
            price_array,
            tech_array,
            turbulence_array,
            agent_path,
    ):
        if model_name not in MODELS:
            raise NotImplementedError("NotImplementedError")

        if model_name == "a2c":
            model_config = MODELS[model_name].A2C_DEFAULT_CONFIG.copy()
        elif model_name == "td3":
            model_config = MODELS[model_name].TD3_DEFAULT_CONFIG.copy()
        else:
            model_config = MODELS[model_name].DEFAULT_CONFIG.copy()
        model_config["env"] = env
        model_config["log_level"] = "WARN"
        model_config["env_config"] = {
            "price_array": price_array,
            "tech_array": tech_array,
            "turbulence_array": turbulence_array,
            "if_train": False,
        }

        #model_config["model"]["use_lstm"] = True
        #model_config["model"]["lstm_cell_size"] = 256
        #model_config["model"]["lstm_use_prev_action"] = True
        #model_config["model"]["lstm_use_prev_reward"] = True

        env_config = {
            "price_array": price_array,
            "tech_array": tech_array,
            "turbulence_array": turbulence_array,
            "if_train": False,
        }
        #env_instance = env(config=env_config)

        # ray.init() # Other Ray APIs will not work until `ray.init()` is called.
        if model_name == "ppo":
            trainer = MODELS[model_name].PPOTrainer(env=env, config=model_config)
        elif model_name == "a2c":
            trainer = MODELS[model_name].A2CTrainer(env=env, config=model_config)
        elif model_name == "ddpg":
            trainer = MODELS[model_name].DDPGTrainer(env=env, config=model_config)
        elif model_name == "td3":
            trainer = MODELS[model_name].TD3Trainer(env=env, config=model_config)
        elif model_name == "sac":
            trainer = MODELS[model_name].SACTrainer(env=env, config=model_config)

        print('got trainer')
        
        #try:
         #   trainer.restore(agent_path)
          #  print("Restoring from checkpoint path", agent_path)
        #except BaseException:
         #   raise ValueError("Fail to load agent!")
        
        trainer.restore(agent_path)
        print("restored agent")
        print("Restoring from checkpoint path", agent_path)

        state = env_instance.reset()
        episode_returns = []  # the cumulative_return / initial_account
        episode_total_assets = [env_instance.initial_total_asset]
        actions_list = []
        done = False
        while not done:
            action = trainer.compute_single_action(state)
            action_list.append(action)
            state, reward, done, _ = env_instance.step(action)

            total_asset = (
                    env_instance.amount
                    + (env_instance.price_ary[env_instance.day] * env_instance.stocks).sum()
            )
            episode_total_assets.append(total_asset)
            episode_return = total_asset / env_instance.initial_total_asset
            episode_returns.append(episode_return)
        ray.shutdown()
        print("episode return: " + str(episode_return))
        print("Test Finished!")
        return episode_total_assets, action_list

In [28]:
def test(
        env_config,
        env,
        capital,
        model_name,
        cwd,
        if_vix=True,
        **kwargs
):

    env_instance = env(config=env_config, initial_capital=capital)

    # load elegantrl needs state dim, action dim and net dim
    net_dimension = kwargs.get("net_dimension", 2 ** 7)
    #cwd = "./trained_" + str(model_name)

    print("price_array: ", len(env_config["price_array"]))

        # load agent
    
    #episode_capital = [capital]
    episode_total_assets = DRL_prediction(
            model_name=model_name,
            env=env,
            env_instance = env_instance,
            price_array=env_config["price_array"],
            tech_array=env_config["tech_array"],
            turbulence_array=env_config["turbulence_array"],
            agent_path=cwd,
        )
    account_value = episode_total_assets

    return account_value



In [39]:
ray.shutdown()

In [29]:
test_results = []
starting_capital = 10e6
final_capital = [starting_capital]

for index in range(len(test_env_config)):

      print(index)

      account_value = test(env_config = test_env_config[index],
                      env=env, 
                      capital = final_capital[index], 
                      model_name=model_algorithm,
                      cwd =  checkpoint_path,
                      net_dimension = 512)
      final_capital.append(account_value[-1])
      ray.shutdown()
      test_results.append(account_value)
print(final_capital)
      

0
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8700540360000091
Test Finished!
1
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.9007593810000079
Test Finished!
2
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8168705940000085
Test Finished!
3
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8616025040000082
Test Finished!
4
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8696510630000073
Test Finished!
5
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8678335150000065
Test Finished!
6
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8198178700000098
Test Finished!
7
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8609543120000077
Test Finished!
8
price_array:  481




got trainer
restored agent
Restoring from checkpoint path /tmp/rllib_checkpoint/checkpoint_000010/checkpoint-10
starting the loop
episode return: 0.8395146390000087
Test Finished!
[10000000.0, 870054.036000009, 900759.3810000079, 816870.5940000085, 861602.5040000081, 869651.0630000073, 867833.5150000065, 819817.8700000098, 860954.3120000077, 839514.6390000087]


In [30]:
for index in range(len(test_results)):

  df_account_test = pd.DataFrame(data=test_results[index],columns=['account_value'])
  file_name = "D1 test "+str(index+1)+".csv"
  df_account_test.to_csv(file_name)
  files.download(file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>