<a href="https://colab.research.google.com/github/NicoleRichards1998/FinRL/blob/master/DayTradingEnv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gym
import numpy as np
from numpy import random as rd
import csv

In [None]:
class StockTradingEnv(gym.Env):
    """Gym environment that trades a basket of stocks over two data sets.

    The environment is driven by two sets of arrays taken from ``config``
    (prefixed ``D1_`` and ``D2_``), which are walked through one row per
    ``step`` call.  The row counter is ``self.minute``.

    Episode timeline (``T = self.trading_minutes = len(D1 rows) - 1``):

    * ``reset()`` observes row 0 of D1.
    * steps ``1 .. T-1`` trade on D1 rows ``1 .. T-1``.
    * steps ``T .. 2T-1`` trade on D2 rows ``0 .. T-1``.
    * step ``2T`` executes no trade; it ends the episode and returns the
      discounted reward accumulated so far.

    NOTE(review): with this indexing the last row of D1 is never read; that
    matches the original bounds and is kept as is -- confirm it is intended.

    State layout produced by ``get_state`` (all stacked into one vector):
    cash, turbulence, turbulence flag, prices, holdings, cool-down counters,
    technical indicators.
    """

    def __init__(
        self,
        config,
        initial_account=1e6,
        gamma=0.99,
        turbulence_thresh=99,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        reward_scaling=2 ** -11,
        initial_stocks=None,
        day=0,
    ):
        """Build the environment from pre-computed market arrays.

        Args:
            config: Mapping with the keys ``D1_price_array``,
                ``D1_tech_array``, ``D1_turbulence_array``,
                ``D2_price_array``, ``D2_tech_array``,
                ``D2_turbulence_array`` (NumPy arrays indexed by time step on
                axis 0) and ``if_train`` (bool).
            initial_account: Accepted for backward compatibility; not used.
            gamma: Discount factor applied to the running episode reward.
            turbulence_thresh: Turbulence level above which all holdings are
                liquidated instead of following the agent's actions.
            min_stock_rate: Fraction of ``max_stock`` an action must exceed
                (in absolute value) before it triggers a trade.
            max_stock: Scale that maps actions in ``[-1, 1]`` to share counts.
            initial_capital: Starting cash.
            buy_cost_pct: Proportional transaction cost on buys.
            sell_cost_pct: Proportional transaction cost on sells.
            reward_scaling: Multiplier applied to the change in total assets.
            initial_stocks: Starting holdings per stock; zeros when ``None``.
            day: Accepted for backward compatibility; not used.

        Raises:
            ValueError: If the D2 price array has fewer rows than the number
                of time steps that will be read from it.
        """
        D1_price_ary = config["D1_price_array"]
        D1_tech_ary = config["D1_tech_array"]
        D1_turbulence_ary = config["D1_turbulence_array"]

        D2_price_ary = config["D2_price_array"]
        D2_tech_ary = config["D2_tech_array"]
        D2_turbulence_ary = config["D2_turbulence_array"]

        if_train = config["if_train"]

        # Raw (unscaled) D1 arrays, kept under their historical names.
        self.price_ary = D1_price_ary.astype(np.float32)
        self.tech_ary = D1_tech_ary.astype(np.float32)
        self.turbulence_ary = D1_turbulence_ary

        # Scaled arrays actually consumed by reset()/step().
        (
            self.D1_price_ary,
            self.D1_tech_ary,
            self.D1_turbulence_ary,
            self.D1_turbulence_bool,
        ) = self._prepare_day(
            D1_price_ary, D1_tech_ary, D1_turbulence_ary, turbulence_thresh
        )
        (
            self.D2_price_ary,
            self.D2_tech_ary,
            self.D2_turbulence_ary,
            self.D2_turbulence_bool,
        ) = self._prepare_day(
            D2_price_ary, D2_tech_ary, D2_turbulence_ary, turbulence_thresh
        )

        stock_dim = self.D1_price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.min_stock_rate = min_stock_rate
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        # np.asarray lets callers pass a plain list as well as an ndarray.
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else np.asarray(initial_stocks, dtype=np.float32)
        )

        # Episode state, populated by reset().
        self.day = None
        self.minute = None
        self.amount = None
        self.stocks = None
        self.stocks_cool_down = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None
        self._last_state = None

        # Per-episode history used by save_asset_memory()/save_action_memory().
        self.date_memory = []
        self.asset_memory = []
        self.actions_memory = []

        # environment information
        self.env_name = "StockEnv"
        # amount + (turbulence, turbulence_bool)
        # + (price, stock, cool_down) * stock_dim + one row of tech_ary.
        # This mirrors exactly what get_state() stacks.
        # Assumes one scalar turbulence value per time step -- TODO confirm.
        self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_ary.shape[1]
        self.stocks_cd = None
        self.action_dim = stock_dim

        self.max_step = 2  # value of self.day that marks the end of an episode
        self.trading_minutes = self.D1_price_ary.shape[0] - 1

        if self.D2_price_ary.shape[0] < self.trading_minutes:
            raise ValueError(
                "D2 arrays need at least %d rows, got %d"
                % (self.trading_minutes, self.D2_price_ary.shape[0])
            )

        self.if_train = if_train
        self.if_discrete = False
        self.target_return = 10.0
        self.episode_return = 0.0

        self.observation_space = gym.spaces.Box(
            low=-3000, high=3000, shape=(self.state_dim,), dtype=np.float32
        )
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.action_dim,), dtype=np.float32
        )

    def _prepare_day(self, price_ary, tech_ary, turbulence_ary, turbulence_thresh):
        """Return (price, scaled tech, squashed turbulence, turbulence flag)."""
        price = price_ary.astype(np.float32)
        tech = tech_ary.astype(np.float32) * 2 ** -7
        turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        turbulence = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2 ** -5
        ).astype(np.float32)
        return price, tech, turbulence, turbulence_bool

    def _market_row(self, minute):
        """Return (price, turbulence, turbulence flag, tech) for a time step.

        Steps below ``trading_minutes`` read D1; later steps read D2 with the
        row index shifted back by ``trading_minutes``.
        """
        if minute < self.trading_minutes:
            row = minute
            return (
                self.D1_price_ary[row],
                self.D1_turbulence_ary[row],
                self.D1_turbulence_bool[row],
                self.D1_tech_ary[row],
            )
        row = minute - self.trading_minutes
        return (
            self.D2_price_ary[row],
            self.D2_turbulence_ary[row],
            self.D2_turbulence_bool[row],
            self.D2_tech_ary[row],
        )

    def reset(self):
        """Start a new episode and return the initial state vector.

        In training mode the starting holdings and cash are randomised;
        otherwise they are exactly ``initial_stocks`` and ``initial_capital``.
        """
        self.day = 0
        self.minute = 0
        price = self.D1_price_ary[self.minute]
        turbulence = self.D1_turbulence_ary[self.minute]
        turbulence_bool = self.D1_turbulence_bool[self.minute]
        tech = self.D1_tech_ary[self.minute]

        if self.if_train:
            self.stocks = (
                self.initial_stocks + rd.randint(0, 64, size=self.initial_stocks.shape)
            ).astype(np.float32)
            # Cash is what is left of a jittered capital after "paying" for
            # the random starting holdings.
            self.amount = (
                self.initial_capital * rd.uniform(0.95, 1.05)
                - (self.stocks * price).sum()
            )
        else:
            # astype() copies, so in-place trades never touch initial_stocks.
            self.stocks = self.initial_stocks.astype(np.float32)
            self.amount = self.initial_capital
        self.stocks_cool_down = np.zeros_like(self.stocks)

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0

        self.date_memory = [self._get_date()]
        self.asset_memory = [self.total_asset]
        self.actions_memory = []

        self._last_state = self.get_state(price, turbulence, turbulence_bool, tech)
        return self._last_state

    def step(self, actions):
        """Advance one time step.

        Args:
            actions: Array-like with one value per stock, nominally in
                ``[-1, 1]``; it is scaled by ``max_stock`` and truncated to
                whole shares.

        Returns:
            Tuple ``(state, reward, done, info)``.  On the terminal step no
            trade is executed, ``state`` is the last observed state and
            ``reward`` is the discounted reward accumulated over the episode.
        """
        actions = (np.asarray(actions) * self.max_stock).astype(int)
        self.minute += 1

        if self.minute >= 2 * self.trading_minutes:
            # Both data sets are exhausted: close the episode.
            self.day = self.max_step
            self.episode_return = self.total_asset / self.initial_total_asset
            return self._last_state, self.gamma_reward, True, dict()

        # 0 while reading D1, 1 while reading D2.
        self.day = self.minute // self.trading_minutes
        price, turbulence, turbulence_bool, tech = self._market_row(self.minute)

        held_before = self.stocks.copy()
        self.stocks_cool_down += 1

        if turbulence_bool == 0:
            self._execute_orders(actions, price)
        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cool_down[:] = 0

        state = self.get_state(price, turbulence, turbulence_bool, tech)
        total_asset = self.amount + (self.stocks * price).sum()
        reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset
        self.gamma_reward = self.gamma_reward * self.gamma + reward

        # History: net change in holdings, portfolio value, time-step label.
        self.actions_memory.append(self.stocks - held_before)
        self.asset_memory.append(total_asset)
        self.date_memory.append(self._get_date())

        self._last_state = state
        return state, reward, False, dict()

    def _execute_orders(self, actions, price):
        """Apply sell orders, then buy orders, at the given prices.

        Only actions whose magnitude exceeds ``max_stock * min_stock_rate``
        are acted upon, and stocks with a non-positive price are skipped.
        """
        min_action = int(self.max_stock * self.min_stock_rate)

        for index in np.where(actions < -min_action)[0]:  # sell orders
            if price[index] > 0:
                # Cannot sell more shares than are currently held.
                sell_num_shares = min(self.stocks[index], -actions[index])
                self.stocks[index] -= sell_num_shares
                self.amount += (
                    price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                )
                self.stocks_cool_down[index] = 0

        for index in np.where(actions > min_action)[0]:  # buy orders
            if price[index] > 0:
                # NOTE(review): the affordable share count ignores
                # buy_cost_pct, so cash can end slightly below zero, and a
                # negative cash balance yields a negative share count.
                buy_num_shares = min(self.amount // price[index], actions[index])
                self.stocks[index] += buy_num_shares
                self.amount -= (
                    price[index] * buy_num_shares * (1 + self.buy_cost_pct)
                )
                self.stocks_cool_down[index] = 0

    def get_state(self, price, turbulance_ar, turbulance_b, tech):
        """Stack cash, market data and portfolio data into one state vector.

        Args:
            price: Price row for the current time step.
            turbulance_ar: Squashed turbulence value for the current step.
            turbulance_b: Turbulence flag for the current step.
            tech: Technical-indicator row for the current step.

        Returns:
            The horizontally stacked array
            ``[cash * 2**-12, turbulence, flag, price * 2**-6,
            stocks * 2**-6, cool_down, tech]``.
        """
        amount = np.array(self.amount * (2 ** -12), dtype=np.float32)
        scale = np.array(2 ** -6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                turbulance_ar,
                turbulance_b,
                price * scale,
                self.stocks * scale,
                self.stocks_cool_down,
                tech,
            )
        )

    @staticmethod
    def sigmoid_sign(ary, thresh):
        """Squash ``ary`` into ``(-thresh / 2, thresh / 2)`` with a sigmoid."""

        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh

    def _get_date(self):
        """Return the label stored in ``date_memory`` for the current step.

        The config carries no calendar dates, so the global time-step index
        (``self.minute``) is used as the label.
        """
        return self.minute

    def save_asset_memory(self):
        """Write the per-step portfolio value to ``asset_memory`` (CSV).

        Returns:
            DataFrame with columns ``date`` and ``account_value`` covering the
            current episode (empty before the first ``reset``).
        """
        import pandas as pd

        df_account_value = pd.DataFrame(
            {"date": self.date_memory, "account_value": self.asset_memory}
        )

        df_account_value.to_csv('asset_memory')
        return df_account_value

    def save_action_memory(self):
        """Write the per-step executed trades to ``action_memory`` (CSV).

        Each recorded action is the net change in holdings produced by one
        ``step`` call.  With several stocks the frame has one column per
        stock position and is indexed by ``date``; with a single stock it has
        the columns ``date`` and ``actions``.

        Returns:
            The DataFrame that was written.
        """
        import pandas as pd

        # date_memory holds one more entry than actions_memory (from reset()).
        date_list = self.date_memory[:-1]
        action_list = self.actions_memory

        if self.action_dim > 1:
            df_actions = pd.DataFrame(action_list)
            df_actions.index = pd.Index(date_list, name="date")
        else:
            df_actions = pd.DataFrame(
                {"date": date_list, "actions": np.ravel(action_list)}
            )
        df_actions.to_csv('action_memory')
        return df_actions