In [None]:
# Define environment

In [24]:
import random
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import pandas as pd

class TradingSPYEnv(gym.Env):
    """
    SPY (S&P500) trading environment.
  
    State: [[short, neutral, long], portfolio value]
      - The states are 
  
  
    Action: sell (0), hold (1), and buy (2)
      - I prescribe a very simple policy
      - when selling, sell all the shares
      - when buying, buy as many as cash in hand allows
    """
    def __init__(self, train_data_path='historySPY.csv', sma_len=[5], init_invest=10000, learning_rate=0.0002, gamma=0.98,
                normalize_price = True, mode = 'train', train_test_split = 0.9):
        train_data = pd.read_csv(train_data_path, index_col = False, parse_dates= ['Date'])
        self.stock_price_history = train_data 
        self.current_step = 0 # The step in the data
        self.iteration = 0 # the iteration step in an episode
        self.init_invest = init_invest
        self.accumulated_profit = 0.0
        self.normalize_price = normalize_price

        feature_dict = {'Date': self.stock_price_history['Date'],
                    'State': np.zeros(self.stock_price_history.shape[0], dtype=int),
                    'accumulated_profit': np.zeros(self.stock_price_history.shape[0], dtype=float), 
                    'portfolio_value': np.zeros(self.stock_price_history.shape[0], dtype=float),
                    'Close': self.stock_price_history['Close']
                    }
    
        # feature engineering. Put values like sma
#        if sma_len([],list):
            
#            for sma in sma_len:
#                feature_dict[feature+'_'+str(sma)] = self.stock_price_history[feature].rolling(sma).mean()
#            self.stock_price_history[feature+'_'+str(sma)] = self.stock_price_history[feature].rolling(sma).mean()
                    
        self.stock_price_history.dropna(axis=0,inplace=True)
        self.stock_price_history.reset_index(drop=True,inplace=True)

        self.features = pd.DataFrame(feature_dict)
        if isinstance(sma_len,list):
            self._set_sma(sma_len)
        self.features = self.features.dropna(axis=0)
        self.features.reset_index(drop=True,inplace=True)
    
        train_test_split_index = int(self.features.shape[0] * train_test_split)
        if mode == 'train':
            self.end_step = train_test_split_index
        elif mode == 'test':
            self.features.shape[0]
            self.current_step = train_test_split_index
            self.end_step = self.features.shape[0]

        # Set up data and features
        self.reset(current_step = self.current_step)
            
        # action space
        # 0: short, 1: neutral, 2: long
        self.action_space = spaces.Discrete(3)
    
        # observation space
        # This contains features to make decisions
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(self.features.columns.shape[0] -1,), dtype=np.float16)
    
    def _set_sma(self, sma_len):
        feature = 'Close'
        for sma in sma_len:
            col_name = feature+'_'+str(sma)
            self.stock_price_history[col_name] = self.stock_price_history[feature].rolling(sma).mean()
            self.features[col_name] = self.stock_price_history[feature].rolling(sma).mean()
        
    def _get_observation(self):
        # return features at current step
        tmp = self.features.drop(columns=['Date']).loc[self.current_step].to_numpy()
        # state, portfolio_value, Close, smas
        return tmp

    def reset(self, current_step = None):
        self.iteration = 0 
        self.features['State'] = 1 # State:1 means market neutral
        self.features['portfolio_value'] = 0.0       
        self.features['accumulated_profit'] = 0.0
        
        # Set the current step to a random point within the data frame
        if current_step is not None:
            self.current_step = current_step
        else:
            self.current_step = random.randint(0, int(self.features.shape[0] * 0.9))
            
        self.features['portfolio_value'].loc[self.current_step] = self.init_invest
        
        if self.normalize_price:
            price = self.stock_price_history['Close'].loc[self.current_step]
            for col in self.features.columns:
                if 'Close' in col:
                    self.features[col].loc[self.current_step:self.end_step] = self.stock_price_history[col].loc[self.current_step:self.end_step] / price

        return self._get_observation()

    """
    Compute what happens next step
    """
    def step(self, action):
        next_step = self.current_step + 1
        prev_step = self.current_step - 1
        if next_step == self.end_step: 
            # At the end, we have nothing to do
            done = True
            return None, None, done, {'accumulated_profit': self.accumulated_profit}
        
        col_name = 'Close'
        features = self.features
        portfolio_value = self.features.portfolio_value        

        done = False
        # reward 
        r_t = 0.0
        if (self.iteration > 1) and (self.current_step is not self.end_step): # Exclude the very first step
            # difference in portfolio value 
            r_t = portfolio_value.loc[self.current_step] - portfolio_value.loc[prev_step]
            

        features['accumulated_profit'].loc[self.current_step] = features['accumulated_profit'].loc[prev_step] + r_t    
            
        # Current state is set
        self.features.State.loc[self.current_step] = action
    
        # Compute next step
        # compute portfolio value at next step
        if action == 0: # shorting
            portfolio_value.loc[next_step] = portfolio_value.loc[self.current_step] * features[col_name].loc[self.current_step] / features[col_name].loc[next_step]
        elif action == 1: # market-neutral position (100% cash)  
            portfolio_value.loc[next_step] = portfolio_value.loc[self.current_step]
        elif action == 2: # longing
            portfolio_value.loc[next_step] = portfolio_value.loc[self.current_step] * features[col_name].loc[next_step] / features[col_name].loc[self.current_step]
        else:
            raise TypeError("Action is out of the space")
        self.features.State.loc[next_step] = action
    
        self.current_step += 1 
        self.iteration += 1
        s_prime = self._get_observation() # state at t+1
    
        return s_prime, r_t, done, None


In [25]:
tmp = TradingSPYEnv()
random.randint(0,5)

price 79.28
col  Close
price 79.28
col  Close_5
price 79.28


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


5

In [26]:
tmp.features

Unnamed: 0,Date,State,accumulated_profit,portfolio_value,Close,Close_5
0,2002-01-07,1,0.0,10000.0,1.000000,
1,2002-01-08,1,0.0,0.0,1.010721,
2,2002-01-09,1,0.0,0.0,1.022200,
3,2002-01-10,1,0.0,0.0,1.029011,
4,2002-01-11,1,0.0,0.0,1.021821,1.016751
...,...,...,...,...,...,...
4714,2020-09-28,1,0.0,0.0,4.166246,4.187412
4715,2020-09-29,1,0.0,0.0,4.069627,4.150000
4716,2020-09-30,1,0.0,0.0,4.080474,4.122250
4717,2020-10-01,1,0.0,0.0,4.146443,4.117407


In [27]:
tmp.stock_price_history['Close_5'] = tmp.stock_price_history['Close'].rolling(5).mean()

In [28]:
tmp.stock_price_history.dropna().loc[4]

Date            2002-01-07 00:00:00
Open                          81.64
High                          81.84
Low                           80.85
Close                         81.01
Volume                     13106500
Dividends                         0
Stock Splits                      0
Close_5                      80.608
Name: 4, dtype: object

In [29]:
s = tmp.reset()
print(tmp.current_step)
print(s)
s_prime, r_t, done, info = tmp.step(2)
print(s_prime)
print(r_t)
print(done)

price 82.6
col  Close
price 82.6
col  Close_5
price 82.6
628
[1.0000000e+00 0.0000000e+00 1.0000000e+04 1.0000000e+00 9.9559322e-01]
[2.00000000e+00 0.00000000e+00 9.86077482e+03 9.86077482e-01
 9.93050847e-01]
0.0
False


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [32]:
s = tmp.reset()
tmp.stock_price_history.loc[tmp.current_step:]

price 75.81
col  Close
price 75.81
col  Close_5
price 75.81


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Close_5
481,2003-11-26,75.85,75.87,75.11,75.81,33053600,0.0,0,74.970
482,2003-11-28,75.75,76.02,75.69,75.87,10507500,0.0,0,75.350
483,2003-12-01,76.15,76.74,76.12,76.69,38699000,0.0,0,75.834
484,2003-12-02,76.53,76.81,76.31,76.50,35352000,0.0,0,76.082
485,2003-12-03,76.72,77.03,76.31,76.37,39078600,0.0,0,76.248
...,...,...,...,...,...,...,...,...,...
4718,2020-09-28,333.22,334.96,332.15,334.19,64584600,0.0,0,327.872
4719,2020-09-29,333.97,334.77,331.62,332.37,51304000,0.0,0,328.286
4720,2020-09-30,333.09,338.29,332.88,334.89,103653800,0.0,0,330.736
4721,2020-10-01,337.69,338.74,335.01,337.04,88698700,0.0,0,333.444


In [33]:
tmp.features.loc[tmp.current_step:]

Unnamed: 0,Date,State,accumulated_profit,portfolio_value,Close,Close_5
481,2003-12-03,1,0.0,10000.0,1.000000,0.988920
482,2003-12-04,1,0.0,0.0,1.000791,0.993932
483,2003-12-05,1,0.0,0.0,1.011608,1.000317
484,2003-12-08,1,0.0,0.0,1.009102,1.003588
485,2003-12-09,1,0.0,0.0,1.007387,1.005778
...,...,...,...,...,...,...
4714,2020-09-28,1,0.0,0.0,4.356945,4.379079
4715,2020-09-29,1,0.0,0.0,4.255903,4.339955
4716,2020-09-30,1,0.0,0.0,4.267247,4.310935
4717,2020-10-01,1,0.0,0.0,4.336235,4.305870


In [None]:
tmp.stock_price_history['Close'].loc[tmp.current_step:]

In [None]:
tmp.features

In [None]:
'Close' in 'Close_5'

In [None]:
"""
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import itertools


class TradingEnv(gym.Env):
  A 3-stock (MSFT, IBM, QCOM) trading environment.

  State: [# of stock owned, current stock prices, cash in hand]
    - array of length n_stock * 2 + 1
    - price is discretized (to integer) to reduce state space
    - use close price for each stock
    - cash in hand is evaluated at each step based on action performed

  Action: sell (0), hold (1), and buy (2)
    - when selling, sell all the shares
    - when buying, buy as many as cash in hand allows
    - if buying multiple stock, equally distribute cash in hand and then utilize the balance

def __init__(self, train_data, init_invest=20000):
    # data
    self.stock_price_history = np.around(train_data) # round up to integer to reduce state space
    self.n_stock, self.n_step = self.stock_price_history.shape

    # instance attributes
    self.init_invest = init_invest
    self.cur_step = None
    self.stock_owned = None
    self.stock_price = None
    self.cash_in_hand = None

    # action space
    self.action_space = spaces.Discrete(3**self.n_stock)

    # observation space: give estimates in order to sample and build scaler
    stock_max_price = self.stock_price_history.max(axis=1)
    stock_range = [[0, init_invest * 2 // mx] for mx in stock_max_price]
    price_range = [[0, mx] for mx in stock_max_price]
    cash_in_hand_range = [[0, init_invest * 2]]
    self.observation_space = spaces.MultiDiscrete(stock_range + price_range + cash_in_hand_range)

    # seed and start
    self._seed()
    self._reset()


  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]


  def _reset(self):
    self.cur_step = 0
    self.stock_owned = [0] * self.n_stock
    self.stock_price = self.stock_price_history[:, self.cur_step]
    self.cash_in_hand = self.init_invest
    return self._get_obs()


  def _step(self, action):
    assert self.action_space.contains(action)
    prev_val = self._get_val()
    self.cur_step += 1
    self.stock_price = self.stock_price_history[:, self.cur_step] # update price
    self._trade(action)
    cur_val = self._get_val()
    reward = cur_val - prev_val
    done = self.cur_step == self.n_step - 1
    info = {'cur_val': cur_val}
    return self._get_obs(), reward, done, info


  def _get_obs(self):
    obs = []
    obs.extend(self.stock_owned)
    obs.extend(list(self.stock_price))
    obs.append(self.cash_in_hand)
    return obs


  def _get_val(self):
    return np.sum(self.stock_owned * self.stock_price) + self.cash_in_hand


  def _trade(self, action):
    # all combo to sell(0), hold(1), or buy(2) stocks
    action_combo = map(list, itertools.product([0, 1, 2], repeat=self.n_stock))
    action_vec = action_combo[action]

    # one pass to get sell/buy index
    sell_index = []
    buy_index = []
    for i, a in enumerate(action_vec):
      if a == 0:
        sell_index.append(i)
      elif a == 2:
        buy_index.append(i)

    # two passes: sell first, then buy; might be naive in real-world settings
    if sell_index:
      for i in sell_index:
        self.cash_in_hand += self.stock_price[i] * self.stock_owned[i]
        self.stock_owned[i] = 0
    if buy_index:
      can_buy = True
      while can_buy:
        for i in buy_index:
          if self.cash_in_hand > self.stock_price[i]:
            self.stock_owned[i] += 1 # buy one share
            self.cash_in_hand -= self.stock_price[i]
          else:
            can_buy = False

"""