In [4]:
import pandas as pd
from pandas_datareader import data
import numpy as np
import matplotlib.pyplot as plt
import gym
import tulipy as ti

In [5]:
INITIAL_BALANCE = 10
TRANSACTION_COST = 0.01
WINDOW_SIZE = 14
DELTA_DAY = pd.Timedelta(days=1)
DEFAULT_ACTIONS_LIST = [0]
DEFAULT_REWARDS_LIST = [0]
EXP_DECAY = 0.8
class TradingEnv(gym.Env):
    def __init__(self, ticker='AAPL', target_volatility=10, mode="train"):
        self.ticker = ticker
        self.window = pd.Timedelta(days=WINDOW_SIZE)
        assert mode in set(["train", "validation", "test", "dev"]), f"Invalid environment  mode: {mode}"
        self.mode = mode
        self.target_volatility = target_volatility
        self.returns_list = DEFAULT_REWARDS_LIST.copy()
        self.rewards_list = DEFAULT_REWARDS_LIST.copy()
        self.actions_list = DEFAULT_ACTIONS_LIST.copy()
        self.balance = INITIAL_BALANCE
        
        self._compute_simple_states()
        
    def _compute_simple_states(self):
        self.short_time = 63
        self.long_time = 252
        start, end = self.get_time_endpoints(self.mode)
        self.start = start
        self.end = end
        # 81 needs to be added for some reason to make sure MACD is a number ???
#         warn("Using unexplained extra pre-padding.")
        unexplained = 0
        prepadding =  pd.Timedelta(days=self.short_time + self.long_time + WINDOW_SIZE + 1 + unexplained)
        postpadding = self.window
        self.prices = data.DataReader(self.ticker, 'yahoo',
                                      start=start-prepadding, end=end+postpadding)['Close']

        # We compute the mean, and standard deviation of the first WINDOW_SIZE days, and use this to standardize 
        # the entire time series.
        self.mu_hat = self.prices[:WINDOW_SIZE].mean()
        self.sigma_hat = self.prices[:WINDOW_SIZE].std()
        self.data = pd.DataFrame({'mean' : (self.prices - self.mu_hat) / self.sigma_hat})
        self.data['std'] = self.data['mean'].rolling(WINDOW_SIZE).std()
        # Use additive returns, because the reward is computed using the additive return
        rets = (self.prices - self.prices.shift(1))

        self.data['sharpe'] = rets.rolling(WINDOW_SIZE).mean() / rets.rolling(WINDOW_SIZE).std()
#         warn('Sharpe ratio will need a risk-free return in the future, for proper calculation.')
        
        exp_short = self.prices.ewm(span=self.short_time, adjust=False).mean()
        exp_long  = self.prices.ewm(span=self.long_time,  adjust=False).mean()
        self.data['q'] = (exp_short - exp_long) / self.prices.rolling(self.short_time).std()
        macd = ti.macd(self.data['mean'].values, short_period=self.short_time,
                       long_period=self.long_time, signal_period=WINDOW_SIZE)
        
#         self.data['MACD'] = self.data['q'] / self.data['q'].rolling(self.long_time).std()
        self.data['macd_0'] = self.data['macd_1'] = self.data['macd_2'] = np.nan
        self.data['macd_0'][self.long_time-1:] = macd[0]
        self.data['macd_1'][self.long_time-1:] = macd[1]
        self.data['macd_2'][self.long_time-1:] = macd[2]
       
        # to look up current price from self.data, irrespective of the date break due to the weekend
        self.df_index = self.data.index.get_loc(self.start)
        
        
    def get_time_endpoints(self, mode):
        """
            Start must be in Monday - Friday
        """
        if mode == "train":
            return pd.Timestamp('2016-01-04'), pd.Timestamp('2018-12-31')
        elif mode == "dev":
            return pd.Timestamp('2016-01-04'), pd.Timestamp('2016-02-28')
        else:
            raise NotImplementedError()
        
    def _get_raw_price(self):
        return self.prices[self.df_index]
    
    def _get_normalized_price(self, diff=0):
        return self.data['mean'][self.df_index + diff]
        
    def _get_current_timestamp(self):
        return self.data.index[self.df_index]
    
    def _get_current_state(self):
        i = self.df_index
        indicators = self.data[['mean', 'std', 'sharpe', 'q']][(i-WINDOW_SIZE):i]
        state = indicators.values.reshape(-1).tolist()
        return state
    
    def reset(self):
        self.df_index = self.data.index.get_loc(self.start)  
        self.returns_list = DEFAULT_REWARDS_LIST.copy()
        self.rewards_list = DEFAULT_REWARDS_LIST.copy()
        self.actions_list = DEFAULT_ACTIONS_LIST.copy()
        return self._get_current_state()

    def step(self, action):
        """
            Executes an action in the stock environment, using 
            the discrete action space described in: Deep Reinforcement Learning for Trading
            
            i.e. -1 is maximally short, 0 is no holdings, 1 is maximally long
            Inputs: action (one of {-1,0,1})
            Outputs: a tuple (observation/state, step_reward, is_done, info)
        """
        assert action in [-1, 0, 1], f"Got {action} but expected one of {-1, 0, 1}"
        next_price = self._get_normalized_price(diff=1)
        price = self._get_normalized_price()
        r = next_price - price
        mu = 1
        
        sigma = self.data['std'][self.df_index - 1]
        sigma_prev = self.data['std'][self.df_index - 2]
       
        term1 = action * self.target_volatility * r / sigma
        prev_action = self.actions_list[-1]
        term2 = price * TRANSACTION_COST * np.abs(term1 - self.target_volatility * prev_action / sigma_prev)
        R = mu*(term1 - term2)
        self.rewards_list.append(R)
        
#         # Additive Returns as reward function
#         if action == 1:
#             R = r - TRANSACTION_COST
#         elif action == -1:
#             R = -r - TRANSACTION_COST
#         elif action == 0:
#             R = 0 - TRANSACTION_COST
    
        # TODO: Refactor rewards_list, actions_list into a pd.DataFrame so that
        # 1. I can plot things more easily, and group them together by ticker, and episode number
        # 2. I can collect rewards_list, actions_list into a single variable
        
        self.actions_list.append(action)
        self.df_index += 1
        return self._get_current_state(), R, self._get_current_timestamp() > self.end, {}
        
    def seed(self, seed=None):
        return
    
    def close(self):
        return
    
    def _update_portfolio(action):
        raise NotImplementedError()
        prev_action = self.actions_list[-1]
        if prev_action == 0:
            if action == 0:
                return self.balance
            elif action == 1:
                return self.balance
            elif action == -1:
                return self.balance
        if prev_action == 1:
            if action == 0:
                return self.balance
            elif action == 1:
                return self.balance
            elif action == -1:
                pass
        if prev_action == -1:
            if action == 0:
                return self.balance
            elif action == 1:
                return self.balance
            elif action == -1:
                pass

In [8]:
e = TradingEnv()
e.data

Unnamed: 0_level_0,mean,std,sharpe,q
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-02-09,-2.214492,,,
2015-02-10,-1.574513,,,
2015-02-11,-0.778710,,,
2015-02-12,-0.339071,,,
2015-02-13,-0.166553,,,
...,...,...,...,...
2019-01-08,6.419684,1.780165,-0.153386,-0.237989
2019-01-09,7.132009,1.493831,-0.147962,-0.260271
2019-01-10,7.268355,1.359710,-0.083805,-0.282812
2019-01-11,6.848191,1.316012,-0.054360,-0.303565


In [20]:
macd = ti.macd(e.data['mean'].values, short_period=63, long_period=252, signal_period=60)
macd[0].shape, macd[1].shape, macd[2].shape

((739,), (739,), (739,))

In [24]:
len(e.data['mean'])

990

In [23]:
ti.rsi(e.data['mean'].values,period=30).shape

(960,)

In [3]:

# env = TradingEnv('CARR')
len(env.reset())
# _ = env.step(1)
# assert len(env.prices) == len(env.data['mean'])


KeyError: 'Date'

In [131]:
env.data.columns

Index(['mean', 'std', 'sharpe', 'q', 'MACD'], dtype='object')

In [121]:
len(env.prices)

788

In [4]:
def basic_loop_test(t):
    env = TradingEnv(ticker=t)
    state = env.reset()
    done = False
#     np.random.seed(885)
#     while not done:
#     # for i in range(14):
#         action = np.random.randint(low=-1, high=2)
#         next_state, r, done, _ = env.step(action)
#         assert len(state) == len(next_state)

#     plt.plot(env.rewards_list)
#     plt.title(f'Reward vs. Time for {t}')
#     plt.show()

In [5]:
basic_loop_test('AAPL')

In [6]:
ticker_list = []
with open('./small_stock_name.txt') as src:
    ticker_list = src.read().split()

In [7]:
len(ticker_list)

82

In [8]:
from tqdm import tqdm
filtered_tickers = []
for i, t in tqdm(enumerate(ticker_list)):
    try:
        basic_loop_test(t)
        filtered_tickers.append(t)
    except:
        print(f'{t} failed')
        continue
print(f'\nTickers preserved: {len(filtered_tickers)} / {len(ticker_list)}')
assert len(filtered_tickers) > 0

18it [00:09,  2.29it/s]

CELG failed


24it [00:14,  1.45it/s]

CTRP failed


35it [00:20,  1.59it/s]

FOX failed


36it [00:21,  1.50it/s]

FOXA failed


62it [00:37,  1.51it/s]

QVCA failed


68it [00:44,  1.55s/it]

SYMC failed


74it [00:47,  1.61it/s]

VIAB failed


79it [00:49,  1.92it/s]

WFM failed


81it [00:51,  1.68it/s]

YHOO failed


82it [00:51,  1.59it/s]


Tickers preserved: 73 / 82





In [9]:
set(ticker_list) - set(filtered_tickers)

{'CELG', 'CTRP', 'FOX', 'FOXA', 'QVCA', 'SYMC', 'VIAB', 'WFM', 'YHOO'}

In [11]:
with open('filtered_tickers.txt', 'w') as target:
    target.write(','.join(filtered_tickers))