In [3]:
import pandas as pd
from pandas_datareader import data
import numpy as np
import matplotlib.pyplot as plt
import gym
import tulipy as ti
from sqlalchemy import create_engine
import ast
import re
from wsb_pipeline import get_all_embeddings

  from pandas.util.testing import assert_frame_equal


In [24]:
INITIAL_BALANCE = 10
TRANSACTION_COST = 0.01
WINDOW_SIZE = 14
DELTA_DAY = pd.Timedelta(days=1)
DEFAULT_ACTIONS_LIST = [0]
DEFAULT_REWARDS_LIST = [0]
EXP_DECAY = 0.8
class TradingEnv(gym.Env):
    def __init__(self, ticker='AAPL', target_volatility=10, mode="train", **kwargs):
        self.ticker = ticker
        self.window = pd.Timedelta(days=WINDOW_SIZE)
        assert mode in set(["train", "validation", "test", "dev"]), f"Invalid environment  mode: {mode}"
        self.mode = mode
        self.target_volatility = target_volatility
        self.returns_list = DEFAULT_REWARDS_LIST.copy()
        self.rewards_list = DEFAULT_REWARDS_LIST.copy()
        self.actions_list = DEFAULT_ACTIONS_LIST.copy()
#         self.balance = INITIAL_BALANCE
        
        self._compute_simple_states()
        
    def _compute_simple_states(self):
        self.short_time = 63
        self.long_time = 252
        start, end = self.get_time_endpoints(self.mode)
        self.start = start
        self.end = end
        # 81 needs to be added for some reason to make sure MACD is a number ???
#         warn("Using unexplained extra pre-padding.")
        unexplained = 0
        prepadding =  pd.Timedelta(days=self.short_time + self.long_time + WINDOW_SIZE + 1 + unexplained)
        postpadding = self.window
        self.prices = data.DataReader(self.ticker, 'yahoo',
                                      start=start-prepadding, end=end+postpadding)['Close']

        # We compute the mean, and standard deviation of the first WINDOW_SIZE days, and use this to standardize 
        # the entire time series.
        assert WINDOW_SIZE > 5, "WINDOW_SIZE is too small for rolling computations to be meaningful"
        self.mu_hat = self.prices[:WINDOW_SIZE].mean()
        self.sigma_hat = self.prices[:WINDOW_SIZE].std()
        self.data = pd.DataFrame({'mean' : (self.prices - self.mu_hat) / self.sigma_hat})
        self.data['std'] = self.data['mean'].rolling(WINDOW_SIZE).std()
        # Use additive returns, because the reward is computed using the additive return
        rets = (self.prices - self.prices.shift(1))

#         self.data['sharpe'] = rets.rolling(WINDOW_SIZE).mean() / rets.rolling(WINDOW_SIZE).std()
#         warn('Sharpe ratio will need a risk-free return in the future, for proper calculation.')
        
#         exp_short = self.prices.ewm(span=self.short_time, adjust=False).mean()
#         exp_long  = self.prices.ewm(span=self.long_time,  adjust=False).mean()
#         self.data['q'] = (exp_short - exp_long) # / self.prices.rolling(self.short_time).std()
        
#         macd = ti.macd(self.data['mean'].values, short_period=self.short_time,
#                        long_period=self.long_time, signal_period=WINDOW_SIZE)

# #         self.data['MACD'] = self.data['q'] / self.data['q'].rolling(self.long_time).std()
#         self.data['macd_0'] = self.data['macd_1'] = self.data['macd_2'] = np.nan
#         self.data['macd_0'][self.long_time-1:] = macd[0]
#         self.data['macd_1'][self.long_time-1:] = macd[1]
#         self.data['macd_2'][self.long_time-1:] = macd[2]
       
        # to look up current price from self.data, irrespective of the date break due to the weekend
        self.df_index = self.data.index.get_loc(self.start)
        
        
    def get_time_endpoints(self, mode):
        """
            Start must be in Monday - Friday
        """
        if mode == "train":
            return pd.Timestamp('2014-01-06'), pd.Timestamp('2017-12-31')
        elif mode == "dev":
            return pd.Timestamp('2014-01-06'), pd.Timestamp('2014-12-28')
        else:
            raise NotImplementedError()
        
    def _get_raw_price(self, diff=0):
        return self.prices[self.df_index + diff]
    
    def _get_normalized_price(self, diff=0):
        return self.data['mean'][self.df_index + diff]
        
    def _get_current_timestamp(self):
        return self.data.index[self.df_index]
    
    def _get_melted_technical_indicators(self):
        i = self.df_index
#         indicators = self.data[['mean', 'std', 'sharpe', 'q']][(i-WINDOW_SIZE):i]
#         indicators = self.data[['mean', 'std', 'sharpe']][(i-WINDOW_SIZE):i]
#         indicators = self.data[['mean', 'std']][(i-WINDOW_SIZE):i]
        indicators = self.data[(i-WINDOW_SIZE):i]
        return indicators.values.reshape(-1).tolist()
        
    def _get_current_state(self):
        return self._get_melted_technical_indicators()
    
    def _get_date(self, diff=0):
        return self.data.index[self.df_index + diff]

    def reset(self):
        self.df_index = self.data.index.get_loc(self.start)  
        self.returns_list = DEFAULT_REWARDS_LIST.copy()
        self.rewards_list = DEFAULT_REWARDS_LIST.copy()
        self.actions_list = DEFAULT_ACTIONS_LIST.copy()
        return self._get_current_state()

    def _compute_reward_function(self, action):
        assert action in [-1, 0, 1], f"Got {action} but expected one of {-1, 0, 1}"
        next_price = self._get_normalized_price(diff=1)
        price = self._get_normalized_price()
        r = next_price - price
        mu = 1
        
        sigma = self.data['std'][self.df_index - 1]
        sigma_prev = self.data['std'][self.df_index - 2]
       
        term1 = action * self.target_volatility * r / sigma
        prev_action = self.actions_list[-1]
        term2 = price * TRANSACTION_COST * np.abs(term1 - self.target_volatility * prev_action / sigma_prev)
        R = mu*(term1 - term2)
        self.rewards_list.append(R)
        # # Additive Returns as reward function
        # if action == 1:
        #     R = r - TRANSACTION_COST
        # elif action == -1:
        #     R = -r - TRANSACTION_COST
        # elif action == 0:
        #     R = 0 - TRANSACTION_COST
        return R
    
    def step(self, action):
        """
            Executes an action in the stock environment, using 
            the discrete action space described in: Deep Reinforcement Learning for Trading
            
            i.e. -1 is maximally short, 0 is no holdings, 1 is maximally long
            Inputs: action (one of {-1,0,1})
            Outputs: a tuple (observation/state, step_reward, is_done, info)
        """
        # TODO: Refactor rewards_list, actions_list into a pd.DataFrame so that
        # 1. I can plot things more easily, and group them together by ticker, and episode number
        # 2. I can collect rewards_list, actions_list into a single variable
        R = self._compute_reward_function(action)
        self.actions_list.append(action)
        self.df_index += 1
        return self._get_current_state(), R, self._get_current_timestamp() > self.end, {}
        
    def seed(self, seed=None):
        return
    
    def close(self):
        return
    
    def compute_returns():
        raise NotImplementedError()
        self.returns_list = [INITIAL_BALANCE]
        episode_length = (self.end - self.start).days
        for i in range(1, episode_length):
            past_value = self.returns_list[-1]
            a = self.actions_list[i]
            prev_a = self.actions_list[i-1]
            current_price = self._get_raw_price(i)
            next_price = self._get_raw_price(i+1)
            if a == 0:
                self.returns_list.append(past_value)
            elif a == 1:
                value = past_value * next_price / current_price
        
e = TradingEnv()

In [26]:
class TradingWithRedditEnv(TradingEnv):
#     def __init__(self, ticker='AAPL', target_volatility=10, mode="train"):
    def __init__(self, **kwargs):
        super(TradingWithRedditEnv, self).__init__(**kwargs)
#         super(TradingWithRedditEnv, self).__init__(ticker=ticker, target_volatility=target_volatility, mode=mode)
        text = get_all_embeddings(ticker=self.ticker)
        text['date'] = pd.to_datetime(text['date'])
        self.text_embeddings = text
        stocks = self.data[self.df_index:]
        stocks['date'] = stocks.index
        self.embedding_lookup = pd.merge(stocks, text, how='left')[['date', 'embeddings']]
    
    def _get_current_embeddings(self):
        date = self._get_date()
        daily_data = self.embedding_lookup.loc[self.embedding_lookup.date == date]
        raw_values = daily_data.embeddings.values
        vectors = []
        for v in raw_values:
            if np.isnan(v):
                vectors.append((np.zeros(50)))
            else:
                vectors.append(v)
        return vectors
        
    def _get_current_state(self):
        melted = self._get_melted_technical_indicators()
        embedded = self._get_current_embeddings()
        return melted, embedded

e = TradingWithRedditEnv()
# stocks = e.data
# text = e.text_embeddings
# stocks
# text

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [63]:
e._get_current_embeddings()

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]

In [48]:
e.text_embeddings.dtypes

date          datetime64[ns]
embeddings            object
dtype: object

In [49]:
e.data.dtypes

mean           float64
std            float64
date    datetime64[ns]
dtype: object

In [13]:
stocks['date'] = stocks.index
stocks

Unnamed: 0_level_0,mean,std,date
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-02-11,1.944069,,2013-02-11
2013-02-12,1.043350,,2013-02-12
2013-02-13,0.976712,,2013-02-13
2013-02-14,0.945267,,2013-02-14
2013-02-15,0.463836,,2013-02-15
...,...,...,...
2018-01-08,57.388741,1.128750,2018-01-08
2018-01-09,57.378256,1.033593,2018-01-09
2018-01-10,57.357287,1.026139,2018-01-10
2018-01-11,57.876157,1.060585,2018-01-11


In [5]:
text

Unnamed: 0,date,embeddings
0,2014-10-14,"[-0.03803207725286484, 0.0013342425227165222, ..."
1,2015-06-03,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
2,2015-06-10,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
3,2015-08-25,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
4,2015-09-08,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
5,2015-11-19,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
6,2015-12-10,"[-1.143259048461914, -0.9430350661277771, 0.92..."
7,2015-12-17,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
8,2015-12-20,"[-0.4903160035610199, -0.3853575885295868, 0.2..."
9,2016-01-09,"[-0.4903160035610199, -0.3853575885295868, 0.2..."


In [32]:
embedding_lookup = pd.merge(stocks[e.df_index:], text, how='left')
embedding_lookup.fillna(0, axis=0, inplace=True)
embedding_lookup

Unnamed: 0,mean,std,date,embeddings
0,6.735914,0.664223,2014-01-06,0
1,6.444662,0.726527,2014-01-07,0
2,6.700726,0.755144,2014-01-08,0
3,6.181108,0.824565,2014-01-09,0
4,5.913064,0.899194,2014-01-10,0
...,...,...,...,...
1012,57.388741,1.128750,2018-01-08,0
1013,57.378256,1.033593,2018-01-09,0
1014,57.357287,1.026139,2018-01-10,0
1015,57.876157,1.060585,2018-01-11,0


In [41]:
embedding_lookup.loc[embedding_lookup.date == '2016-05-28']

Unnamed: 0,mean,std,date,embeddings


In [38]:
embedding_lookup.embeddings.apply(lambda x : type(x)).unique()

array([<class 'int'>, <class 'str'>], dtype=object)

In [None]:
unique

In [6]:
e._get_date()
# stocks.dropna()

Timestamp('2014-01-06 00:00:00')

In [7]:
def _get_embeddings(diff=0):
    date = e._get_date(diff=diff)
    relevant_text = text[text.date == date]
    return values_to_matrix(relevant_text.embeddings.values)

# _get_embeddings(diff=3).shape

In [9]:
len(_get_embeddings(diff=0))

0

In [12]:
len(_get_embeddings(diff=1))

2

In [None]:
_get_embeddings(diff=2).shape

In [None]:
_get_embeddings(diff=3).shape

In [None]:
text.groupby('date')

In [None]:
stocks[e.df_index:]

In [13]:
text

Unnamed: 0,date,embeddings
0,2012-04-11,[-0.44496807 0.3513139 0.5084499 0.490831...
1,2012-04-11,[-0.44496807 0.3513139 0.5084499 0.490831...
2,2012-04-17,[-0.37154755 0.11488254 0.32077706 0.469549...
3,2012-04-17,[-0.3431134 0.12053894 0.25498617 0.317914...
4,2012-04-17,[-0.21416569 0.18238343 0.29016954 0.335739...
...,...,...
28303,2018-10-31,[-0.15065286 -0.01796762 0.2625997 0.458763...
28304,2018-10-31,[-0.07056472 -0.13153344 0.24480873 0.401294...
28305,2018-10-31,[-0.2272068 -0.011285 0.31687522 0.534255...
28306,2018-10-31,[ 0.06805411 0.03550359 0.2536066 0.723040...


In [None]:
(e.end - e.start).days

In [None]:
e = TradingWithRedditEnv()
stocks = e.data
stocks['date'] = stocks.index
stocks.head()

In [None]:
text = e.text_embeddings
text['date'] = pd.to_datetime(text['date'])
text.dtypes

In [None]:
stocks.dtypes

In [None]:
# stocks.join(text, on='date')
merged = pd.merge(stocks, text, how='left')
merged.head()

In [None]:
macd = ti.macd(e.data['mean'].values, short_period=63, long_period=252, signal_period=60)
macd[0].shape, macd[1].shape, macd[2].shape

In [None]:
len(e.data['mean'])

In [None]:
ti.rsi(e.data['mean'].values,period=30).shape

In [None]:
def basic_loop_test(t):
    env = TradingEnv(ticker=t)
    state = env.reset()
    done = False
#     np.random.seed(885)
#     while not done:
#     # for i in range(14):
#         action = np.random.randint(low=-1, high=2)
#         next_state, r, done, _ = env.step(action)
#         assert len(state) == len(next_state)

#     plt.plot(env.rewards_list)
#     plt.title(f'Reward vs. Time for {t}')
#     plt.show()

In [None]:
basic_loop_test('AAPL')

In [None]:
ticker_list = []
with open('./small_stock_name.txt') as src:
    ticker_list = src.read().split()

In [None]:
len(ticker_list)

In [None]:
from tqdm import tqdm
filtered_tickers = []
for i, t in tqdm(enumerate(ticker_list)):
    try:
        basic_loop_test(t)
        filtered_tickers.append(t)
    except:
        print(f'{t} failed')
        continue
print(f'\nTickers preserved: {len(filtered_tickers)} / {len(ticker_list)}')
assert len(filtered_tickers) > 0

In [None]:
set(ticker_list) - set(filtered_tickers)

In [None]:
with open('filtered_tickers.txt', 'w') as target:
    target.write(','.join(filtered_tickers))

In [23]:
def ContinuousTradingEnv(TradingEnv):
    def __init__(self, **kwargs):
        super(ContinuousTradingEnv, self).__init__(**kwargs)
#         super(ContinuousTradingEnv, self).__init__()
#         super(ContinuousTradingEnv, self).__init__(*args, **kwargs)

    def step(self, action):
        """
            Executes an action in the stock environment, using 
            the CONTINUOUS action space described in: Deep Reinforcement Learning for Trading
            
            i.e. -1 is maximally short, 0 is no holdings, 1 is maximally long
            Inputs: action in [-1, 1]
            Outputs: a tuple (observation/state, step_reward, is_done, info)
        """
        assert -1 <= action <= 1, f"Got {action} but it is outside of [-1, 1]"

        roi = self._get_new_return(action)
        self.returns_list.append(roi)

        R = self._compute_reward_function(action)
        self.rewards_list.append(R)
        self.actions_list.append(action)
        self.df_index += 1
        return (
            self._get_current_state(),
            R,
            self._get_current_timestamp() > self.end,
            {},
        )
c = ContinuousTradingEnv()

TypeError: ContinuousTradingEnv() missing 1 required positional argument: 'TradingEnv'

In [12]:
%debug

> [0;32m<ipython-input-11-69799cdd0e7e>[0m(30)[0;36m<module>[0;34m()[0m
[0;32m     26 [0;31m            [0mR[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     27 [0;31m            [0mself[0m[0;34m.[0m[0m_get_current_timestamp[0m[0;34m([0m[0;34m)[0m [0;34m>[0m [0mself[0m[0;34m.[0m[0mend[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     28 [0;31m            [0;34m{[0m[0;34m}[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     29 [0;31m        )
[0m[0;32m---> 30 [0;31m[0mc[0m [0;34m=[0m [0mContinuousTradingEnv[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  d


*** Newest frame


ipdb>  d


*** Newest frame


ipdb>  c
