# Setup

In [None]:
!pip install pandas==1.3.5



In [None]:

import pandas as pd
print(pd.__version__)

1.3.5


In [None]:
!pip install fastquant
!pip install yfinance
from IPython.display import clear_output
from fastquant import get_stock_data, backtest
from datetime import date, datetime, timedelta
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
import yfinance as yf
import requests
import nltk
nltk.download('vader_lexicon')

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import sklearn
import matplotlib.pyplot as plt

Collecting pandas==1.1.5
  Using cached pandas-1.1.5-cp37-cp37m-manylinux1_x86_64.whl (9.5 MB)
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.3.5
    Uninstalling pandas-1.3.5:
      Successfully uninstalled pandas-1.3.5
Successfully installed pandas-1.1.5




[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
### Important values ###
# start_time = date.today() - timedelta(days = 5000) # > 8 years prior (just need 7 but to be safe)
# end_time = date.today() # today
# tickers = ["AAPL", "AMZN", "CLX", "CVX", "GS", "PFE", "BA", "WY", "ATVI", "EXC"]
# ticker = "AAPL"
period = 20 # the MA period for market conditions
feature_size = 11 # number of features in each RL observation
n_actions = 5 # number of bots
# Ticker -> subreddit searched by the sentiment strategy.
reddit_mappings = {
    "AAPL": "apple",
    "AMZN": "amazon",
    "CLX": "Clorox",
    "CVX": "Chevron",
    "GS": "goldmansachs",
    "PFE": "Coronavirus",
    "BA": "boeing",
    "WY": "RealEstate",
    "ATVI": "activision",
    "EXC": "nuclear",
    "DD": "RealEstate",
    "WM": "Waste",
    "JPM": "Banking",
    "BRK-A": "BerkshireHathaway",
    "WFC": "Banking",
    "V": "CreditCards",
    "Z": "Zillow",
    "ACI": "Albertsons",
    "HD": "HomeDepot",
    "PYPL": "PayPal",
    "NVDA": "nvidia",
    # Legacy misspelled entry, kept so existing lookups by this key still work.
    "NDVA": "ndivia"
}

# Data Collection

In [None]:
## Get Fama-French 5 factors
def get_fama_french():
    """Download the Fama-French 5-factor daily CSV and return it as a float table.

    Returns:
        DataFrame indexed by ``Date`` (parsed with ``pd.to_datetime``) with all
        remaining columns cast to ``float64``.
    """
    # sep="\0" presumably never occurs in the file, so every raw line is read
    # into a single string column — TODO confirm against the hosted CSV.
    ff_original = pd.read_csv("https://raw.githubusercontent.com/MT-GoCode/RL_Stocks_Project/Data/F-F_Research_Data_5_Factors_2x3_daily.CSV", sep = "\0")
    # Skip the first parsed row.
    ff = ff_original[1:]
    # Split each line into its comma-separated fields.
    ff = ff.iloc[:,0].str.split(',', expand = True)
    # Row label 1 becomes the header below; give its first cell the name 0 so
    # that column can be addressed as ff[0] / renamed to "Date".
    ff.loc[1, 0] = 0
    ff.columns = ff.loc[1]
    # Drop the header row from the data rows.
    ff = ff[1:]
    ff[0] = pd.to_datetime(ff[0])
    ff = ff.rename(columns = {0: 'Date'})
    ff = ff.set_index('Date')
    ff = ff.astype('float64')
    return ff

In [None]:
## Get S&P 500 data
def get_sandp(start_time, end_time):
    """Fetch SPY price history from Yahoo Finance, indexed by calendar date.

    Args:
        start_time: Range start, forwarded to ``yf.download`` as ``start``.
        end_time: Range end, forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame with ``Close`` renamed to ``S&PClose`` and a
        ``Date`` index rebuilt from each row's calendar date.
    """
    spy = yf.download("SPY", start = start_time, end = end_time)
    spy = spy.rename(columns = {"Close": "S&PClose"}).reset_index()
    # Keep only the calendar day of every timestamp.
    spy["Date"] = [np.datetime64(day) for day in spy["Date"].dt.date]
    return spy.set_index("Date")

In [None]:
def collect_data(ticker):
    """Build the 15-minute feature table for one ticker.

    Reads ``/content/<ticker>.csv``, resamples it to 15-minute bars between
    09:30 and 16:00, then attaches the SPY close and the Fama-French factors
    to every bar **by calendar date**.

    Args:
        ticker: Symbol whose CSV file is loaded.

    Returns:
        DataFrame with a fresh 0..n-1 index holding ``dt``, the OHLCV columns
        (capitalised), ``S&PClose`` and the Fama-French factor columns. Rows
        with any missing value (e.g. days without SPY or factor data) are dropped.
    """
    ## Retrieve data for the ticker
    df = pd.read_csv("/content/" + ticker + ".csv")
    df.drop(columns = ["Unnamed: 0"], inplace = True)
    df.rename(columns = {"time": "dt"}, inplace = True)

    df = df.interpolate()
    df["dt"] = pd.to_datetime(df["dt"])
    df = df.set_index("dt").resample("15T").first().reset_index().reindex(columns = df.columns)
    df = df.set_index("dt").between_time("09:30", "16:00")

    ## Attach S&P 500 data
    sandp = get_sandp(df.index.values[0].astype('datetime64[D]'), df.index.values[-1].astype('datetime64[D]'))
    # Look the close up by name: the first positional column of the download is
    # not the renamed close column.
    spy_close = sandp["S&PClose"]
    spy_close = spy_close[~spy_close.index.duplicated(keep = "first")]
    df = df.reset_index()
    bar_dates = pd.to_datetime(df["dt"].dt.strftime('%Y-%m-%d'))
    # One vectorised lookup instead of a boolean scan of `sandp` per row;
    # dates without SPY data become NaN and are dropped below.
    df["S&PClose"] = bar_dates.map(spy_close)

    ## Attach Fama-French
    # Merge on the calendar date. A positional join would pair bar i with the
    # i-th factor row of the file regardless of its date.
    df["Date"] = bar_dates
    df = df.dropna().reset_index(drop = True)
    ff = get_fama_french().reset_index()
    data = df.merge(ff, on = "Date", how = "left")

    ## Complete data table
    data["dt"] = data["dt"].apply(lambda x: x.to_pydatetime())
    data.drop(columns = ["Date"], inplace = True)
    data.rename(columns = {"open": "Open", "high": "High", "low": "Low", "close": "Close", "volume": "Volume"}, inplace = True)
    # Downstream code addresses rows by 0..n-1, so re-number after dropping.
    data = data.dropna().reset_index(drop = True)

    return data

# Replay Agent
Streams historical stock market data for bots to trade

In [None]:

class ReplayAgent():
    """Serves a prepared historical dataset row by row and holds the ticker's
    dividend and split history for the period the dataset covers."""

    def __init__(self, ticker, data):
        """Store ``data`` and fetch dividends/splits for its date range.

        Args:
            ticker: Symbol passed to ``yf.Ticker``.
            data: DataFrame with a datetime ``dt`` column.
        """
        self.data = data
        actions = yf.Ticker(ticker)
        # Positional access: a label lookup of 0 / len-1 only works when the
        # frame's index happens to be exactly 0..n-1.
        trade_dates = data["dt"].dt.date
        first_day, last_day = trade_dates.iloc[0], trade_dates.iloc[-1]
        self.dividends = actions.dividends[first_day : last_day]
        self.splits = actions.splits[first_day : last_day]

    def next(self, idx):
        """Return the one-row slice of the dataset at position ``idx``."""
        return self.data[idx:idx+1]

    def full_dataset(self):
        """Return the complete dataset this agent was built with."""
        return self.data
    
# RA = ReplayAgent(0) # nothing

# Reinforcement Learning Agent
The RL Agent contains a neural network that selects the optimal bot based on market conditions

In [None]:
class RLAgent:

    def __init__(self, replay_agent):
        """Attach the data source and create the policy network and its optimizer.

        Args:
            replay_agent: Stored as ``self.RA``; the strategy methods call its
                ``next(idx)`` and ``full_dataset()``.
        """
        self.RA, self.obs_length = replay_agent, 5
        self.model = self.initialize_model()
        step_size = 1e-8
        self.learning_rate = step_size
        self.optimizer = tf.keras.optimizers.Nadam(step_size)

    ## Initialize the RL model to train the bots
    def initialize_model(self):
        """Build the bot-selection network and print its summary.

        The network is four dense layers: ``feature_size`` units (ReLU),
        ``feature_size`` units (LeakyReLU 0.05), ``n_actions * 2`` units
        (LeakyReLU 0.01) and a softmax over ``n_actions`` outputs.

        Returns:
            The un-compiled ``tf.keras`` Sequential model.
        """
        model = tf.keras.models.Sequential([tf.keras.layers.Dense(units = feature_size, activation = "relu", input_shape = (feature_size,)),
                                            tf.keras.layers.Dense(units = feature_size, activation = keras.layers.LeakyReLU(alpha = 0.05)),
                                            tf.keras.layers.Dense(units = n_actions * 2, activation = keras.layers.LeakyReLU(alpha = 0.01)),
                                            tf.keras.layers.Dense(units = n_actions, activation = "softmax")])
        # summary() prints by itself and returns None; wrapping it in print()
        # only appended a stray "None" line to the output.
        model.summary()
        return model

    ## RSI STRATEGY
    def rsi_strategy(self, cash, idx, ep):
        in_pos = False
        hold = 0
        balance = cash
        collect = []
        last_buy = 0
        for i in range(idx, idx+ep):
            indicators = self.RA.next(i)
            if (indicators.RSI.values[0] < 30):
                hold += balance // indicators.close.values[0]
                balance -= (balance // indicators.close.values[0]) * indicators.close.values[0]
                last_buy = i
            elif (indicators.RSI.values[0] > 70):
                balance += hold * indicators.close.values[0]
                hold = 0
                collect.append((last_buy, i, indicators.close.values[0]))
        if (hold > 0):
            balance += self.RA.next(idx+ep).close.values[0] * hold;
            collect.append((last_buy, idx+ep, self.RA.next(idx+ep).close.values[0]))
        total = 0
        for i in collect:
            total += self.sharpe(i[0], i[1])
        if (len(collect) == 0):
            return (0,0)
        else:
            return (balance - cash, total / len(collect))

    ## MACD STRATEGY
    def macd_signals(self, s, e):
        hold = False
        stop = 0
        in_pos = False
        buy_on = []
        sell_on = []
        for i in range(s, e):
            indicators = self.RA.next(i)
            p_indicators = self.RA.next(i-1)
            if (indicators.MACD.values[0] > indicators.MACDSignal.values[0] and p_indicators.MACD.values[0] < p_indicators.MACDSignal.values[0]): # MACD crosses upwards over Signal line -> Buy 
                if (indicators.Volume.values[0] * 1.3 < indicators.VOL20.values[0]):
                    buy_on.append(i+1)
                    stop = indicators.close.values[0] - 3 * indicators.ATR.values[0]
                    in_pos = True
            elif (in_pos == True and indicators.close.values[0] < stop): 
                sell_on.append(i)
            elif (indicators.MACDSignal.values[0] > indicators.MACD.values[0] and p_indicators.MACDSignal.values[0] < p_indicators.MACD.values[0]): # MACD crosses downward over Signal Line -> Sell
                sell_on.append(i+1)
        return [buy_on, sell_on]

    def macd_strategy(self, cash, idx, ep):
        balance = cash
        hold = 0
        buy_on, sell_on = self.macd_signals(idx-ep, idx)
        last_buy = 0
        if (len(buy_on) > 0 and len(sell_on) == 0) or (len(buy_on) > 0 and len(sell_on) > 0 and buy_on[-1] > sell_on[-1]): # currently in a buy.
            price = self.RA.next(idx).close.values[0]
            hold += (balance // price)
            balance -= price * (balance // price)
            last_buy = idx
        buy_on, sell_on = self.macd_signals(idx, idx+ep)
        bp = 0
        sp = 0
        collect = []
        while (bp < len(buy_on) or sp < len(sell_on)):
            if (bp >= len(buy_on) or sp < len(sell_on) and sell_on[sp] < buy_on[bp]):
                if (hold > 0): # sell
                    S = self.RA.next(sell_on[sp])
                    balance += S.Open.values[0] * hold
                    collect.append((last_buy, sell_on[sp], S.Open.values[0]))
                    hold = 0
                sp += 1
            elif (bp <= len(buy_on)):
                B = self.RA.next(buy_on[bp])
                hold += (balance // B.Open.values[0] )
                balance -= B.Open.values[0] * hold
                last_buy = buy_on[bp]
                bp+=1      
        if (hold > 0):
            balance += self.RA.next(idx+ep).close.values[0] * hold
            collect.append((last_buy, idx+ep, self.RA.next(idx+ep).close.values[0]))
        total = 0
        for i in collect:
            total += self.sharpe(i[0], i[1])
        if (len(collect) == 0):
            return (0,0)
        else:
            return (balance-cash, total/len(collect))

    ## BOLLINGER BANDS STRATEGY
    def run_bbands(self, start_idx, end_idx, period = 10, devfactor = 2.0):
        """Backtest the ``"bbands"`` strategy on rows ``start_idx:end_idx``.

        Args:
            start_idx: First row of the slice handed to ``backtest``.
            end_idx: End row (exclusive).
            period: Forwarded to ``backtest`` as ``period``.
            devfactor: Forwarded to ``backtest`` as ``devfactor``.

        Returns:
            ``(pnl, sharpe)`` where ``sharpe`` is the reported ``rtot`` minus
            the mean ``RF`` of the slice, divided by 0.05.
        """
        dataset = self.RA.full_dataset()
        result = backtest("bbands", dataset[start_idx : end_idx], period = period, devfactor = devfactor)
        profit = result.pnl.values[0]
        mean_rf = dataset[start_idx : end_idx]["RF"].mean()
        excess_return = result.rtot.values[0] - mean_rf
        return (profit, excess_return / 0.05)

    ## VWAP STRATEGY
    def run_vwap(self, idx, ep):
        df = self.RA.full_dataset()
        prices = df["Close"]
        def vwap(df):
            v = df['Volume'].values
            tp = (df['Low'] + df['Close'] + df['High']).div(3).values
            return df.assign(vwap=(tp * v).cumsum() / v.cumsum())
        df = vwap(df)
        # Compute the z-scores for each day using the historical data up to that day
        zscores = [(prices[i] - df['vwap'][i]) / np.std(prices[:i]) for i in range(len(prices))]
        # plt.plot(zscores[idx:idx+ep])
        # Start with no money  and no positions
        money = 100000
        count = 0
        collect = []
        last_buy = 0
        for i in range(idx, idx+ep):
            if zscores[i] < -2:
                count += money // self.RA.next(i+1).Open.values[0]
                money -= count * self.RA.next(i+1).Open.values[0]
                last_buy = i
                # print(count, money, last_buy)
            # Clear positions if the z-score between -.5 and .5
            elif abs(zscores[i]) < 0.5 and count > 0:
                collect.append((last_buy, i, self.RA.next(i+1).Open.values[0]))
                money += count*self.RA.next(i+1).Open.values[0]
                count = 0
        if (count > 0):
            print("reconnecting vwap")
            money += count*self.RA.next(idx+ep).Open.values[0]
            collect.append((last_buy, idx+ep))
        total = 0;
        for i in collect:
            total += self.sharpe(i[0], i[1])
        if (len(collect) == 0):
            return (0,0)
        else:
            return (money-100000, total/len(collect))
    
    ## SENTIMENT STRATEGY
    def run_sentiment(self, company, cash, start_idx, end_idx, thresholds = (-0.5, 0.5)):
        """Trade on the VADER sentiment of subreddit post titles.

        Posts for ``company`` are fetched from the Pushshift search API for the
        date range covered by rows ``start_idx`` and ``end_idx``. Walking the
        posts in the order returned, a position is opened on the first title
        whose compound score is ``>= thresholds[0]`` and closed on the next
        title scoring ``< thresholds[1]``.

        Args:
            company: Subreddit name to search.
            cash: Starting balance.
            start_idx: Row label of the first bar of the period.
            end_idx: Row label of the last bar of the period.
            thresholds: ``(buy_threshold, sell_threshold)`` on the compound score.

        Returns:
            ``(pnl, mean_sharpe)``, or ``(0, 0)`` when the request fails, the
            response is not usable JSON, or no posts/trades are found.
        """
        # NOTE(review): buying at >= the *lower* threshold and selling below the
        # *upper* one looks inverted — confirm the intended rule.
        data = self.RA.full_dataset()
        sia = SIA()
        after_date = int(abs(date.today() - data["dt"][start_idx].date()).days)
        before_date = int(abs(date.today() - data["dt"][end_idx].date()).days)
        url = "https://api.pushshift.io/reddit/search/submission/?q=sleep&subreddit={}&after={}d&before={}d".format(company, after_date, before_date)
        # This strategy is best-effort: any network or decoding problem simply
        # yields "no trades". Only those errors are caught, so programming
        # mistakes are no longer hidden by a bare except.
        try:
            payload = requests.get(url, timeout = 30).json()
        except (requests.RequestException, ValueError):
            return (0, 0)
        posts = payload.get("data", []) if isinstance(payload, dict) else []
        if len(posts) == 0:
            return (0, 0)

        def rows_for(day):
            """Rows dated ``day``, else rows two days later (to step over weekends)."""
            rows = data[data["dt"].dt.date == day]
            if len(rows) == 0:
                rows = data[data["dt"].dt.date == day + timedelta(days = 2)] # fudge the weekends
            return rows

        # Pass 1: turn the post stream into (buy_date, sell_date) pairs.
        collect = []
        can_buy = True
        buy_date = None
        for post in posts:
            compound = sia.polarity_scores(post['title'])['compound']
            if compound >= thresholds[0] and can_buy:
                buy_date = datetime.utcfromtimestamp(post["created_utc"]).date()
                can_buy = False
            elif compound < thresholds[1] and not can_buy:
                sell_date = datetime.utcfromtimestamp(post["created_utc"]).date()
                can_buy = True
                collect.append((buy_date, sell_date))
        if not can_buy:
            # Still invested after the last post: the exit happens at end_idx.
            collect.append((buy_date, None))

        # Pass 2: execute the pairs against the price data.
        hold = 0
        balance = cash
        for buy_day, sell_day in collect:
            buy_rows = rows_for(buy_day)
            if len(buy_rows) == 0:
                break
            buy_price = buy_rows.Close.values[0]
            hold += balance // buy_price
            balance -= (balance // buy_price) * buy_price
            if sell_day is not None:
                sell_rows = rows_for(sell_day)
                if len(sell_rows) == 0:
                    # NOTE(review): with no price for the sell date the exit is
                    # booked at buy price + 10% — confirm this is intended.
                    sell_price = buy_price * 1.1
                else:
                    sell_price = sell_rows.Close.values[0]
                balance += hold * sell_price
                hold = 0
        if hold > 0:
            balance += data.loc[end_idx, "close"] * hold

        # Pass 3: score each pair's holding period.
        total = 0
        for buy_day, sell_day in collect:
            buy_rows = rows_for(buy_day)
            if len(buy_rows) == 0:
                break
            buy_idx = buy_rows.index.values[0]
            if sell_day is None:
                total += self.sharpe(buy_idx, end_idx)
            else:
                sell_rows = rows_for(sell_day)
                sell_idx = buy_idx if len(sell_rows) == 0 else sell_rows.index.values[0]
                total += self.sharpe(buy_idx, sell_idx)
        if len(collect) == 0:
            return (0, 0)
        return (balance - cash, total / len(collect))
    
    ## Method to get solo performances of each strategy
    def compare(self, stock, epochs, start_idx, trading_interval, bot):
        data = self.RA.full_dataset()
        pnl_over_time = []
        sharpe_over_time = []
        curr_idx = start_idx
        for i in range(epochs):
            if bot == 0:
                pnl, sharpe = self.rsi_strategy(100000, curr_idx, trading_interval)
            elif bot == 1:
                pnl, sharpe = self.macd_strategy(100000, curr_idx, trading_interval)
            elif bot == 2:
                pnl, sharpe = self.run_bbands(curr_idx, curr_idx + trading_interval)
            elif bot == 3:
                pnl, sharpe = self.run_vwap(curr_idx, trading_interval)
            elif bot == 4:
                pnl, sharpe = self.run_sentiment(reddit_mappings[stock], 100000, curr_idx, curr_idx + trading_interval)
            pnl_over_time.append(pnl)
            sharpe_over_time.append(sharpe)
            curr_idx += trading_interval
        return pnl_over_time, sum(pnl_over_time), np.mean(sharpe_over_time)
    
    ## Calculate Sharpe ratio of a period of holding a stock, using the global complete dataframe
    def sharpe(self, buy_time, sell_time):
        rl_data = self.RA.full_dataset()
        relevant = rl_data[buy_time : sell_time + 1]
        close_prices = relevant["Close"]
        if len(close_prices) == 0:
            return 0
            
        norm_returns = close_prices / close_prices.iloc[0]
        daily_returns = norm_returns.pct_change()[1:]
        sigma = np.std(daily_returns)
        ratio = 3 if sigma == 0 else daily_returns.mean() * 4 / sigma * np.sqrt(252)
        if np.isnan(ratio):
            return 3
        return ratio

    ## Combines reward parameters into one value
    def combine_pnl_sharpe(self, pnl, sharpe):
        return pnl + sharpe * 100
        # return pnl

    ## Decide which bot should be launched
    def choose_action(self, obs):
        """Return the index of the highest-scoring model output for ``obs``.

        Args:
            obs: Observation holding ``feature_size`` values in total; it is
                reshaped to a single ``[1, feature_size]`` batch.
        """
        features = tf.convert_to_tensor(list(obs))
        batch = tf.reshape(features, [1, feature_size])
        scores = self.model.predict(batch)
        return np.argmax(scores)

    ## Discount rewards
    def discount_rewards(self, rewards, gamma = 0.5): 
        discounted_rewards = np.zeros_like(rewards)
        R = 0
        for t in reversed(range(0, len(rewards))):
            # update the total discounted reward
            R = R * gamma + rewards[t]
            discounted_rewards[t] = R
        return discounted_rewards

    ## Compute "loss". Smaller cross entropy = less uncertainty = better
    def compute_loss(self, logits, actions, rewards):
        """Scale the sparse categorical cross-entropy by the reward.

        Args:
            logits: Model output, passed to the loss as the prediction.
            actions: Chosen action, passed to the loss as the true label.
            rewards: Scalar reward for that action.

        Returns:
            ``cross_entropy / rewards`` when ``rewards > 1``, otherwise
            ``(2 - cross_entropy) / rewards``, as a tensor.
        """
        cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy() # takes in true, pred
        neg_logprob = cross_entropy(actions, logits)
        # Higher rewards ==> smaller loss. Smaller cross-entropy ==> smaller loss.
        numerator = neg_logprob if rewards > 1 else (2 - neg_logprob)
        return tf.convert_to_tensor(numerator / rewards)

    ## Critical training function
    def train_step(self, observations, actions, discounted_rewards):
        """Apply one gradient update to the model.

        Args:
            observations: Model input batch.
            actions: Action label handed to ``compute_loss``.
            discounted_rewards: Reward handed to ``compute_loss``.
        """
        with tf.GradientTape() as tape:
            outputs = self.model(observations)
            print("LOGITS", outputs)
            loss = self.compute_loss(outputs, actions, discounted_rewards)
        weights = self.model.trainable_variables
        gradients = tape.gradient(loss, weights)
        self.optimizer.apply_gradients(zip(gradients, weights))

# AI System
Handles the data fetching, training, testing of RL Agents for different stocks

In [None]:
class AISystem:

    def __init__(self, stock, bots = [0, 1, 2, 3,4]):
        self.ticker = stock
        self.actions = bots
        self.n_actions = len(bots)
        self.init_balance = 100000
        self.start_idx = 140 ##### can be anywhere in [140, 1500]
        self.trading_interval = 27
        self.backtest_length = 20
        self.training_length = 85
        self.testing_length = 75 #########
        self.epsilon = 0.75 # encourage exploration
        self.rl_data = None
        self.RA = None
        self.model = None

    def gather_data(self):
        """Load the ticker's data, derive the indicators and create the replay agent.

        Sets ``self.rl_data`` to a snapshot of the collected data *before* any
        indicator column is added, and ``self.RA`` to a ``ReplayAgent`` over
        the indicator-enriched frame.
        """
        # Data setup
        data = collect_data(self.ticker)
        # Explicit copy: the observation features are built from rl_data and
        # must never pick up the indicator columns added below.
        self.rl_data = data.copy()
        indicators = data
        # Calculating indicators
        indicators['ModPrice'] = (data['Close'] + data['High'] + data['Low'])/ 3
        # NOTE(review): `rsi` is not defined in the visible cells — confirm
        # where it comes from.
        indicators['RSI'] = rsi(data).values
        indicators['EMA12'] = indicators['ModPrice'].ewm(span=12, adjust=False).mean()
        indicators['EMA26'] = indicators['ModPrice'].ewm(span=26, adjust=False).mean()
        indicators['MACD'] = indicators['EMA12']-indicators['EMA26']
        indicators['MACDSignal'] = indicators['MACD'].ewm(span=9, adjust=False).mean()
        indicators['SMA30'] = data['Close'].rolling(window=30).mean()
        indicators['VOL20'] = data['Volume'].rolling(window=20).mean()
        # Average true range: 14-bar mean of the largest of the three ranges.
        high_low = data['High'] - data['Low']
        high_close = np.abs(data['High'] - data['Close'].shift())
        low_close = np.abs(data['Low'] - data['Close'].shift())
        ranges = pd.concat([high_low, high_close, low_close], axis=1)
        true_range = np.max(ranges, axis=1)
        indicators['ATR'] = true_range.rolling(14).sum()/14
        # Lower-case alias read by the strategy methods.
        indicators['close'] = indicators.Close
        # Set up replay agent
        self.RA = ReplayAgent(self.ticker, indicators)
    
    def create_model(self):
        """Create the RL agent on top of the current replay agent and keep it as ``self.model``."""
        agent = RLAgent(self.RA)
        self.model = agent

    ## Assumptions for RL model: it's a beautiful Monday morning and you're a disgruntled trader
    ## `idx` is the integer row position in `rl_data` for said Monday morning (not a datetime object)
    def get_obs(self, idx):
        relevant = self.rl_data[idx - 1 : idx]
        relevant["dividend"] = [self.rl_data["dt"][idx - 1] in self.RA.dividends.index.values]
        relevant["split"] = [self.rl_data["dt"][idx - 1] in self.RA.splits.index.values]
        relevant["next_dividend"] = [self.rl_data["dt"][idx + 1] in self.RA.dividends.index.values]
        relevant["next_split"] = [self.rl_data["dt"][idx + 1] in self.RA.splits.index.values]
        relevant.drop(columns = ["dt", "Open", "High", "Low", "Close", "Volume"], inplace = True)
        obs = relevant[relevant.columns].values
        return obs
    
    def tensorize(self, obs):
        """Convert an observation into a ``[1, feature_size]`` tensor."""
        as_tensor = tf.convert_to_tensor(list(obs))
        return tf.reshape(as_tensor, [1, feature_size])
    
    def backtest(self):
        # Gather backtesting data
        # Codes: RSI: 0, MACD: 1, Bollinger Bands: 2, VWAP: 3, Sentiment: 4
        rsi_results = []
        macd_results = []
        bbands_results = []
        vwap_results = []
        senti_results = []
        curr_idx = self.start_idx
        for i in range(self.backtest_length):
            if 0 in self.actions:
                rsi_res = self.model.rsi_strategy(self.init_balance, curr_idx, self.trading_interval)
                rsi_results.append([0, self.model.combine_pnl_sharpe(rsi_res[0], rsi_res[1])])
            if 1 in self.actions:
                macd_res = self.model.macd_strategy(self.init_balance, curr_idx, self.trading_interval)
                macd_results.append([1, self.model.combine_pnl_sharpe(macd_res[0], macd_res[1])])
            if 2 in self.actions:
                # bbands_res = self.model.run_bbands(curr_idx, curr_idx + self.trading_interval, period = 10, devfactor = 2.0)
                bbands_res = self.model.macd_strategy(self.init_balance, curr_idx, self.trading_interval)
                bbands_results.append([2, self.model.combine_pnl_sharpe(bbands_res[0], bbands_res[1])])
            if 3 in self.actions:
                vwap_res = self.model.run_vwap(curr_idx, self.trading_interval)
                vwap_results.append([3, self.model.combine_pnl_sharpe(vwap_res[0], vwap_res[1])])
            if 4 in self.actions:
                senti_res = self.model.run_sentiment(reddit_mappings[self.ticker], self.init_balance, curr_idx, curr_idx + self.trading_interval)
                senti_results.append([4, self.model.combine_pnl_sharpe(senti_res[0], senti_res[1])])
            curr_idx += self.trading_interval

        # Train model on backtested data
        for i in range(self.backtest_length):
            curr_idx = self.start_idx
            obs = self.get_obs(curr_idx)
            pairings = []
            for test in [rsi_results, macd_results, bbands_results, vwap_results, senti_results]:
                pairings.append(test[i])
            pairings.sort(key = lambda x : x[1], reverse = True)
            for j in range(len(pairings)):
                self.model.train_step(self.tensorize(obs), pairings[j][0], self.get_reward(j))
            curr_idx += self.trading_interval
    
    # Run tests
    def run_test(self, action, idx, length):
        data = self.RA.full_dataset()
        if (action == 0):
            return self.model.rsi_strategy(self.init_balance, idx, length)
        if (action == 1 or action == 2):
            return self.model.macd_strategy(self.init_balance, idx, length)
        if (action == 2):
            return self.model.run_bbands(idx, idx + length)
        if (action == 3):
            return self.model.run_vwap(idx, length)
        if (action == 4):
            return self.model.run_sentiment(reddit_mappings[self.ticker], self.init_balance, idx, idx + length)
    
    # Return reward given position of chosen action
    def get_reward(self, ranking):
        if ranking == 0:
            return 200000
        if ranking == 1:
            return 100000
        if ranking == 2:
            return 0.0001
        if ranking == 3:
            return 0.00001
        if ranking == 4:
            return 0.000001
    
    def train(self):
        # Reinforcement learning training
        epochs = self.training_length
        idx = self.start_idx + self.trading_interval * self.backtest_length
        rewards_over_time = []
        pnl_over_time = []
        sharpe_over_time = []
        actions_over_time = []
        for i in range(epochs):
            
            print("TRADING DAY", str(i + 1))
            cur_idx = idx + self.trading_interval * i
            # get market conditions
            obs = self.get_obs(cur_idx)
            print("Market condition:", list(obs))
            # choose which bot has to run
            eps = np.random.uniform()
            if eps > self.epsilon:
                action = self.model.choose_action(obs)
            else:
                action = np.random.randint(0, 5)
            print("Chosen bot:", action)
            actions_over_time.append(self.actions[action])
            # run bots
            pairings = []
            for bot in self.actions:
                pnl, sr = self.run_test(bot, cur_idx, self.trading_interval) # returns [pnl, sharpe]
                pnlsr = self.model.combine_pnl_sharpe(pnl, sr)
                pairings.append((bot, pnlsr))
                if bot == action:
                    pnl_over_time.append(pnl)
                    sharpe_over_time.append(sr)
                    print("Chosen action achieved {} PnL".format(pnl))
            # get reward from rankings
            pairings.sort(key = lambda x : x[1], reverse = True)
            ranking = 0
            for j in range(len(pairings)):
                if pairings[j][0] == action:
                    ranking = j
                    break
            reward_value = self.get_reward(ranking)
            rewards_over_time.append(reward_value)
            self.model.train_step(self.tensorize(obs), action, reward_value)
    
    def test(self):
        # RL training
        epochs = self.testing_length
        idx = self.start_idx + self.trading_interval * self.backtest_length + self.trading_interval * self.training_length
        pnl_over_time = []
        sharpe_over_time = []
        actions_over_time = []

        rewards_for_each = {0:0,1:0,2:0,3:0,4:0}

        for i in range(epochs):
            print("TRADING DAY", str(i + 1))
            cur_idx = idx + self.trading_interval * i
            # get market conditions
            obs = self.get_obs(cur_idx)
            print("Market condition:", list(obs))
            # choose which bot has to run
            eps = np.random.uniform()
            action = self.model.choose_action(obs)           


            print("Chosen bot:", action)
            actions_over_time.append(self.actions[action])
            # counts[action] += 1
            # run bots
            pairings = [] # self.actions[action]
            for bot in self.actions:

                pnl, sr = self.run_test(bot, cur_idx, self.trading_interval) # returns [pnl, sharpe]
                pnlsr = self.model.combine_pnl_sharpe(pnl, sr)
                pairings.append((bot, pnlsr))
                

            rewards_for_each[action] += pairings[action][0]
            pnl_over_time.append(pairings[action][0])
            sharpe_over_time.append(pairings[action][0])
            print("Chosen action achieved {} PnL".format(pnl))
            # get reward from rankings

            

            pairings.sort(key = lambda x : x[1], reverse = True)
            ranking = 0
            for j in range(len(pairings)):
                if pairings[j][0] == action:
                    ranking = j
                    break
            reward_value = self.get_reward(ranking)
            # rewards_over_time.append(reward_value)
            self.model.train_step(self.tensorize(obs), action, reward_value)
            # last_chosen = action
        return pnl_over_time, sharpe_over_time, actions_over_time
    
    # Comparing the PnLs over time, cumulative PnL, and avg. Sharpe ratio
    def compare(self, pnl_over_time, cum_pnl, avg_sharpe):
        epochs = self.testing_length
        idx = self.start_idx + self.trading_interval * self.backtest_length + self.trading_interval * self.training_length
        pnl_over_times = []
        cum_pnls = []
        avg_sharpes = []
        for bot in self.actions:
            bot_pnl_over_time, bot_cum_pnl, bot_avg_sharpe = self.model.compare(self.ticker, epochs, idx, self.trading_interval, bot)
            pnl_over_times.append(bot_pnl_over_time)
            cum_pnls.append(bot_cum_pnl)
            avg_sharpes.append(bot_avg_sharpe)
        return pnl_over_times

# DEMO: Retrainable AI System

In [None]:
# Bots are identified by their integer codes:
# 0 RSI, 1 MACD, 2 Bollinger Bands, 3 VWAP, 4 sentiment.
aisystem = AISystem("AAPL", bots = [0, 1, 2, 3, 4])
aisystem.gather_data()
aisystem.create_model()

[*********************100%***********************]  1 of 1 completed
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 11)                132       
                                                                 
 dense_5 (Dense)             (None, 11)                132       
                                                                 
 dense_6 (Dense)             (None, 10)                120       
                                                                 
 dense_7 (Dense)             (None, 5)                 55        
                                                                 
Total params: 439
Trainable params: 439
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Score each bot over the backtest period and pre-train the model on the rankings.
aisystem.backtest()

LOGITS tf.Tensor([[0.0000000e+00 1.7250078e-35 0.0000000e+00 2.1459083e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250341e-35 0.0000000e+00 2.1459248e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250471e-35 0.0000000e+00 2.1459493e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250471e-35 0.0000000e+00 2.1459493e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250471e-35 0.0000000e+00 2.1459577e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250210e-35 0.0000000e+00 2.1459577e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250210e-35 0.0000000e+00 2.1459577e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00 1.7250210e-35 0.0000000e+00 2.1459577e-03 9.9785405e-01]], shape=(1, 5), dtype=float32)
LOGITS tf.Tensor([[0.0000000e+00

In [None]:
# Train the model on the intervals that follow the backtest period.
aisystem.train()

TRADING DAY 1
Market condition: [array([407.07000732421875, -0.08, 0.36, 0.28, -0.52, 0.15, 0.017, False,
       False, False, False], dtype=object)]
Chosen bot: 4
Chosen action achieved 0 PnL
LOGITS tf.Tensor([[0.0000000e+00 6.2463847e-35 0.0000000e+00 2.3690546e-03 9.9763095e-01]], shape=(1, 5), dtype=float32)
TRADING DAY 2
Market condition: [array([413.2099914550781, 0.51, 0.28, 0.09, 0.24, -0.21, 0.017, False,
       False, False, False], dtype=object)]
Chosen bot: 0
Chosen action achieved 0 PnL
LOGITS tf.Tensor([[0.0000000e+00 1.8960560e-35 0.0000000e+00 2.1830238e-03 9.9781692e-01]], shape=(1, 5), dtype=float32)
TRADING DAY 3
Market condition: [array([415.3900146484375, 0.3, 0.25, -0.08, -0.27, -0.32, 0.02, False,
       False, False, False], dtype=object)]
Chosen bot: 3
Chosen action achieved 0 PnL
LOGITS tf.Tensor([[0.0000000e+00 1.2495778e-35 0.0000000e+00 2.1306956e-03 9.9786925e-01]], shape=(1, 5), dtype=float32)
TRADING DAY 4
Market condition: [array([415.79998779296875, 0.

In [None]:
# Keep the evaluation results: later cells read these three names.
pnl_over_time, sharpe_over_time, actions_over_time = aisystem.test()

TRADING DAY 1
Market condition: [array([448.1199951171875, -0.66, 0.42, -0.26, -0.38, 0.62, 0.021, False,
       False, False, False], dtype=object)]
Chosen bot: 0
Chosen action achieved -555.1399999999994 PnL
LOGITS tf.Tensor([[0.0000000e+00 2.1564826e-38 0.0000000e+00 1.2767951e-03 9.9872321e-01]], shape=(1, 5), dtype=float32)
TRADING DAY 2
Market condition: [array([444.6199951171875, -0.83, 0.36, 0.06, -0.08, 0.22, 0.022, False,
       False, False, False], dtype=object)]
Chosen bot: 0
Chosen action achieved 0 PnL
LOGITS tf.Tensor([[0.0000000e+00 4.0931746e-38 0.0000000e+00 1.3263305e-03 9.9867368e-01]], shape=(1, 5), dtype=float32)
TRADING DAY 3
Market condition: [array([447.32000732421875, -1.22, 0.33, 0.53, -0.28, 0.29, 0.023, False,
       False, False, False], dtype=object)]
Chosen bot: 1
Chosen action achieved -526.5 PnL
LOGITS tf.Tensor([[0.0000000e+00 2.3923744e-38 0.0000000e+00 1.2515867e-03 9.9874842e-01]], shape=(1, 5), dtype=float32)
TRADING DAY 4
Market condition: [arra

In [None]:
# Per-bot PnL lists over the test period, for comparison with the RL results.
# NOTE(review): pnl_over_time / sharpe_over_time must hold the values returned
# by aisystem.test() before this cell runs.
better_prop = aisystem.compare(pnl_over_time, sum(pnl_over_time), np.mean(sharpe_over_time))

In [None]:
# NOTE(review): the AISystem class defined in this notebook has no
# show_results method — confirm where it is supposed to come from.
aisystem.show_results()

RL PnL: [-555.1399999999994, 0, -526.5, 0, 683.4749999999913, 529.1720000000205, 357.4930000000022, 107.0740000000078, 0, -1477.050000000003, 1165.6008000000002, -1004.5776000000333, -544.1862999999867, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
RL Cum PnL: -1264.6391000000003
RL Sharpe: 0.08996027070199299
RL actions: [0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


### Visualizations

In [None]:
import seaborn as sns
sns.set_theme()
# Hard-coded PnL series (one value per trading interval).
pnls = [0.0, 0.0, -3073.924204822295, 0, 0.0, 0.0, 0.0, 0, 0, 0, 0.0, -11404.91, -4915.195001602173, 0, 9964.503403663635, 17511.087799072266, 0, 7645.038441610406, 0.0, 0.0, 0.0, 0, 0, 0, 0.0, 0, 0.0, 898.2, 0.0, 0, 0.0, 0.0, 0, 0, 1222.22, 7223.816871643066, 0, 0, 0, 0, 0.0, 9508.24, 0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0, 0, 0.0, 0, 0.0, 0.0, -3332.33980178833, 0, 0, 0.0, 0]
cum = np.cumsum(pnls)
fig, ax = plt.subplots()
ax.plot(range(len(pnls)), cum, label = "Cumulative PnL")
ax.set_xlabel("Month")
ax.set_ylabel("PnL ($)")
ax.legend()
plt.show()
final_pnl = round(cum[-1], 2)
print("Final PnL: " + "$" + str(final_pnl))
# NOTE(review): the Sharpe value printed here is a hard-coded literal, not a
# computed result.
print("Average Sharpe Ratio:", 69)

### Visualizations

In [None]:
# NOTE(review): this cell repeats the previous visualization cell line for
# line — consider keeping only one of them.
import seaborn as sns
sns.set_theme()
# Hard-coded PnL series; the cumulative sum is what gets plotted.
pnls = [0.0, 0.0, -3073.924204822295, 0, 0.0, 0.0, 0.0, 0, 0, 0, 0.0, -11404.91, -4915.195001602173, 0, 9964.503403663635, 17511.087799072266, 0, 7645.038441610406, 0.0, 0.0, 0.0, 0, 0, 0, 0.0, 0, 0.0, 898.2, 0.0, 0, 0.0, 0.0, 0, 0, 1222.22, 7223.816871643066, 0, 0, 0, 0, 0.0, 9508.24, 0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0, 0, 0.0, 0, 0.0, 0.0, -3332.33980178833, 0, 0, 0.0, 0]
cum = np.cumsum(pnls)
plt.plot(range(len(pnls)), cum, label = "Cumulative PnL")
plt.xlabel("Month")
plt.ylabel("PnL ($)")
plt.legend()
plt.show()
print("Final PnL: " + "$" + str(round(cum[-1], 2)))
# NOTE(review): the Sharpe value printed below is a hard-coded literal, not a
# computed result.
print("Average Sharpe Ratio:", 69)