In [2]:
import yfinance as yf
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
import seaborn as sns
from tqdm import tqdm
import pandas as pd
from statsmodels.tools.sm_exceptions import ValueWarning, HessianInversionWarning, ConvergenceWarning
import warnings

# In practice do not suppress these warnings: they carry important information
# about the status of your model. They are silenced here only to keep the
# notebook output readable.
for noisy_warning in (ValueWarning, HessianInversionWarning, ConvergenceWarning):
    warnings.filterwarnings('ignore', category=noisy_warning)

# Simulate Buying and Selling Stock Using ARMA(p,q) and ARIMA

In [3]:
def run_simulation(returns, prices, amt, order, thresh, verbose=False, plot=True,
                   ticker=None, warmup=14):
    """Simulate a buy-then-sell-at-the-next-date strategy and return the final amount.

    The simulation walks through the dates of ``returns`` after the first
    ``warmup`` observations. When not holding, it decides whether to buy at
    ``prices.loc[date]``; when holding, it always sells at the next date's
    price and compounds ``amt`` by the realised return. A position that is
    still open when the data ends is not sold and does not affect the result.

    Parameters
    ----------
    returns : pandas.Series
        Return series; its index labels must also be present in ``prices``.
    prices : pandas.Series
        Price series used for buy/sell prices and for the plot.
    amt : float
        Starting amount of money.
    order : float, tuple or 'last'
        Selects the buying rule:
        * float -- buy with this probability (uses ``np.random.random``);
        * tuple -- ARIMA order; buy when the one-step forecast exceeds ``thresh``;
        * 'last' -- buy when the most recent return is positive.
        Any other value never buys.
    thresh : float or None
        Forecast threshold for the ARIMA rule; ignored (reset to None) for the
        random rule.
    verbose : bool, default False
        Print each trade and the final amount.
    plot : bool, default True
        Draw the price series with the trades shaded green (gain) or red (loss).
    ticker : str, optional
        Label for the plot title. When omitted, the module-level variable
        ``tickerSymbol`` is used if it exists.
    warmup : int, default 14
        Number of leading observations skipped before trading starts.

    Returns
    -------
    float
        The amount of money after the last completed sale.
    """
    is_random = isinstance(order, float)
    is_arima = isinstance(order, tuple)
    if is_random:
        thresh = None

    curr_holding = False
    buy_price = None
    pred = None  # stays None for rules that do not forecast
    events_list = []
    init_amt = amt

    sim_returns = returns.iloc[warmup:]

    # go through dates
    for date in tqdm(sim_returns.index, total=len(sim_returns)):
        # if you're currently holding the stock, sell it
        if curr_holding:
            sell_price = prices.loc[date]
            curr_holding = False
            ret = (sell_price - buy_price) / buy_price
            amt *= (1 + ret)
            events_list.append(('s', date, ret))

            if verbose:
                print('Sold at $%s' % sell_price)
                # Only the ARIMA rule produces a prediction to report.
                if pred is not None:
                    print('Predicted Return: %s' % round(pred, 4))
                print('Actual Return: %s' % (round(ret, 4)))
                print('=======================================')
            continue

        # Label-based slice: includes the return observed on `date` itself.
        curr_data = returns.loc[:date]

        pred = None
        if is_arima:
            try:
                # fit model
                model = ARIMA(curr_data, order=order).fit()

                # One-step-ahead forecast; forecast() may return a Series or
                # an array depending on the input type, so normalise first.
                pred = float(np.asarray(model.forecast()).ravel()[0])
            except Exception as exc:
                # Best effort: a failed fit means "do not buy today", but the
                # failure is reported instead of being silently swallowed.
                pred = None
                warnings.warn(
                    'ARIMA%s fit/forecast failed on %s (%s); skipping buy decision'
                    % (order, date, exc),
                    RuntimeWarning,
                )

        # decide whether to buy according to the selected rule
        if is_random:
            buy = np.random.random() < order
        elif is_arima:
            buy = pred is not None and pred > thresh
        elif order == 'last':
            buy = curr_data.iloc[-1] > 0
        else:
            buy = False

        if buy:
            curr_holding = True
            buy_price = prices.loc[date]
            events_list.append(('b', date))
            if verbose:
                print('Bought at $%s' % buy_price)

    if verbose:
        print('Total Amount: $%s' % round(amt, 2))

    # graph
    if plot:
        if ticker is None:
            # Backward compatible with notebooks that set a global tickerSymbol.
            ticker = globals().get('tickerSymbol', '')

        plt.figure(figsize=(10, 4))
        plt.plot(prices.iloc[warmup:])

        price_lo, price_hi = prices.min(), prices.max()
        # The shaded band extends beyond the visible y-range so it always
        # fills the axes.
        shaded_y = [price_lo * .5, price_hi * 1.5]

        for idx, event in enumerate(events_list):
            plt.axvline(event[1], color='k', linestyle='--', alpha=0.4)
            if event[0] == 's':
                # The event just before a sale is always its matching buy.
                color = 'green' if event[2] > 0 else 'red'
                plt.fill_betweenx(shaded_y, event[1], events_list[idx - 1][1],
                                  color=color, alpha=0.1)

        tot_return = round(100 * (amt / init_amt - 1), 2)
        tot_return = str(tot_return) + '%'
        plt.title("%s Price Data\nThresh=%s\nTotal Amt: $%s\nTotal Return: %s"%(ticker, thresh, round(amt,2), tot_return), fontsize=20)
        plt.ylim(price_lo * .95, price_hi * 1.05)
        plt.show()

    return amt

# Read Data

In [4]:
# tickerSymbol = 'AAPL'
# data = yf.Ticker(tickerSymbol)
# Load the OHLC table with the 'Date' column parsed as the index.
# The dates are ISO formatted (YYYY-MM-DD), which pandas parses by default, so no
# custom parser is needed (pd.datetime no longer exists in current pandas and
# the date_parser argument is deprecated).
stock_data = pd.read_csv("./Dataset/HSBC_Set01/ohlc.csv", index_col='Date', parse_dates=['Date'])


FileNotFoundError: [Errno 2] No such file or directory: './Dataset/HSBC_Set01/ohlc.csv'

In [None]:
# prices = data.history(start='2021-01-01', end='2021-04-01').Close
# Closing prices, and their period-over-period percentage change; the first
# entry has no predecessor and is dropped.
prices = stock_data.loc[:, 'Close']
returns = prices.pct_change().dropna()
type(returns)

In [None]:
# Closing price over time.
plt.figure(figsize=(10,4))
plt.plot(prices)
plt.title('Closing Price', fontsize=20)
plt.xlabel('Date', fontsize=20)
plt.ylabel('Prices', fontsize=20)
plt.show()

In [None]:
# Percentage returns over time.
plt.figure(figsize=(10,4))
plt.plot(returns)
plt.title('Returns', fontsize=20)
plt.xlabel('Date', fontsize=20)
plt.ylabel('Return', fontsize=20)
plt.show()

A return is the percent change in the stock price from one day to the next. When the return is less than zero the stock price went down that day, and when it is greater than zero the stock price went up.

In [None]:
# Autocorrelation of the returns.
plot_acf(x=returns)
plt.show()

In [None]:
# Partial autocorrelation of the returns.
plot_pacf(x=returns)
plt.show()

# Baseline Model : Random Buying

What it's really doing is just random buying so here's a graphic of what it does.

Every day it randomly chooses whether or not to buy the stock, and on the subsequent day it sells the stock regardless of whether the price is high or low.

The red windows are places where it bought the stock at the left edge of the window and the price had gone down by the time it sold the stock on the following day.

The green windows are where it bought the stock and the stock price went up before it was sold.

In [None]:
tickerSymbol = 'HSBC'
# Seed the global NumPy generator so the random-buying baseline gives the same
# result on every Restart & Run All.
np.random.seed(42)
run_simulation(returns, prices, amt=100, order=0.5, thresh=None, verbose=False)

In [None]:
# Repeat the random-buying baseline 1000 times (without plotting) to see the
# spread of final amounts.
final_amts = [
    run_simulation(returns, prices, amt=100, order=0.5, thresh=None, verbose=False, plot=False)
    for _ in range(1000)
]

In [None]:
# Distribution of final amounts from the repeated random-buying runs.
avg_amt = np.mean(final_amts)
sd_amt = np.std(final_amts)

plt.figure(figsize=(10,4))
# sns.distplot is deprecated; histplot with a KDE on a density scale is the
# documented replacement.
sns.histplot(final_amts, kde=True, stat='density')
plt.axvline(avg_amt, color='k', linestyle='--')  # average final amount
plt.axvline(100, color='g', linestyle='--')      # starting amount
plt.xlabel('Final amount ($)')
plt.title('Avg: $%s\nSD: $%s'%(round(avg_amt,2), round(sd_amt,2)), fontsize=20)
plt.show()

This is a histogram of the total amount of money you have at the end of a random buying scheme.

The green dashed line is how much you started with ($100), the black dashed line is the average final amount, and the histogram shows how much you end up with.

# If Last Return was Positive, Buy

Let's do one more baseline method: if the last return was positive, buy the stock and then sell it on the following day.

In [None]:
# Baseline: buy whenever the most recent return was positive.
run_simulation(returns, prices, amt=100, order='last', thresh=None, verbose=False)

For example, take a look at the small green window: right before it the stock price was going up, so the strategy bought the stock and then sold it one day later. As before, the green windows are where you made a good decision and the stock price continued to go up after the purchase, and the red windows are where you made a bad decision: the price was going up on the day before you bought, but it went down after that.

We're actually doing worse than before

# Try ARIMA(2,0,0) Model

Run several AR(2) models, i.e. ARIMA(2,0,0); they differ only in the buy threshold.

We forecast what the stock return will be on the following day using the AR(2) model. If that predicted return is bigger than some threshold, we buy the stock and then sell it on the following day.

In [None]:
# NOTE: this cell redefines run_simulation, shadowing the definition given
# earlier in the notebook; ideally only one copy should be kept.
def run_simulation(returns, prices, amt, order, thresh, verbose=False, plot=True,
                   ticker=None, warmup=14):
    """Simulate a buy-then-sell-at-the-next-date strategy and return the final amount.

    The simulation walks through the dates of ``returns`` after the first
    ``warmup`` observations. When not holding, it decides whether to buy at
    ``prices.loc[date]``; when holding, it always sells at the next date's
    price and compounds ``amt`` by the realised return. A position that is
    still open when the data ends is not sold and does not affect the result.

    Parameters
    ----------
    returns : pandas.Series
        Return series; its index labels must also be present in ``prices``.
    prices : pandas.Series
        Price series used for buy/sell prices and for the plot.
    amt : float
        Starting amount of money.
    order : float, tuple or 'last'
        Selects the buying rule:
        * float -- buy with this probability (uses ``np.random.random``);
        * tuple -- ARIMA order; buy when the one-step forecast exceeds ``thresh``;
        * 'last' -- buy when the most recent return is positive.
        Any other value never buys.
    thresh : float or None
        Forecast threshold for the ARIMA rule; ignored (reset to None) for the
        random rule.
    verbose : bool, default False
        Print each trade and the final amount.
    plot : bool, default True
        Draw the price series with the trades shaded green (gain) or red (loss).
    ticker : str, optional
        Label for the plot title. When omitted, the module-level variable
        ``tickerSymbol`` is used if it exists.
    warmup : int, default 14
        Number of leading observations skipped before trading starts.

    Returns
    -------
    float
        The amount of money after the last completed sale.
    """
    is_random = isinstance(order, float)
    is_arima = isinstance(order, tuple)
    if is_random:
        thresh = None

    curr_holding = False
    buy_price = None
    pred = None  # stays None for rules that do not forecast
    events_list = []
    init_amt = amt

    sim_returns = returns.iloc[warmup:]

    # go through dates
    for date in tqdm(sim_returns.index, total=len(sim_returns)):
        # if you're currently holding the stock, sell it
        if curr_holding:
            sell_price = prices.loc[date]
            curr_holding = False
            ret = (sell_price - buy_price) / buy_price
            amt *= (1 + ret)
            events_list.append(('s', date, ret))

            if verbose:
                print('Sold at $%s' % sell_price)
                # Only the ARIMA rule produces a prediction to report.
                if pred is not None:
                    print('Predicted Return: %s' % round(pred, 4))
                print('Actual Return: %s' % (round(ret, 4)))
                print('=======================================')
            continue

        # Label-based slice: includes the return observed on `date` itself.
        curr_data = returns.loc[:date]

        pred = None
        if is_arima:
            try:
                # fit model
                model = ARIMA(curr_data, order=order).fit()

                # One-step-ahead forecast; forecast() may return a Series or
                # an array depending on the input type, so normalise first.
                pred = float(np.asarray(model.forecast()).ravel()[0])
            except Exception as exc:
                # Best effort: a failed fit means "do not buy today", but the
                # failure is reported instead of being silently swallowed.
                pred = None
                warnings.warn(
                    'ARIMA%s fit/forecast failed on %s (%s); skipping buy decision'
                    % (order, date, exc),
                    RuntimeWarning,
                )

        # decide whether to buy according to the selected rule
        if is_random:
            buy = np.random.random() < order
        elif is_arima:
            buy = pred is not None and pred > thresh
        elif order == 'last':
            buy = curr_data.iloc[-1] > 0
        else:
            buy = False

        if buy:
            curr_holding = True
            buy_price = prices.loc[date]
            events_list.append(('b', date))
            if verbose:
                print('Bought at $%s' % buy_price)

    if verbose:
        print('Total Amount: $%s' % round(amt, 2))

    # graph
    if plot:
        if ticker is None:
            # Backward compatible with notebooks that set a global tickerSymbol.
            ticker = globals().get('tickerSymbol', '')

        plt.figure(figsize=(10, 4))
        plt.plot(prices.iloc[warmup:])

        price_lo, price_hi = prices.min(), prices.max()
        # The shaded band extends beyond the visible y-range so it always
        # fills the axes.
        shaded_y = [price_lo * .5, price_hi * 1.5]

        for idx, event in enumerate(events_list):
            plt.axvline(event[1], color='k', linestyle='--', alpha=0.4)
            if event[0] == 's':
                # The event just before a sale is always its matching buy.
                color = 'green' if event[2] > 0 else 'red'
                plt.fill_betweenx(shaded_y, event[1], events_list[idx - 1][1],
                                  color=color, alpha=0.1)

        tot_return = round(100 * (amt / init_amt - 1), 2)
        tot_return = str(tot_return) + '%'
        plt.title("%s Price Data\nThresh=%s\nTotal Amt: $%s\nTotal Return: %s"%(ticker, thresh, round(amt,2), tot_return), fontsize=20)
        plt.ylim(price_lo * .95, price_hi * 1.05)
        plt.show()

    return amt

# Run the ARIMA(2,0,0) strategy once per buy threshold, starting from $200.
for thresh in (0, 0.001, 0.005):
    run_simulation(returns, prices, amt=200, order=(2, 0, 0), thresh=thresh, verbose=False)

# Try ARIMA(2,0,2) Model

In [None]:
# Run the ARIMA(2,0,2) strategy once per buy threshold, starting from $200.
for thresh in (0, 0.001, 0.005):
    run_simulation(returns, prices, amt=200, order=(2, 0, 2), thresh=thresh, verbose=False)