In [1]:
# Third-party stack: numerics, tabular data, market-data download, and the
# TensorFlow/Keras + scikit-learn pieces used further down.
import numpy as np
import pandas as pd
import yfinance as yf
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.options.mode.chained_assignment = None
# Fix TensorFlow's global random seed.
tf.random.set_seed(0)
# NOTE(review): star import hides where names come from. create_backtest_frames,
# feature_creation and make_future_dataframe used below are presumably defined
# in stock_utils — confirm and import them explicitly.
from stock_utils import *

In [2]:
from datetime import datetime

# Tickers to download, paired positionally with the label stored in `company_name`.
tech_list = ['AAPL', 'GOOG', 'AMZN']
company_name = ["APPLE", "GOOGLE", "AMAZON"]

# Download window: fixed start date up to the moment the cell is run.
start = "2010-01-01"
end = datetime.now()

# Keep the downloads in an explicit mapping instead of injecting variables
# through globals(), so every name used later has a visible binding.
stock_frames = {
    stock: yf.download(stock, start=start, end=end)
    for stock in tech_list
}

# Later cells refer to the frames by ticker name (e.g. AMZN), so bind them explicitly.
AAPL, GOOG, AMZN = (stock_frames[stock] for stock in tech_list)
company_list = [AAPL, GOOG, AMZN]

# Tag each frame with its company label before stacking them.
for company, com_name in zip(company_list, company_name):
    company["company_name"] = com_name

# One long frame with all companies stacked row-wise.
df = pd.concat(company_list, axis=0)
df.head(10)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,company_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04 00:00:00-05:00,7.6225,7.660714,7.585,7.643214,6.515212,493729600,APPLE
2010-01-05 00:00:00-05:00,7.664286,7.699643,7.616071,7.656429,6.526477,601904800,APPLE
2010-01-06 00:00:00-05:00,7.656429,7.686786,7.526786,7.534643,6.422664,552160000,APPLE
2010-01-07 00:00:00-05:00,7.5625,7.571429,7.466071,7.520714,6.410791,477131200,APPLE
2010-01-08 00:00:00-05:00,7.510714,7.571429,7.466429,7.570714,6.453412,447610800,APPLE
2010-01-11 00:00:00-05:00,7.6,7.607143,7.444643,7.503929,6.396482,462229600,APPLE
2010-01-12 00:00:00-05:00,7.471071,7.491786,7.372143,7.418571,6.323722,594459600,APPLE
2010-01-13 00:00:00-05:00,7.423929,7.533214,7.289286,7.523214,6.412922,605892000,APPLE
2010-01-14 00:00:00-05:00,7.503929,7.516429,7.465,7.479643,6.375782,432894000,APPLE
2010-01-15 00:00:00-05:00,7.533214,7.557143,7.3525,7.354643,6.269229,594067600,APPLE


In [16]:
# Window sizes handed to create_backtest_frames and make_future_dataframe in later cells.
n_lookback, n_forecast = 500, 60

# Move the index into a regular column and work on an independent copy of the AMZN frame.
df_stock = AMZN.reset_index().copy()

In [17]:
# Split the frame into the part used for modelling (df_stock) and a held-back
# remainder (df_left) that the forecast is scored against later.
# In the displayed outputs df_stock ends at row 3230 and df_left starts at row 3231.
# NOTE(review): df_stock is overwritten with the helper's result, so re-running this
# cell feeds already-split data back in — presumably trimming it again; confirm
# against create_backtest_frames.
df_stock, df_left = create_backtest_frames(df_stock,n_lookback,n_forecast)

In [18]:
# Add the derived indicator columns. Judging by the columns displayed further down,
# the result carries 50D-SMA, 50D-EMA, rsi, rsicat, average, vwap, vwap_pct_ret and
# pvwap on top of the price columns.
# NOTE(review): overwrites df_stock, so the cell is not idempotent — confirm that
# feature_creation tolerates a frame that already has these columns.
df_stock = feature_creation(df_stock)

In [19]:
# Fill gaps in both directions: back-fill first (reaches NaNs at the start of the
# frame), then forward-fill whatever is still missing.
# Chained and re-assigned rather than mutated with inplace=True, so the cell
# produces its result from its input in a single visible expression.
# NOTE(review): back-filling copies later values into earlier rows — acceptable only
# if the gaps are indicator warm-up rows; confirm.
df_stock = df_stock.bfill().ffill()

In [20]:
# Inspect the last rows of the modelling frame after feature creation and gap filling.
df_stock.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,company_name,50D-SMA,50D-EMA,rsi,rsicat,average,vwap,vwap_pct_ret,pvwap
3226,2022-10-26,116.0,119.349998,114.760002,115.660004,115.660004,68802300,AMAZON,123.0188,120.415688,46.500584,1,116.590001,115.660004,-0.040962,-0.040636
3227,2022-10-27,113.919998,114.120003,109.769997,110.959999,110.959999,129605400,AMAZON,122.396,120.044877,40.609601,1,111.616666,110.959999,-0.040636,-0.068042
3228,2022-10-28,97.910004,103.959999,97.660004,103.410004,103.410004,223133400,AMAZON,121.6182,119.392529,33.309477,1,101.676669,103.410004,-0.068042,-0.00938
3229,2022-10-31,103.559998,104.870003,100.739998,102.440002,102.440002,99251400,AMAZON,120.9024,118.727724,32.501108,1,102.683334,102.440002,-0.00938,-0.055154
3230,2022-11-01,103.989998,104.580002,96.059998,96.790001,96.790001,153370000,AMAZON,120.1738,117.867421,28.207111,1,99.143333,96.790001,-0.055154,-0.055154


In [21]:
# Inspect the first rows of the held-back frame that the forecast is scored against later.
df_left.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,company_name
3231,2022-11-02,97.32,97.739998,92.010002,92.120003,92.120003,135761800,AMAZON
3232,2022-11-03,92.470001,93.5,89.019997,89.300003,89.300003,136683300,AMAZON
3233,2022-11-04,91.489998,92.440002,88.040001,90.980003,90.980003,129101300,AMAZON
3234,2022-11-07,91.949997,92.099998,89.040001,90.529999,90.529999,77495700,AMAZON
3235,2022-11-08,90.790001,91.720001,88.230003,89.980003,89.980003,88703400,AMAZON


In [22]:
# Removing last n rows
#df_dropped_last_n = df.iloc[:-100]
 

In [23]:
# List the columns available for feature selection.
df_stock.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'company_name', '50D-SMA', '50D-EMA', 'rsi', 'rsicat', 'average',
       'vwap', 'vwap_pct_ret', 'pvwap'],
      dtype='object')

In [24]:
# Columns fed to the model as inputs.
features = ['Close', '50D-SMA', '50D-EMA', 'rsi', 'rsicat']

# Min-max scale every feature column to [0, 1]; `x` is the scaled feature matrix.
# NOTE: `scaler` is rebound in a later cell when the target column is scaled.
scaler = MinMaxScaler(feature_range=(0, 1))
x = scaler.fit_transform(df_stock[features])

In [25]:
# Sanity-check the scaled feature matrix: (rows, number of feature columns).
x.shape

(3231, 5)

In [26]:
# Target series: the closing price as a single-column 2-D array, the layout
# MinMaxScaler works on. Series.ffill() replaces the deprecated
# fillna(method='ffill') spelling and forward-fills any remaining gaps.
y = df_stock['Close'].ffill().to_numpy().reshape(-1, 1)

# Scale the target to [0, 1].
# NOTE: this rebinds `scaler`, replacing the feature scaler fitted earlier; the
# object passed to make_future_dataframe is therefore the one fitted on Close alone.
scaler = MinMaxScaler(feature_range=(0, 1))
y = scaler.fit_transform(y)

In [27]:
# Number of model input features; passed to make_future_dataframe in a later cell.
f_len = len(features)

In [3]:
# Load the previously saved model from disk (path is relative to the working
# directory). Relies on `tf` from the imports cell.

stock_model = tf.keras.models.load_model('saved_models/AMZN_60day_model')

In [5]:
# Import plot_model from tensorflow.keras, matching the Keras flavour the rest of
# the notebook uses to build and load models; the standalone
# `keras.utils.vis_utils` module path is not available in newer Keras releases.
from tensorflow.keras.utils import plot_model

# Render the architecture of the loaded model to a PNG, including tensor shapes
# and layer names. Requires pydot and graphviz to be installed.
plot_model(
    stock_model,
    to_file='model_archs/AMZN_60day_model.png',
    show_shapes=True,
    show_layer_names=True,
)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [30]:
# Produce the forecast from the loaded model. df_f's 'Forecast' column is scored
# against df_left in the following cells; df_p is not used in the cells shown here.
# NOTE(review): `scaler` is the one fitted on Close only (it was rebound after the
# feature scaler) — presumably used to map predictions back to price units; confirm
# in make_future_dataframe.
df_f,df_p = make_future_dataframe(df_stock,x,y,n_lookback,n_forecast,stock_model,scaler,features,f_len)

In [31]:
# Root-mean-squared error between the forecast and the held-back closing prices.
forecast_errors = np.array(df_f['Forecast']) - np.array(df_left['Close'])
rmse = np.sqrt(np.mean(forecast_errors ** 2))
rmse

26.679083080110274

In [32]:
from sklearn.metrics import mean_absolute_percentage_error,mean_absolute_error

# Score the forecast against the held-back closes: MAPE (as a fraction) first, then MAE.
actual_close = np.array(df_left['Close'])
forecast_close = np.array(df_f['Forecast'])
for metric in (mean_absolute_percentage_error, mean_absolute_error):
    print(metric(actual_close, forecast_close))

0.28945598070113776
26.19855079650879
