In [1]:
import os
import datetime

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False


**Prepare Data**

In [24]:
stocks = {
"financial_large_stocks" : ['WFC','PNC','BAC','C','JPM'],
"financial_mid_stocks" : ['TRV', 'ALL', 'HBAN', 'LNC', 'RF'],
"financial_small_stocks" : ['UMBF', 'HFWA', 'SBSI', 'THFF', 'RNST'],

"tech_large_stocks" : ['MSFT', 'AAPL', 'IBM', 'CSCO', 'INTC'],
"tech_mid_stocks" : ['MANH', 'NVDA', 'SWKS', 'CGNX', 'BLKB'],
"tech_small_stocks" : ['PLAB', 'TSRI', 'CAMP', 'ASUR', 'DMRC'],

"medic_large_stocks" : ['JNJ', 'PFE', 'MRK', 'ABT', 'LLY'],
"medic_mid_stocks" : ['DXCM', 'HOLX', 'PDCO', 'ALGN', 'TECH'],
"medic_small_stocks" : ['OMI', 'VIVE', 'OGEN', 'CLDX', 'ANGO'],

"utility_large_stocks" : ['NEE', 'DUK', 'SO', 'D', 'EXC'],
"utility_mid_stocks"  : ['AWK', 'WEC', 'PNM', 'NWE', 'OGE'],
"utility_small_stocks" : ['MGEE', 'CNLG', 'CWCO', 'ARTNA', 'SJI'],

"food_large_stocks" : ['PEP', 'KO', 'PG', 'MDLZ', 'GIS'],
"food_mid_stocks" : ['BGS', 'FLO', 'JBSS', 'TSN', 'THS'],
"food_small_stocks" : ['FARM', 'PRMW', 'WINA', 'CALM', 'UNFI']
}
test_stocks = {
"choosen_stocks":["AAPL","DXCM","JPM","NEE"]
}

In [39]:
# Scale the data to be between 0 and 1
# When scaling remember! You normalize both test and train data with respect to training data
# Because you are not supposed to have access to test data

def get_ON_normalized_dt(df, scaler):
  data = np.array(df_close).reshape(-1,1)

  df_old_normalized = scaler.fit_transform(data[:len(data)//2,:])
  df_new_normalized = scaler.fit_transform(data[len(data)//2+1:,:])

  df_old_normalized += 0.01
  df_new_normalized += 0.01
  df_old_normalized[df_old_normalized>1] = 1
  df_new_normalized[df_new_normalized>1] = 1

  return df_old_normalized.reshape(-1,1), df_new_normalized.reshape(-1,1)

def create_dataset(dataset,time_step=1):

  dataX, dataY = [], []
  for i in range(len(dataset)-time_step):
    a = dataset[i:(i+time_step),0]
    dataX.append(a)
    dataY.append(dataset[i+time_step,0])
  return np.array(dataX), np.array(dataY)

def creat_train_test(data,time_step=1):
  # split = int(split_percent*len(data))
  # train = data[:split]
  # test = data[split:]

  train = data[:len(data)-(30+time_step)]
  test = data[-(30+time_step):]

  X_train, y_train = create_dataset(train,time_step)
  X_test, y_test = create_dataset(test,time_step)
  X_train = X_train[:,:,None]
  X_test= X_test[:,:,None]

  return X_train, y_train, X_test, y_test

import math
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
def get_prediction(model, scalar,X_train,X_test):
  train_predict=model.predict(X_train)
  test_predict=model.predict(X_test)

  train_predict=scaler.inverse_transform(train_predict)
  test_predict=scaler.inverse_transform(test_predict)
  return train_predict, test_predict

def get_MAE(y_test, predictions):
  _ , test_predict = predictions
  test_MAE = math.sqrt(mean_squared_error(y_test,test_predict))

  # return train_MAE, test_MAE
  return np.around(test_MAE,4)
def get_MAPE(y_test, predictions):
  _, test_predict = predictions
  test_MAPE = mean_absolute_percentage_error(y_test,test_predict)

  # return train_MAPE, test_MAPE
  return np.around(test_MAPE,7)


In [4]:
import time
def plot_predictions(ON_data,ON_predictions,ON_MAE,scaler,date,name,look_back):
  slots = ["past", "current"]
  fig, axs = plt.subplots(2,1, figsize=(7,7))
  ax1, ax2 = axs


  test_data = ON_data[0]
  train_predict, test_predict = ON_predictions[0]

  # trainPredictPlot = np.zeros(np.shape(data))+ np.nan
  # trainPredictPlot[look_back:len(train_predict)+look_back,:] = train_predict

  # testPredictPlot = np.zeros(np.shape(data))+np.nan
  # testPredictPlot[len(train_predict)+look_back+1:len(data)-(1+look_back),:] = test_predict

  # train_data = scaler.inverse_transform(data[look_back:len(train_predict)+look_back])
  ax1.plot(range(test_data.shape[0]),test_data)
  ax1.plot(range(test_data.shape[0]),test_predict)
  ax1.legend(["Ground Truth", "Prediction"], loc="best")
  # ax1.xticks(range(0,test_data.shape[0],5),date.loc[len(train_predict)+look_back*2+1:len(data)-1:5],rotation=45)

  start = len(train_predict)+look_back*2

  ax1.set_xticks(range(0,len(test_data)+1,5),date[start:start+len(test_data)+1:5],size=6,rotation=45)
  #ax1.text(3.5, 10, f'MAE : {ON_MAE[0]}', ha='right', va='center', transform=ax2.transAxes)
  # plt.text(.8, .7, f"past_Test_MAE: {ON_MAE[0][1]}")

  ax1.set_xlabel('Date',size='medium',fontweight='bold')
  ax1.set_ylabel('Stock Price')
  # displaying the title
  ax1.set_title(f"Actual v.s Prediction of {name} ({slots[0]})")

  # fig.tight_layout()
###################################################################################################
  train_predict, test_predict = ON_predictions[1]
  # trainPredictPlot = np.zeros(np.shape(data))+ np.nan
  # trainPredictPlot[look_back:len(train_predict)+look_back,:] = train_predict

  # testPredictPlot = np.zeros(np.shape(data))+np.nan
  # testPredictPlot[len(train_predict)+look_back+1:len(data)-(1+look_back),:] = test_predict
  test_data = ON_data[1]

  ax2.plot(range(test_data.shape[0]),test_data,label="Test Ground Truth")
  ax2.plot(range(test_data.shape[0]),test_predict,label='Prediction')
  ax2.legend(["Ground Truth", "Prediction"], loc="best")
  start = len(date)//2+len(train_predict)+look_back*2


  data_stick = date[start:start+len(test_data)+1:5]
  # ax2.xticks(range(0,test_data.shape[0],5),date.loc[start:start+len(test_data):5],rotation=45)
  ax2.set_xticks(range(0,len(data_stick)*5,5),data_stick,size=6,rotation=45)
  # Adding text beside the subplots
  #ax2.text(3.5, 10, f'MAE : {ON_MAE[1]}', ha='right', va='center', transform=ax2.transAxes)

  ax2.set_xlabel('Date',size='medium',fontweight='bold')
  ax2.set_ylabel('Stock Price')
  # displaying the title
  ax2.set_title(f"Actual v.s Prediction of {name} ({slots[1]})")
  fig.tight_layout()
  plt.show()

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

time_step = 10
def get_vanilla_LSTM(time_step):
  model = Sequential()
  model.add(Input((time_step,1)))
  model.add(LSTM(50, activation='relu',))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mse')
  return model

def get_stacked_LSTM(time_step):
  model = Sequential()
  model.add(Input((time_step, 1)))
  model.add(LSTM(50, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01)))
  model.add(Dropout(0.2))
  model.add(LSTM(50, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01)))
  model.add(Dropout(0.2))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mse')
  return model

In [36]:
model.summary()

In [35]:
MAPE = {}
list_dic = [{} for sub in range(5)]
for stocks_cata in test_stocks.keys():
  MAPE[stocks_cata] = dict(zip(test_stocks[stocks_cata],list_dic))

In [26]:
print(MAPE)

{'choosen_stocks': {'AAPL': {}, 'DXCM': {}, 'JPM': {}, 'NEE': {}}}


In [41]:
data_dir = "/content/drive/My Drive/stock_data"
MAE = {}
num_epochs = 200

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

# for file in files:
#   if not file.endswith('.csv'):
#     continue
#   df = pd.read_csv(file)
#   df_close = df.reset_index()['Close']
import yfinance as yf
# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model = get_stacked_LSTM(time_step)
# for stocks_cata in test_stocks.keys():
for stock_cata in test_stocks.keys():
  data = yf.download(test_stocks[stock_cata], start='2010-1-1', end='2020-1-1')
  data_close = data['Close']
  date = data_close.index.date
  for column in data_close:

    # Select column contents by column
    # name using [] operator
    df_close = data_close[column]
    df_close.head()
    df_old_normalized, df_new_normalized = get_ON_normalized_dt(df_close, scaler)

    # data[stock_name] = {'old': df_old_normalized, 'new':df_new_normalized}
    X_old_train, y_old_train, X_old_test, y_old_test = creat_train_test(df_old_normalized,time_step=time_step)

    model.fit(X_old_train,y_old_train,validation_data=(X_old_test,y_old_test),epochs=num_epochs,batch_size=64, verbose=1, validation_split=0.2, callbacks=[early_stopping])
    X_new_train, y_new_train, X_new_test, y_new_test = creat_train_test(df_new_normalized,time_step=time_step)
    model.fit(X_new_train,y_new_train,validation_data=(X_new_test,y_new_test),epochs=num_epochs,batch_size=64, verbose=1, validation_split=0.2, callbacks=[early_stopping])

    old_predictions = get_prediction(model,scaler,X_old_train,X_old_test)
    new_predictions = get_prediction(model,scaler,X_new_train,X_new_test)
    y_old_test = scaler.inverse_transform(y_old_test.reshape(-1,1))
    y_new_test = scaler.inverse_transform(y_new_test.reshape(-1,1))
    # old_MAE = get_MAE(y_old_test, old_predictions)
    old_MAPE = get_MAPE(y_old_test, old_predictions)
    # new_MAE = get_MAE(y_new_test,new_predictions)
    new_MAPE = get_MAPE(y_new_test,new_predictions)

    # plot_predictions([y_old_test,y_new_test],[old_predictions,new_predictions],[old_MAE,new_MAE],scaler,date,column,time_step)
    # MAE[column] = [old_MAE, new_MAE]

    MAPE[stock_cata][column] = [old_MAPE, new_MAPE]
    print(MAPE)
# for key in MAPE.keys():
#  print(f"{key} : {MAPE[key]}")

[*********************100%%**********************]  4 of 4 completed

Epoch 1/200





[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 245ms/step - loss: 0.9363 - val_loss: 1.0960
Epoch 2/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6184 - val_loss: 0.7141
Epoch 3/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4190 - val_loss: 0.5884
Epoch 4/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.2940 - val_loss: 0.5079
Epoch 5/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2093 - val_loss: 0.4295
Epoch 6/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.1522 - val_loss: 0.4095
Epoch 7/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.1170 - val_loss: 0.3631
Epoch 8/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0930 - val_loss: 0.3387
Epoch 9/200
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

ValueError: Found array with dim 3. None expected <= 2.

In [22]:
for key in MAPE.keys():
  dic = MAPE[key]
  for comp in dic.keys():
    print(f"{comp} : {dic[comp]}")

AAPL : [0.0208175, 0.0258982]
DXCM : [0.0321343, 0.0347536]
JPM : [0.0108855, 0.0073206]
NEE : [0.0094263, 0.0048628]
PEE : {}
