# LSTM RNN

## Imports

In [None]:
# Importing packages
import warnings
import numpy as np
import pandas as pd
import hvplot.pandas
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from plotting import Plotter

In [None]:
# Ignoring warnings
warnings.filterwarnings('ignore')

In [None]:
# Setting global variables
tickers = ['ARKK','SPY','FNGU']

## Data

In [None]:
# Reading in ETF data to DataFrame
etf_data = pd.read_csv(Path('./Resources/Data/etf_data.csv'), index_col='Date', parse_dates=True, infer_datetime_format=True)

In [None]:
# Separating ETF DataFrame into separate DataFrames
arkk,spy,fngu = [etf_data[i].to_frame(i) for i in tickers]

## Scaling Data

In [None]:
# Helper functions for fitting and applying a scaler
def get_scaler(df):
    """Return a MinMaxScaler with feature range (0, 1) fitted on ``df``."""
    scaler = MinMaxScaler(feature_range=(0,1))
    return scaler.fit(df)


def get_scaled(scaler, array):
    """Return the result of ``scaler.transform(array)``."""
    return scaler.transform(array)

In [None]:
# Creating scalers and Numpy arrays
arkk_scaler, spy_scaler, fngu_scaler =  get_scaler(arkk),get_scaler(spy),get_scaler(fngu)
arkk_array, spy_array, fngu_array = [i.values for i in [arkk,spy,fngu]]

In [None]:
# Created scaled data
arkk_scaled, spy_scaled, fngu_scaled = [get_scaled(i[0],i[1]) for i in [(arkk_scaler,arkk_array),(spy_scaler,spy_array),(fngu_scaler,fngu_array)]];

## Train Test Split

In [None]:
# Setting forecasting timeframe (60 days) and training length
forecast_length = 60
training_length = len(etf_data) - forecast_length

In [None]:
# Method for train test split
def train_test_split(scaled_data, array):
    """Build sliding-window training and testing sets from one scaled series.

    Reads the module-level ``training_length`` and ``forecast_length``.
    Each feature window holds ``forecast_length`` consecutive values from
    column 0 of ``scaled_data``; the training target is the value that
    immediately follows its window.

    Args:
        scaled_data: 2-D array of scaled values; only column 0 is used.
        array: 2-D array of unscaled values, sliced to produce the test
            targets.

    Returns:
        ``[X_train, X_test, y_train, y_test]`` where ``X_train`` and
        ``X_test`` have shape ``(windows, forecast_length, 1)``, ``y_train``
        is 1-D and ``y_test`` is ``array[training_length:, :]``.
    """
    # Rows available for training
    train_rows = scaled_data[0:training_length, :]
    # The test source starts forecast_length rows before the end of the
    # training rows so the first test window ends where training ends
    test_rows = scaled_data[training_length - forecast_length:, :]

    # Window end positions for the training and testing sources
    train_ends = range(forecast_length, len(train_rows))
    test_ends = range(forecast_length, len(test_rows))

    # Collecting windows and targets as Numpy arrays
    X_train = np.array([train_rows[end - forecast_length:end, 0] for end in train_ends])
    y_train = np.array([train_rows[end, 0] for end in train_ends])
    X_test = np.array([test_rows[end - forecast_length:end, 0] for end in test_ends])

    # Reshaping both feature sets to (windows, forecast_length, 1) so the
    # training and testing inputs share one layout; reshaping with -1 also
    # keeps an empty set of windows from failing
    X_train = X_train.reshape(-1, forecast_length, 1)
    X_test = X_test.reshape(-1, forecast_length, 1)

    # Returning training and testing features and targets
    return [X_train, X_test, y_train, array[training_length:, :]]

In [None]:
# Building train and test features and targets for each ETF
(
    (arkk_X_train, arkk_X_test, arkk_y_train, arkk_y_test),
    (spy_X_train, spy_X_test, spy_y_train, spy_y_test),
    (fngu_X_train, fngu_X_test, fngu_y_train, fngu_y_test),
) = [
    train_test_split(scaled, raw)
    for scaled, raw in (
        (arkk_scaled, arkk_array),
        (spy_scaled, spy_array),
        (fngu_scaled, fngu_array),
    )
]

## Create Models

In [None]:
# Helper method to create, summarize and compile model
def get_model():
    """Create, summarize and compile a stacked-LSTM model.

    The model takes inputs of shape ``(forecast_length, 1)`` (module-level
    ``forecast_length``) and stacks two 50-unit LSTM layers, a 25-unit Dense
    layer and a single-unit Dense output layer.

    Returns:
        The model compiled with the 'adam' optimizer, 'mean_absolute_error'
        loss and 'mse' as an additional metric.
    """
    # Creating Sequential model from its ordered layers
    model = Sequential([
        # First LSTM layer returns the full sequence for the next LSTM layer
        LSTM(50, return_sequences=True, input_shape=(forecast_length, 1)),
        # Second LSTM layer returns only its final output
        LSTM(50, return_sequences=False),
        # Hidden Dense layer
        Dense(25),
        # Output layer
        Dense(1),
    ])

    # summary() prints the table itself and returns None, so it is called
    # directly instead of being passed to display()
    model.summary()

    # Compiling model
    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mse'])

    # Returning model
    return model

In [None]:
# Creating models
arkk_model, spy_model, fngu_model = [get_model() for i in range(3)]

## Train Models

In [None]:
# Batch Size and Epochs variables for model training
batch_size,epochs = 1,35

In [None]:
# Training ARKK Model
arkk_model.fit(arkk_X_train, arkk_y_train, batch_size=batch_size, epochs=epochs);

In [None]:
# Training SPY Model
spy_model.fit(spy_X_train, spy_y_train, batch_size=batch_size, epochs=epochs);

In [None]:
# Training FNGU Model
fngu_model.fit(fngu_X_train, fngu_y_train, batch_size=batch_size, epochs=epochs);

## Forecast

In [None]:
# Forecasting with ARKK Model
arkk_forecast = arkk_model.predict(arkk_X_test)

In [None]:
# Forecasting with SPY Model
spy_forecast = spy_model.predict(spy_X_test)

In [None]:
# Forecasting with FNGU Model
fngu_forecast = fngu_model.predict(fngu_X_test)

## Evaluation

In [None]:
# Helper method to print evaluation metrics
def evaluate(ticker, model, scaler, forecast, y_test):
    """Print and return MAE, MSE and RMSE of a forecast against actual values.

    The forecast is passed through ``scaler.inverse_transform`` and then
    compared element-wise with ``y_test``, so ``y_test`` is expected to be in
    the same units as the inverse-transformed forecast.

    Args:
        ticker: Label printed above the metrics.
        model: Not used for the computation; kept so existing calls keep
            working.
        scaler: Object exposing ``inverse_transform``, applied to ``forecast``.
        forecast: Scaled predictions, one value per value in ``y_test``.
        y_test: Actual values the predictions are compared against.

    Returns:
        ``[mae, mse, rmse]``, each rounded to 2 decimals; the RMSE is derived
        from the unrounded MSE.

    Raises:
        ValueError: If the forecast and ``y_test`` hold a different number
            of values.
    """
    # Computing the errors directly: calling model.evaluate(forecast, y_test)
    # would feed the predictions back into the network as inputs and score
    # that output against y_test instead of scoring the forecast itself
    predicted = np.asarray(scaler.inverse_transform(forecast), dtype=float).ravel()
    actual = np.asarray(y_test, dtype=float).ravel()
    if predicted.shape != actual.shape:
        raise ValueError(
            f'forecast has {predicted.size} values but y_test has {actual.size}'
        )
    errors = actual - predicted
    raw_mae = float(np.mean(np.abs(errors)))
    raw_mse = float(np.mean(np.square(errors)))

    # Printing ETF ticker
    print(ticker)

    # Saving each metric as variable
    mae, mse, rmse = round(raw_mae, 2), round(raw_mse, 2), round(raw_mse ** .5, 2)
    # Printing metrics
    print(f'Mean Absolute Error: {mae}')
    print(f'Mean Squared Error: {mse}')
    print(f'Root Mean Squared Error: {rmse}')

    # Returning metrics
    return [mae, mse, rmse]

### ARKK

In [None]:
# Saving and printing ARKK metrics
arkk_mae, arkk_mse, arkk_rmse = evaluate(tickers[0], arkk_model, arkk_scaler, arkk_forecast, arkk_y_test)

### SPY

In [None]:
# Saving and printing SPY metrics
spy_mae, spy_mse, spy_rmse = evaluate(tickers[1], spy_model, spy_scaler, spy_forecast, spy_y_test)

### FNGU

In [None]:
# Saving and printing FNGU metrics
fngu_mae, fngu_mse, fngu_rmse = evaluate(tickers[2], fngu_model, fngu_scaler, fngu_forecast, fngu_y_test)

### Mean Absolute Error

In [None]:
# Helper function to create a DataFrame of one error metric per ticker
def get_error_df(a, s, f):
    """Return a one-row DataFrame (index 0) holding ``a``, ``s`` and ``f``.

    The values are stored under the first, second and third entry of the
    module-level ``tickers`` list respectively.
    """
    by_ticker = {tickers[0]: a, tickers[1]: s, tickers[2]: f}
    return pd.DataFrame(by_ticker, index=[0])

In [None]:
# Instantiating custom plotter class
plotter = Plotter('LSTM_RNN')

In [None]:
# Plotting MAS bar plot
plotter.bar(get_error_df(arkk_mae,spy_mae,fngu_mae),'Mean Absolute Error')

### Mean Squared Error

In [None]:
# Plotting MSE bar plot
plotter.bar(get_error_df(arkk_mse,spy_mse,fngu_mse),'Mean Squared Error')

### Root Mean Squared Error

In [None]:
# Plotting RMSE bar plot
plotter.bar(get_error_df(arkk_rmse,spy_rmse,fngu_rmse),'Root Mean Squared Error')

## Results

### Actual vs Forecast 

In [None]:
# Shared title text for the actual vs forecasted charts
avf = 'Actual vs Forecasted'


# Helper function to display the head and tail of a DataFrame
def display_head_tail(df):
    """Display ``df.head()`` followed by ``df.tail()``."""
    return display(df.head(), df.tail())


# Helper function to build a per-ticker chart title
def get_avf_title(index):
    """Return the ticker at ``index`` followed by the shared ``avf`` title."""
    return f'{tickers[index]} {avf}'

In [None]:
# Helper method to get actual and forecasted adjusted closing prices as DataFrame
def get_actual_forecasted(df, forecast, scaler):
    """Return actual and forecasted adjusted closing prices side by side.

    Reads the module-level ``training_length`` to select the rows of ``df``
    that follow the training period.

    Args:
        df: DataFrame whose first column name is used as the ticker label.
        forecast: Scaled predictions, one per selected row of ``df``.
        scaler: Object exposing ``inverse_transform``, applied to ``forecast``.

    Returns:
        A new DataFrame with the ticker column renamed to
        '<ticker> Actual Adjusted Close' and a
        '<ticker> Forecasted Adjusted Close' column rounded to 2 decimals.
    """
    # Getting ticker symbol
    ticker = df.columns[0]
    forecast_column = f'{ticker} Forecasted Adjusted Close'

    # rename() without inplace returns a new DataFrame, so the column added
    # below is never written into a slice of df
    actual_forecasted = df[training_length:].rename(
        columns={ticker: f'{ticker} Actual Adjusted Close'}
    )

    # Setting forecasted column from the inverse-transformed predictions
    actual_forecasted[forecast_column] = scaler.inverse_transform(forecast)
    # Rounding forecasted column
    actual_forecasted[forecast_column] = actual_forecasted[forecast_column].round(2)

    # Returning actual and forecasted values as DataFrame
    return actual_forecasted

In [None]:
# Saving and displaying ARKK actual and forecasted adjusted closing prices
arkk_actual_forecasted = get_actual_forecasted(arkk,arkk_forecast,arkk_scaler)
display_head_tail(arkk_actual_forecasted)

In [None]:
# Plotting ARKK actual vs forecasted adjusted closing prices
plotter.line(arkk_actual_forecasted, get_avf_title(0)) 

In [None]:
# Saving and displaying SPY actual and forecasted adjusted closing prices
spy_actual_forecasted = get_actual_forecasted(spy,spy_forecast,spy_scaler)
display_head_tail(spy_actual_forecasted)

In [None]:
# Plotting SPY actual vs forecasted adjusted closing prices
plotter.line(spy_actual_forecasted, get_avf_title(1)) 

In [None]:
# Saving and displaying FNGU actual and forecasted adjusted closing prices
fngu_actual_forecasted = get_actual_forecasted(fngu,fngu_forecast,fngu_scaler)
display_head_tail(fngu_actual_forecasted)

In [None]:
# Plotting FNGU actual vs forecasted adjusted closing prices
plotter.line(fngu_actual_forecasted, get_avf_title(2)) 

In [None]:
# Saving and displaying ETF actual and forecasted adjusted closing prices
actual_forecasted = pd.concat([arkk_actual_forecasted,spy_actual_forecasted,fngu_actual_forecasted], axis=1, join='inner')
display_head_tail(actual_forecasted)

In [None]:
# Plotting ETF actual vs forecasted adjusted closing prices
plotter.line(actual_forecasted, avf, None)

In [None]:
# Plotting heatmap of actual ETF price correlation
actual = pd.concat([arkk_actual_forecasted.iloc[:,0],spy_actual_forecasted.iloc[:,0],fngu_actual_forecasted.iloc[:,0]], join='inner', axis=1)
plotter.heatmap(actual, 'Actual')

In [None]:
# Plotting heatmap of forecasted ETF price correlation
forecasted = pd.concat([arkk_actual_forecasted.iloc[:,1],spy_actual_forecasted.iloc[:,1],fngu_actual_forecasted.iloc[:,1]], join='inner', axis=1)
plotter.heatmap(forecasted, 'Forecasted')

In [None]:
# Plotting heatmap of actual vs forecasted ETF price correlation
plotter.heatmap(actual_forecasted, avf)

### Training Data vs Actual vs Forecasted

In [None]:
# Creating DataFrame of training, actual and forecasted adjusted cloding prices
training_data = etf_data[:training_length].rename(columns={tickers[i]: tickers[i] + ' Training Data' for i in range(3)})
train_actual_forecasted = pd.concat([training_data,actual_forecasted])

In [None]:
# Helper method to get DataFrame by ticker
get_df_by_ticker = lambda ticker: train_actual_forecasted[[col for col in train_actual_forecasted if col.startswith(ticker)]]

In [None]:
# Variable for title and lambda helper method to get title
taf = 'Training Data vs Actual vs Forecasted'
get_train_title = lambda index: tickers[index] + ' ' + taf

In [None]:
# Saving and displaying ARKK data
arrk_train_actual_forecasted = get_df_by_ticker(tickers[0])
display_head_tail(arrk_train_actual_forecasted)

In [None]:
# Plotting ARKK data
plotter.line(arrk_train_actual_forecasted, get_train_title(0))

In [None]:
# Saving and displaying SPY data
spy_train_actual_forecasted = get_df_by_ticker(tickers[1])
display_head_tail(spy_train_actual_forecasted)

In [None]:
# Plotting SPY data
plotter.line(spy_train_actual_forecasted, get_train_title(1))

In [None]:
# Saving and displaying FNGU data
fngu_train_actual_forecasted = get_df_by_ticker(tickers[2])
display_head_tail(fngu_train_actual_forecasted)

In [None]:
# Plotting FNGU data
plotter.line(fngu_train_actual_forecasted, get_train_title(2))

In [None]:
# Displaying ETF data
display_head_tail(train_actual_forecasted)

In [None]:
# Plotting ETF data
plotter.line(train_actual_forecasted, taf, None)

In [None]:
# Plotting ETF training data heatmap
plotter.heatmap(etf_data[:training_length], 'Training Data')

## Write-Up

## Conclusion