# LSTM RNN

## Imports

In [132]:
import warnings
import numpy as np
import pandas as pd
import hvplot.pandas
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import yfinance as yf
import matplotlib.pyplot as plt

In [133]:
warnings.filterwarnings('ignore')

## Data

In [134]:
display_head_tail = lambda df: display(df.head(),df.tail())

In [135]:
etf_data = pd.read_csv('./Resources/Data/etf_data.csv', index_col='Date', parse_dates=True, infer_datetime_format=True)
display_head_tail(etf_data)

Unnamed: 0_level_0,ARKK,SPY,FNGU
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-02,52.79,294.65,80.15
2020-03-03,51.73,286.22,73.85
2020-03-04,53.49,298.25,80.57
2020-03-05,52.24,288.33,73.8
2020-03-06,50.87,283.57,68.17


Unnamed: 0_level_0,ARKK,SPY,FNGU
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-03-24,66.4,444.68,236.0
2022-03-25,64.51,446.85,233.4
2022-03-28,67.06,450.03,247.0
2022-03-29,71.39,455.59,257.7
2022-03-30,68.68,452.78,247.5


In [136]:
arkk,spy,fngu = [etf_data[i].to_frame(i) for i in ['ARKK','SPY','FNGU']]

In [137]:
display_head_tail(arkk)

Unnamed: 0_level_0,ARKK
Date,Unnamed: 1_level_1
2020-03-02,52.79
2020-03-03,51.73
2020-03-04,53.49
2020-03-05,52.24
2020-03-06,50.87


Unnamed: 0_level_0,ARKK
Date,Unnamed: 1_level_1
2022-03-24,66.4
2022-03-25,64.51
2022-03-28,67.06
2022-03-29,71.39
2022-03-30,68.68


In [138]:
display_head_tail(spy)

Unnamed: 0_level_0,SPY
Date,Unnamed: 1_level_1
2020-03-02,294.65
2020-03-03,286.22
2020-03-04,298.25
2020-03-05,288.33
2020-03-06,283.57


Unnamed: 0_level_0,SPY
Date,Unnamed: 1_level_1
2022-03-24,444.68
2022-03-25,446.85
2022-03-28,450.03
2022-03-29,455.59
2022-03-30,452.78


In [139]:
display_head_tail(fngu)

Unnamed: 0_level_0,FNGU
Date,Unnamed: 1_level_1
2020-03-02,80.15
2020-03-03,73.85
2020-03-04,80.57
2020-03-05,73.8
2020-03-06,68.17


Unnamed: 0_level_0,FNGU
Date,Unnamed: 1_level_1
2022-03-24,236.0
2022-03-25,233.4
2022-03-28,247.0
2022-03-29,257.7
2022-03-30,247.5


## Scaling Data

In [140]:
dataframes = [arkk,spy,fngu]

In [141]:
arkk_scaler, spy_scaler, fngu_scaler = [MinMaxScaler(feature_range=(0,1)).fit(i) for i in dataframes]

In [142]:
arkk_array, spy_array, fngu_array = [i.values for i in dataframes]

In [143]:
get_scaled = lambda scaler, array: scaler.transform(array)
arkk_scaled, spy_scaled, fngu_scaled = get_scaled(arkk_scaler,arkk_array),get_scaled(spy_scaler,arkk_array),get_scaled(fngu_scaler,fngu_array);

## Train Test Split

In [144]:
forecast_length = 60
training_length = len(etf_data) - forecast_length

In [145]:
def train_test_split_reshape(scaled_data, array):
    X, y = scaled_data[0:training_length, :], scaled_data[training_length - forecast_length:, :]

    X_train, X_test, y_train, y_test = [],[],[], array[training_length:, :]

    for i in range(forecast_length, len(X)):
        X_train.append(X[i-forecast_length:i, 0])
        y_train.append(X[i,0])

    for i in range(forecast_length, len(y)):
        X_test.append(y[i-forecast_length:i, 0])
        
    X_train, X_test, y_train = np.array(X_train), np.array(X_test), np.array(y_train)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    
    return [X_train, X_test, y_train, y_test]

In [146]:
arkk_X_train, arkk_X_test, arkk_y_train, arkk_y_test =  train_test_split_reshape(arkk_scaled, arkk_array)
spy_X_train, spy_X_test, spy_y_train, spy_y_test = train_test_split_reshape(spy_scaled, spy_array)
fngu_X_train, fngu_X_test, fngu_y_train, fngu_y_test = train_test_split_reshape(fngu_scaled, fngu_array)

## Create Models

In [147]:
def get_model(n_layer1, n_layer2, n_layer3, X_train, optimizer, loss_func):
    model = Sequential()
    
    model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    
    display(model.summary())
    
    model.compile(optimizer=optimizer, loss=loss_func)
    
    return model

In [148]:
arkk_model = get_model(50,50,25,arkk_X_train,'adam','mse')

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_32 (LSTM)              (None, 60, 50)            10400     
                                                                 
 lstm_33 (LSTM)              (None, 50)                20200     
                                                                 
 dense_32 (Dense)            (None, 25)                1275      
                                                                 
 dense_33 (Dense)            (None, 1)                 26        
                                                                 
Total params: 31,901
Trainable params: 31,901
Non-trainable params: 0
_________________________________________________________________


None

In [149]:
spy_model = get_model(50,50,25,spy_X_train,'adam','mse')

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_34 (LSTM)              (None, 60, 50)            10400     
                                                                 
 lstm_35 (LSTM)              (None, 50)                20200     
                                                                 
 dense_34 (Dense)            (None, 25)                1275      
                                                                 
 dense_35 (Dense)            (None, 1)                 26        
                                                                 
Total params: 31,901
Trainable params: 31,901
Non-trainable params: 0
_________________________________________________________________


None

In [150]:
fngu_model = get_model(50,50,25,fngu_X_train,'adam','mse')

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_36 (LSTM)              (None, 60, 50)            10400     
                                                                 
 lstm_37 (LSTM)              (None, 50)                20200     
                                                                 
 dense_36 (Dense)            (None, 25)                1275      
                                                                 
 dense_37 (Dense)            (None, 1)                 26        
                                                                 
Total params: 31,901
Trainable params: 31,901
Non-trainable params: 0
_________________________________________________________________


None

## Train Models

In [151]:
arkk_model.fit(arkk_X_train, arkk_y_train, batch_size=1, epochs=1);



In [152]:
spy_model.fit(spy_X_train, spy_y_train, batch_size=1, epochs=1);



In [153]:
fngu_model.fit(fngu_X_train, fngu_y_train, batch_size=1, epochs=1);



## Predict/Forecast

In [155]:
arkk_predictions = arkk_model.predict(arkk_X_test)
spy_predictions = spy_model.predict(spy_X_test)
fngu_predictions = fngu_model.predict(fngu_X_test)



### Actual vs Predicted 

In [256]:
def get_actual_predicted(df, predictions): 
    valid = df[training_length:]
    predictions = scaler.inverse_transform(predictions)
    column_name = df.columns[0]
    valid[f'{column_name} Forecasted Adjusted Close'] = predictions
    valid.rename(columns={column_name:f'{column_name} Actual Adjusted Close'}, inplace=True)
    return valid

In [257]:
arkk_actual_predicted = get_actual_predicted(arkk,arkk_predictions)
display_head_tail(arkk_actual_predicted)

Unnamed: 0_level_0,ARKK Actual Adjusted Close,ARKK Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-04,92.69,105.933006
2022-01-05,86.12,105.765465
2022-01-06,85.58,105.114441
2022-01-07,84.42,104.186623
2022-01-10,84.64,103.066788


Unnamed: 0_level_0,ARKK Actual Adjusted Close,ARKK Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-24,66.4,71.225555
2022-03-25,64.51,71.944176
2022-03-28,67.06,72.544334
2022-03-29,71.39,73.176857
2022-03-30,68.68,74.03019


In [230]:
spy_actual_predicted = get_actual_predicted(spy,spy_predictions)
display_head_tail(spy_actual_predicted)

Unnamed: 0_level_0,SPY Actual Adjusted Close,SPY Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-04,469.93,-24.101727
2022-01-05,460.9,-24.183472
2022-01-06,460.47,-24.516848
2022-01-07,458.65,-24.986734
2022-01-10,458.08,-25.542082


Unnamed: 0_level_0,SPY Actual Adjusted Close,SPY Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-24,444.68,-39.579117
2022-03-25,446.85,-39.223789
2022-03-28,450.03,-38.948513
2022-03-29,455.59,-38.656994
2022-03-30,452.78,-38.244995


In [231]:
fngu_actual_predicted = get_actual_predicted(fngu,fngu_predictions)
display_head_tail(fngu_actual_predicted)

Unnamed: 0_level_0,FNGU Actual Adjusted Close,FNGU Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-04,380.4,124.855186
2022-01-05,342.9,125.264259
2022-01-06,349.1,124.786491
2022-01-07,342.4,124.043999
2022-01-10,344.2,123.056061


Unnamed: 0_level_0,FNGU Actual Adjusted Close,FNGU Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-24,236.0,81.510818
2022-03-25,233.4,83.266479
2022-03-28,247.0,84.90023
2022-03-29,257.7,86.592293
2022-03-30,247.5,88.374146


## Visualization

### Actual vs Predicted 

In [232]:
def plot_df(df):
    return df.hvplot()

In [233]:
arkk_ap_plot = display_actual_predicted(arkk_actual_predicted)
spy_ap_plot = display_actual_predicted(spy_actual_predicted)
fngu_ap_plot = display_actual_predicted(fngu_actual_predicted) 
arkk_ap_plot + spy_ap_plot + fngu_ap_plot

### Training Data vs Actual vs Predicted

In [None]:
# arkk_train_plot,spy_train_plot,fngu_train_plot = [plot_df(i[:training_length]).rename(columns=f'{i.columns[0]} Training Data') for i in dataframes]

AttributeError: 'Curve' object has no attribute 'rename'

In [None]:
# arkk_train_plot

In [258]:
# pd.concat([arkk, arkk_actual_predicted], join='inner', axis=1).apply(lambda x: round(x,2))

In [248]:
pd.concat([spy[:training_length], spy_actual_predicted], join='inner', axis=1)

Unnamed: 0_level_0,SPY,SPY Actual Adjusted Close,SPY Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [249]:
pd.concat([fngu[:training_length], fngu_actual_predicted], join='inner', axis=1)

Unnamed: 0_level_0,FNGU,FNGU Actual Adjusted Close,FNGU Forecasted Adjusted Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


## Evaluation

In [None]:
rmse = np.sqrt(np.mean(predictions - y_test)**2)
rmse

## Write-Up/Conclusion