In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from functions import *

## Load training and testing dataframe + data cleanup

In [2]:
# Read the stock price history into a dataframe. The CSV's first column
# (labelled 'Unnamed: 0' by pandas) becomes the index, and parse_dates=True
# asks pandas to parse that index as dates.
# NOTE(review): infer_datetime_format is deprecated as of pandas 2.0 - confirm
# the pinned pandas version before dropping the argument.
history_csv = Path('../data/stocks_history.csv')
df = pd.read_csv(
    history_csv,
    parse_dates=True,
    infer_datetime_format=True,
    index_col='Unnamed: 0',
)
df.head()

Unnamed: 0,MSFT_open,MSFT_high,MSFT_low,MSFT_close,AMD_open,AMD_high,AMD_low,AMD_close,TSLA_open,TSLA_high,...,JNJ_low,JNJ_close,REGN_open,REGN_high,REGN_low,REGN_close,GILD_open,GILD_high,GILD_low,GILD_close
2010-06-29,24.13,24.2,23.11,23.31,7.93,7.93,7.41,7.48,19.0,25.0,...,58.68,59.24,23.67,23.95,22.86,22.98,35.4,35.61,34.74,34.97
2010-06-30,23.3,23.68,22.95,23.01,7.58,7.65,7.3,7.32,25.79,30.4192,...,58.94,59.06,23.05,23.47,22.32,22.32,34.83,35.13,34.26,34.28
2010-07-01,23.09,23.32,22.73,23.16,7.35,7.53,7.1,7.39,25.0,25.92,...,58.65,59.07,22.31,22.37,20.45,20.79,34.24,34.27,33.3,34.14
2010-07-02,23.36,23.48,23.05,23.27,7.45,7.48,7.02,7.17,23.0,23.1,...,58.85,59.08,21.06,21.88,20.75,21.61,34.38,35.16,34.18,34.87
2010-07-06,23.7,24.09,23.584,23.82,7.4,7.42,6.96,7.04,20.0,20.0,...,58.669,59.08,22.03,22.03,21.16,21.36,35.11,35.42,34.415,34.77


In [3]:
# Keep only the closing prices: drop every open/high/low column.
# The list is built ticker by ticker, i.e. MSFT_open, MSFT_high, MSFT_low,
# AMD_open, ... GILD_low.
tickers = ['MSFT', 'AMD', 'TSLA', 'JNJ', 'REGN', 'GILD']
dropped_columns = [
    f'{ticker}_{field}'
    for ticker in tickers
    for field in ('open', 'high', 'low')
]

# Rebind df instead of dropping in place, and ignore columns that are already
# gone, so re-running this cell is a no-op rather than a KeyError.
# Trade-off: a misspelled column name is silently skipped, so check the
# head() output below still shows only *_close columns.
df = df.drop(columns=dropped_columns, errors='ignore')
df.head()

Unnamed: 0,MSFT_close,AMD_close,TSLA_close,JNJ_close,REGN_close,GILD_close
2010-06-29,23.31,7.48,23.89,59.24,22.98,34.97
2010-06-30,23.01,7.32,23.83,59.06,22.32,34.28
2010-07-01,23.16,7.39,21.96,59.07,20.79,34.14
2010-07-02,23.27,7.17,19.2,59.08,21.61,34.87
2010-07-06,23.82,7.04,16.11,59.08,21.36,34.77


## Train models to find optimum number of epochs - 1 LSTM layer

In [4]:
# Load the best 1-LSTM hyperparameter candidates and discard the
# 'Unnamed: 0' column (presumably an index saved along with the CSV).
candidates_1lstm_path = Path('./df_best_performers_1lstm.csv')
df_candidates_1lstm = (
    pd.read_csv(candidates_1lstm_path)
    .drop(columns='Unnamed: 0')
)

In [5]:
# Show the 1-LSTM candidate table loaded in the previous cell.
df_candidates_1lstm

Unnamed: 0,stock,window size,dropout fraction,epochs,batch size,mse,rmse
0,AMD_close,15,0.15,10,150,3.392164,1.841783
1,GILD_close,1,0.15,10,10,2.603896,1.613659
2,JNJ_close,50,0.3,10,10,6.606514,2.570314
3,MSFT_close,25,0.15,10,10,7.780636,2.789379
4,REGN_close,50,0.1,10,10,169.053021,13.002039
5,TSLA_close,25,0.15,10,10,863.553889,29.386287


In [6]:
# Run one_lstm once per candidate row, with a budget of 150 epochs and
# callbacks that track the training loss.
for _, candidate in df_candidates_1lstm.iterrows():

    stock = candidate['stock']

    # The same column position is used as both feature and target.
    feature_column = target_column = df.columns.get_loc(stock)

    # Hyperparameters taken from the candidate row; only the epoch budget is
    # fixed here.
    window_size = candidate['window size']
    dropout_fraction = candidate['dropout fraction']
    batch_size = candidate['batch size']
    epochs = 150

    model_path = Path('./models/' + stock + '_1lstm_model.h5')

    # EarlyStopping: patience of 15 epochs on the training loss.
    early_stop = EarlyStopping(monitor="loss", mode="min", patience=15)
    # ModelCheckpoint: writes to model_path, keeping only the best (lowest)
    # training loss seen so far.
    check_point = ModelCheckpoint(
        str(model_path),
        monitor="loss",
        verbose=0,
        save_best_only=True,
        mode="min",
    )

    # one_lstm comes from the star-import of `functions`.
    # NOTE(review): presumably it builds the model and forwards both
    # callbacks to fit() - confirm in functions.py.
    one_lstm(
        df=df,
        feature_column=feature_column,
        target_column=target_column,
        window_size=window_size,
        batch_size=batch_size,
        dropout_fraction=dropout_fraction,
        epoch=epochs,
        name=model_path,
        check_point=check_point,
        early_stop=early_stop,
    )

## Train models to find optimum number of epochs - 2 LSTM layers

In [None]:
# Load the best 2-LSTM hyperparameter candidates. Read from
# candidates_2lstm_path (not the 1-LSTM path) so the 2-LSTM training loop uses
# its own hyperparameters.
# Assumes this CSV has the same layout as the 1-LSTM one, including the
# 'Unnamed: 0' column that is discarded here - TODO confirm.
candidates_2lstm_path = Path('./df_best_performers_2lstm.csv')
df_candidates_2lstm = pd.read_csv(candidates_2lstm_path)
df_candidates_2lstm = df_candidates_2lstm.drop(columns='Unnamed: 0')

In [None]:
# Show the candidate table loaded in the previous cell.
df_candidates_2lstm

In [None]:
# Run two_lstm once per candidate row, with a budget of 150 epochs and
# callbacks that track the training loss.
for _, row in df_candidates_2lstm.iterrows():

    # The same column position is used as both feature and target.
    feature_column = df.columns.get_loc(row['stock'])
    target_column = df.columns.get_loc(row['stock'])

    # Hyperparameters taken from the candidate row; only the epoch budget is
    # fixed here.
    window_size = row['window size']
    dropout_fraction = row['dropout fraction']
    batch_size = row['batch size']
    epochs = 150

    model_path = Path('./models/'+row['stock']+'_2lstm_model.h5')

    # ModelCheckpoint: writes to model_path, keeping only the best (lowest)
    # training loss seen so far.
    check_point = ModelCheckpoint(str(model_path), monitor="loss", verbose=0,
                                  save_best_only=True, mode="min")
    # EarlyStopping: patience of 15 epochs on the training loss.
    early_stop = EarlyStopping(monitor="loss", mode="min", patience=15)

    # two_lstm comes from the star-import of `functions`.
    # NOTE(review): presumably it builds the model and forwards both
    # callbacks to fit() - confirm in functions.py.
    two_lstm(
        df=df,
        feature_column=feature_column,
        target_column=target_column,
        window_size=window_size,
        batch_size=batch_size,
        dropout_fraction=dropout_fraction,
        epoch=epochs,
        # Pass the per-stock model path, as the 1-LSTM loop does; the bare
        # identifier `name` is not defined anywhere in this notebook.
        name=model_path,
        check_point=check_point,
        early_stop=early_stop
    )