In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Input, Activation,concatenate, Attention, Bidirectional,GlobalAveragePooling1D
from keras import optimizers
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.layers import LeakyReLU
import matplotlib.pyplot as plt
import keras_tuner as kt
import math
from datetime import datetime, timedelta
from scipy.stats import norm




In [2]:
timestep = 100
testfrac = 0.2
valfrac = 0.25

In [3]:
start_date = '2010-01-01'
today = datetime.now()
end_date = datetime.strftime(today, '%Y-%m-%d')
display(end_date)
data = yf.download("AAPL", start=start_date, end=end_date) #Displays till yesterday as today is not complete
data = data.reset_index()

'2024-01-18'

[*********************100%%**********************]  1 of 1 completed


In [4]:
data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
display(data)

Unnamed: 0,Open,High,Low,Close,Volume
0,7.622500,7.660714,7.585000,7.643214,493729600
1,7.664286,7.699643,7.616071,7.656429,601904800
2,7.656429,7.686786,7.526786,7.534643,552160000
3,7.562500,7.571429,7.466071,7.520714,477131200
4,7.510714,7.571429,7.466429,7.570714,447610800
...,...,...,...,...,...
3528,184.350006,186.399994,183.919998,186.190002,46792900
3529,186.539993,187.050003,183.619995,185.589996,49128400
3530,186.059998,186.740005,185.190002,185.919998,40444700
3531,182.160004,184.259995,180.929993,183.630005,65562600


In [5]:
Xscaler = MinMaxScaler()
Xdata = Xscaler.fit_transform(np.array(data))
Xdata.shape
Yscaler = MinMaxScaler()
Ydata = Yscaler.fit_transform(np.array([data['Close']]).transpose())

In [6]:
def build_timeseries(Xdata, Ydata, time_step):
    dim_0 = Xdata.shape[0] - time_step
    dim_1 = Xdata.shape[1]

    x = np.zeros((dim_0, time_step, dim_1))
    y = np.zeros((Ydata.shape[0] - time_step,))

    for i in range(dim_0):
        x[i] = Xdata[i:time_step+i]
        y[i] = Ydata[time_step+i]
    return x, y

In [7]:
Xdata, Ydata = build_timeseries(Xdata, Ydata, timestep)
display(Xdata.shape)
display(Ydata.shape)

  y[i] = Ydata[time_step+i]


(3532, 1, 5)

(3532,)

In [8]:
X_temp, X_test = train_test_split(Xdata, test_size=testfrac, shuffle=False)
Y_temp, Y_test = train_test_split(Ydata, test_size=testfrac, shuffle=False)
X_train, X_val = train_test_split(X_temp, test_size=valfrac, shuffle=False)
Y_train, Y_val = train_test_split(Y_temp, test_size=valfrac, shuffle=False)


In [9]:
display(X_train.shape)
display(X_val.shape)
display(X_test.shape)

(2118, 1, 5)

(707, 1, 5)

(707, 1, 5)

In [10]:
def model_builder(hp):
    lstm_model = Sequential()
    
    lstm_units_0 = hp.Int('units', min_value=50, max_value=100, step=5)
    lstm_units_1 = hp.Int('units', min_value=0, max_value=100, step=5)
    lstm_units_2 = hp.Int('units', min_value=0, max_value=100, step=5)
    dense_units_0 = hp.Int('units', min_value=0, max_value=100, step=5)
    dense_units_1 = hp.Int('units', min_value=0, max_value=100, step=5)
    dense_units_2 = hp.Int('units', min_value=0, max_value=100, step=5)
    lstm_model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
    lstm_model.add(LSTM(units=lstm_units_0, return_sequences=True, kernel_initializer='he_normal'))
    print("Layer 1")
    lstm_model.add(LSTM(units=lstm_units_1, return_sequences=True))
    print("Layer 2")
    lstm_model.add(LSTM(units=lstm_units_2, return_sequences=True))
    print("Layer 3")
    lstm_model.add(GlobalAveragePooling1D())
    print("Layer 4")
    lstm_model.add(Dense(units=dense_units_0,activation='relu'))
    print("Layer 5")
    lstm_model.add(Dense(units=dense_units_1,activation='relu'))
    print("Layer 6")
    lstm_model.add(Dropout(0.05))
    print("Layer 7")
    lstm_model.add(Dense(units=dense_units_2, activation='linear'))
    print("Layer 8")
    lstm_model.summary()
    
    hyperparameters_learning_rate = hp.Choice('learning_rate', values=[0.01, 0.05, 0.1])
    
    lstm_model.compile(loss='mean_absolute_percentage_error', optimizer=keras.optimizers.Adam(learning_rate=hyperparameters_learning_rate))
    
    return lstm_model

In [11]:
tuner = kt.BayesianOptimization(
    model_builder,
    objective='val_loss',
    max_trials=100)
tuner.search(X_train, Y_train, epochs=50, validation_data=(X_val,Y_val))
lstm_model  = tuner.get_best_models()[0]  
print(lstm_model.summary())
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

Trial 5 Complete [00h 00m 07s]
val_loss: 70.91867065429688

Best val_loss So Far: 17.06915283203125
Total elapsed time: 00h 00m 34s
Layer 1
Layer 2
Layer 3
Layer 4
Layer 5
Layer 6
Layer 7
Layer 8
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 95)             38380     
                                                                 
 lstm_1 (LSTM)               (None, 1, 95)             72580     
                                                                 
 lstm_2 (LSTM)               (None, 1, 95)             72580     
                                                                 
 global_average_pooling1d (  (None, 95)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense (Dense)               (None, 95)                912

In [None]:
lstm_model.save("AAPL_LSTM_V1.keras")