In [3]:
# Imports
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

import plotly.graph_objs as go

In [4]:
# Load the historical GOOGL price data from CSV and preview the first rows
csv_path = '../data/GOOGL_historical_data (1).csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2004-08-20,2.51575,2.716741,2.503048,2.697563,456686856
1,2004-08-23,2.758334,2.826327,2.715994,2.724711,365122512
2,2004-08-24,2.770538,2.779504,2.579509,2.611887,304946748
3,2004-08-25,2.614129,2.689843,2.587231,2.640031,183772044
4,2004-08-26,2.613879,2.688597,2.606657,2.687601,141897960


In [5]:
# Interactive plotly line chart of the closing price over the whole date range
trace1 = go.Scatter(x=df['Date'], y=df['Close'], mode='lines', name='Data')

layout = go.Layout(
    title="Google Stock",
    xaxis=dict(title="Date"),
    yaxis=dict(title="Close"),
)

fig = go.Figure(data=[trace1], layout=layout)
fig.show()

In [6]:
# Fraction of rows (taken from the start of the series) used for training;
# the remaining trailing rows form the test set
split_percent = 0.80

# Closing prices as a column vector of shape (n_samples, 1)
close_data = df['Close'].values.reshape((-1, 1))
split = int(split_percent * len(close_data))

# Positional split of the prices and of the matching dates at the same index
close_train, close_test = close_data[:split], close_data[split:]
date_train, date_test = df['Date'].iloc[:split], df['Date'].iloc[split:]

# Report the size of each subset, one per line
print(len(close_train), len(close_test), sep='\n')

4100
1025


In [7]:
# Look-back window of 15 time steps: every sample handed to the model is a
# window of `look_back` consecutive closing prices
look_back = 15

# Keras' TimeseriesGenerator slices each series into (window, target) batches.
# The same array is passed as data and targets, so the model learns to predict
# the series' own upcoming value.
train_generator = TimeseriesGenerator(
    data=close_train,
    targets=close_train,
    length=look_back,
    batch_size=20,
)
test_generator = TimeseriesGenerator(
    data=close_test,
    targets=close_test,
    length=look_back,
    batch_size=1,
)

In [12]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Seed Python, NumPy and the backend RNGs so that weight initialization (and
# hence the training curve) is repeatable across "Restart & Run All".
# Some hardware ops may still be non-deterministic.
keras.utils.set_random_seed(42)

# Initializing the sequential model. Stacking the layers in a linear order.
model = Sequential()
# Declare the input explicitly as (number of time steps, number of features).
# Recent Keras versions warn when `input_shape` is passed to a layer inside a
# Sequential model and recommend an Input object as the first entry instead.
model.add(keras.Input(shape=(look_back, 1)))
# LSTM with one layer: 10 neurons and ReLU activation function
model.add(LSTM(10, activation='relu'))
# Adding a fully connected layer to the model and converting all calculations into a single neuron to predict a single continuous value
model.add(Dense(1))
# Configuring the optimizer and loss function
model.compile(optimizer='adam', loss='mse')

# NOTE(review): the generator feeds raw, unscaled prices; the logged loss is
# large and not monotonic between epochs. Consider normalizing the series
# before training — TODO confirm whether that stabilizes convergence.

# Training the model, num_epochs should be set to a reasonable value, preferably not too high to avoid overfitting and not too low to ensure convergence
num_epochs = 20
# Keep the History object (per-epoch loss in `history.history`) instead of
# letting its repr become the cell output.
history = model.fit(train_generator, epochs=num_epochs, verbose=1)

Epoch 1/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 108ms/step - loss: 3820.2959
Epoch 2/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 108ms/step - loss: 449.5230
Epoch 3/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 104ms/step - loss: 1680.7629
Epoch 4/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 104ms/step - loss: 1553.4940
Epoch 5/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 104ms/step - loss: 1298.0765
Epoch 6/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 104ms/step - loss: 1318.6025
Epoch 7/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 107ms/step - loss: 1215.3763
Epoch 8/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 106ms/step - loss: 988.7068
Epoch 9/20
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 106ms/step - loss: 856.9197
Epoch 10/20
[1m205/205[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x376764ed0>

In [13]:
# Run the trained model over every window produced by the test generator
prediction = model.predict(test_generator)

# Flatten the train prices, test prices and predictions to one-dimensional
# arrays so they can be passed directly as y-values when plotting
close_train, close_test, prediction = (
    series.reshape((-1)) for series in (close_train, close_test, prediction)
)

[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15ms/step


In [14]:
# The training data, the predictions, and the ground truth are visualized.
#
# The test generator only emits a target once a full look-back window precedes
# it, so `prediction` has fewer entries than `date_test` (one per window, none
# for the leading dates). Plotting it against the full `date_test` would draw
# every prediction too early. Pair each prediction with the date of the value
# it forecasts by keeping only the trailing dates that have a prediction.
date_prediction = date_test.iloc[len(date_test) - len(prediction):]

trace1 = go.Scatter(
    x = date_train,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = date_prediction,
    y = prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = close_test,
    mode='lines',
    name = 'Ground Truth'
)
layout = go.Layout(
    title = "Google Stock",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()