In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import datetime as dt
import matplotlib.pyplot as plt
import tensorflow as tf

In [62]:
start_date = dt.datetime(2015,11,19)
end_date = dt.datetime(2024,1,14)

#loading from yahoo finance
data = yf.download("GOOGL",start_date, end_date)

[*********************100%%**********************]  1 of 1 completed


In [63]:
df = pd.DataFrame(data)
df = df.reset_index(drop=True) # Drops date which is the index column
df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,37.988998,38.161499,37.8615,37.997002,37.997002,28078000
1,38.465,38.966,38.205002,38.849998,38.849998,55478000
2,38.873501,39.145,38.675499,38.834999,38.834999,34648000
3,38.647999,38.7995,37.905499,38.481499,38.481499,47880000
4,38.584499,38.650002,38.3265,38.463001,38.463001,23612000


In [64]:
close = df['Close'].values
raw_data = df.drop(['Adj Close'], axis=1).values

close.shape, raw_data.shape

((2050,), (2050, 5))

In [65]:
norm_layer = tf.keras.layers.Normalization()
norm_layer.adapt(raw_data)

In [66]:
norm_data = norm_layer(raw_data)
norm_data.shape

TensorShape([2050, 5])

In [96]:
norm_data[0]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-1.1733222, -1.1785861, -1.1667808, -1.1737608, -0.4331457],
      dtype=float32)>

In [67]:
p20 = round(len(raw_data) * 0.2)
p20

410

# Creating datasets

In [181]:
def create_ds(data, target, seq_len, bs):
  ds = tf.keras.utils.timeseries_dataset_from_array(
      data=data,
      targets=target[seq_len:],
      sequence_length=seq_len,
      batch_size=bs
  )

  return ds

Sequence length of 56: Model predicts price of next day based off last 56 days

In [182]:
train_ds = create_ds(norm_data[:-p20], close[:-p20], 56, 128)
val_ds = create_ds(norm_data[-p20:-p20//2], close[-p20:-p20//2], 56, 128)
test_ds = create_ds(norm_data[-p20//2:], close[-p20//2:], 56, 128)

In [183]:
for item, label in test_ds.take(1):
  print(item.shape)
  print(label.shape)

(128, 56, 5)
(128,)


In [176]:
testing = create_ds(raw_data[:100], close[:100], 5, 1)
for item, label in testing.take(2):
  print(f"{item} with shape {item.shape} maps to {label}")
  print("*"*150)

[[[3.79889984e+01 3.81614990e+01 3.78614998e+01 3.79970016e+01
   2.80780000e+07]
  [3.84650002e+01 3.89659996e+01 3.82050018e+01 3.88499985e+01
   5.54780000e+07]
  [3.88735008e+01 3.91450005e+01 3.86754990e+01 3.88349991e+01
   3.46480000e+07]
  [3.86479988e+01 3.87994995e+01 3.79054985e+01 3.84814987e+01
   4.78800000e+07]
  [3.85844994e+01 3.86500015e+01 3.83264999e+01 3.84630013e+01
   2.36120000e+07]]] with shape (1, 5, 5) maps to [38.5984993]
******************************************************************************************************************************************************
[[[3.84650002e+01 3.89659996e+01 3.82050018e+01 3.88499985e+01
   5.54780000e+07]
  [3.88735008e+01 3.91450005e+01 3.86754990e+01 3.88349991e+01
   3.46480000e+07]
  [3.86479988e+01 3.87994995e+01 3.79054985e+01 3.84814987e+01
   4.78800000e+07]
  [3.85844994e+01 3.86500015e+01 3.83264999e+01 3.84630013e+01
   2.36120000e+07]
  [3.84580002e+01 3.87000008e+01 3.83499985e+01 3.85984993e+01
   1

# Creating and training model

In [210]:
tf.keras.backend.clear_session()

In [146]:
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(units=32, input_shape=(None, 5)),
    tf.keras.layers.Dense(1)
])

In [None]:
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="mse", metrics=["mae"])

In [149]:
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("google_stock_model1.keras", save_best_only=True)

In [152]:
model.fit(train_ds, epochs=10, validation_data=val_ds, callbacks=[model_checkpoint])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x78b6d5e59b10>

In [154]:
model = tf.keras.models.load_model("google_stock_model1.keras")

In [155]:
model.evaluate(test_ds)



[11.614954948425293, 2.8600399494171143]

# Creating a second model

In [211]:
model2 = tf.keras.Sequential([
    tf.keras.layers.LSTM(units=32, input_shape=(None, 5)),
    tf.keras.layers.Dense(1)
])

In [212]:
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model2.compile(optimizer=optimizer, loss="mse", metrics=["mae"])

In [213]:
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("google_stock_model2.keras", save_best_only=True)

In [223]:
model2.fit(train_ds, epochs=5, validation_data=val_ds, callbacks=[model_checkpoint])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x78b6d5131c60>

# Third model

In [225]:
model3 = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(56, 5)),
    tf.keras.layers.Dense(units=32, activation="relu"),
    tf.keras.layers.Dense(units=16, activation="relu"),
    tf.keras.layers.Dense(1)
])

In [235]:
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.003)
model3.compile(optimizer=optimizer, loss="mse", metrics=["mae"])

In [227]:
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("google_stock_model3.keras", save_best_only=True)

In [None]:
model3.fit(train_ds, epochs=30, validation_data=val_ds, callbacks=[model_checkpoint])

# Making predictions

In [200]:
def make_pred(model):
  for item, label in test_ds.take(1):
    input = tf.reshape(item[0], (1, item[0].shape[0], item[0].shape[1])) # reshape to add batchsize = 1, otherwise error from model which expects input_dim=3
    y_pred = model.predict(input, verbose=0)
    print(f"Prediction: {y_pred[0][0]}\nActual: {label[0]}")

In [202]:
make_pred(model)

Prediction: 119.01102447509766
Actual: 123.63999938964844


In [237]:
model2 = tf.keras.models.load_model("google_stock_model2.keras")
make_pred(model2)



Prediction: 105.74337005615234
Actual: 123.63999938964844


In [238]:
model3 = tf.keras.models.load_model("google_stock_model3.keras")
make_pred(model3)



Prediction: 120.695556640625
Actual: 123.63999938964844
