In [15]:
import tensorflow as tf
import keras 
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [16]:
sp500 = yf.Ticker("^GSPC")
sp500 = sp500.history(period="max")
sp500.index= pd.to_datetime(sp500.index)
sp500

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1927-12-30 00:00:00-05:00,17.660000,17.660000,17.660000,17.660000,0,0.0,0.0
1928-01-03 00:00:00-05:00,17.760000,17.760000,17.760000,17.760000,0,0.0,0.0
1928-01-04 00:00:00-05:00,17.719999,17.719999,17.719999,17.719999,0,0.0,0.0
1928-01-05 00:00:00-05:00,17.549999,17.549999,17.549999,17.549999,0,0.0,0.0
1928-01-06 00:00:00-05:00,17.660000,17.660000,17.660000,17.660000,0,0.0,0.0
...,...,...,...,...,...,...,...
2025-01-13 00:00:00-05:00,5782.020020,5838.609863,5773.310059,5836.220215,4421200000,0.0,0.0
2025-01-14 00:00:00-05:00,5859.270020,5871.919922,5805.419922,5842.910156,4142280000,0.0,0.0
2025-01-15 00:00:00-05:00,5905.209961,5960.609863,5905.209961,5949.910156,4544570000,0.0,0.0
2025-01-16 00:00:00-05:00,5963.609863,5964.689941,5930.720215,5937.339844,4285810000,0.0,0.0


In [None]:
sp500 = sp500.drop(columns=["Dividends", "Stock Splits","Volume"])
sp500["Tomorrow"] = sp500["Close"].shift(-1)
sp500 = sp500.loc["1990-01-01":].copy()

sp500

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1970-01-02 00:00:00-05:00,0.000000,93.540001,91.790001,93.000000,93.459999
1970-01-05 00:00:00-05:00,0.000000,94.250000,92.529999,93.459999,92.820000
1970-01-06 00:00:00-05:00,0.000000,93.809998,92.129997,92.820000,92.629997
1970-01-07 00:00:00-05:00,0.000000,93.379997,91.930000,92.629997,92.680000
1970-01-08 00:00:00-05:00,0.000000,93.470001,91.989998,92.680000,92.400002
...,...,...,...,...,...
2025-01-13 00:00:00-05:00,5782.020020,5838.609863,5773.310059,5836.220215,5842.910156
2025-01-14 00:00:00-05:00,5859.270020,5871.919922,5805.419922,5842.910156,5949.910156
2025-01-15 00:00:00-05:00,5905.209961,5960.609863,5905.209961,5949.910156,5937.339844
2025-01-16 00:00:00-05:00,5963.609863,5964.689941,5930.720215,5937.339844,5996.660156


In [18]:
print("Start date:", sp500.index.min())
print("End date:", sp500.index.max())

Start date: 1970-01-02 00:00:00-05:00
End date: 2025-01-17 00:00:00-05:00


In [19]:
train_size = 0.7
val_size = 0.2
test_size = 0.1

n = len(sp500)
train_end = int(n * train_size)
val_end = int(n * (train_size + val_size))

train_data = sp500.iloc[:train_end]
val_data = sp500.iloc[train_end:val_end]
test_data = sp500.iloc[val_end:]

In [20]:
seq_length = 120

features = train_data[['Open', 'High', 'Low', 'Close']].values
targets = train_data['Tomorrow'].values[seq_length:]

train_ds = keras.utils.timeseries_dataset_from_array(
    data=features,
    targets=targets,
    sequence_length=seq_length,
    batch_size=32,
    shuffle=True,
    seed=42
)

In [21]:
val_features = val_data[['Open', 'High', 'Low', 'Close']].values
val_targets = val_data['Tomorrow'].values[seq_length:]

val_ds = keras.utils.timeseries_dataset_from_array(
    data=val_features,
    targets=val_targets,
    sequence_length=seq_length,
    batch_size=32,
    shuffle=False
)

In [22]:
def train_test(model, train_set, validate_set, learning_rate, epochs=20):
    early_stopping_cb = keras.callbacks.EarlyStopping(
        monitor='val_mae',
        patience=50,
        restore_best_weights=True
    )
    
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
    model.compile(
        loss=keras.losses.Huber(),
        optimizer=optimizer,
        metrics=['mae']
    )
    
    history = model.fit(
        train_set,
        validation_data=validate_set,
        epochs=epochs,
        callbacks=[early_stopping_cb]
    )
    
    return history
    
    

In [24]:
tf.random.set_seed(42)

rnn_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(32, return_sequences=True, input_shape=[seq_length, 4]),  
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1)
])

  super().__init__(**kwargs)


In [25]:
history = train_test(rnn_model, train_ds, val_ds, learning_rate=0.1)

Epoch 1/20


[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - loss: 394.0689 - mae: 394.5687 - val_loss: 1485.1256 - val_mae: 1485.6256
Epoch 2/20
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 29ms/step - loss: 375.6032 - mae: 376.1029 - val_loss: 1489.2180 - val_mae: 1489.7180
Epoch 3/20
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 90ms/step - loss: 375.8224 - mae: 376.3222 - val_loss: 1479.2598 - val_mae: 1479.7598
Epoch 4/20
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - loss: 374.9543 - mae: 375.4540 - val_loss: 1482.4550 - val_mae: 1482.9550
Epoch 5/20
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 27ms/step - loss: 374.2682 - mae: 374.7679 - val_loss: 1479.5884 - val_mae: 1480.0884
Epoch 6/20
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 30ms/step - loss: 375.4199 - mae: 375.9197 - val_loss: 1472.6805 - val_mae: 1473.1805
Epoch 7/20
[1m284/300[0m [32

KeyboardInterrupt: 