In [7]:
# !pip install yfinance tensorflow keras numpy matplotlib seaborn tqdm scikit-learn graphviz

In [1]:
from utils import *
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dropout, Dense, Input
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
import plotly.graph_objects as go

sns.set()

2024-05-06 13:02:57.210751: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-06 13:02:57.213687: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-06 13:02:57.251057: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Configure Modeling Parameters and Fetch Data

Enter a ticker and date range you would like to build the model on.  This model takes a a single ticker's data.  Also enter a training size for the proportion of the data you want to include in your training set vs. your test set.

In [80]:
# stock configs
ticker = ['TSLA']
start_date = '2015-04-01'
end_date = '2024-04-05'

# model configs
train_size = 0.8

n_future = 1   # Number of days we want to look into the future based on the past days.
n_past = 30  # Number of past days we want to use to predict the future.

In [81]:
# Data Fetching
data = fetch_stock_data(ticker, start_date, end_date)[ticker[0]]
data.reset_index(drop=False, inplace=True)
data['Date'] = pd.to_datetime(data['Date']).dt.tz_localize(None)

print(data.shape)
included_days = len(data)
data.head()

(2268, 8)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2015-04-01,12.58,12.82,12.403333,12.506,56919000,0.0,0.0
1,2015-04-02,12.682,12.882,12.666667,12.733333,75156000,0.0,0.0
2,2015-04-06,13.2,13.85,13.166667,13.54,186837000,0.0,0.0
3,2015-04-07,13.500667,13.670667,13.409333,13.55,65218500,0.0,0.0
4,2015-04-08,13.88,14.06,13.724667,13.844667,94546500,0.0,0.0


# Model Implementation

Below we implement the model, first we separate the data into a train-test split.  For this LSTM model, we will leverage continuous windows of time for training and test data.


In [82]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data[['Open', 'High', 'Low', 'Close', 'Volume']])

# Create input sequences
lookback = 30
X, y = [], []
for i in range(lookback, len(scaled_data)):
    X.append(scaled_data[i-lookback:i])
    y.append(scaled_data[i, 3])  # Closing price is at index 3
X, y = np.array(X), np.array(y)
# Split the data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

y_train

array([0.01672194, 0.01751117, 0.01749618, ..., 0.51744789, 0.56808994,
       0.56571727])

In [83]:
class Model(tf.keras.Model):
    def __init__(self, learning_rate, num_layers, size_layer, output_size, dropout_rate):
        super(Model, self).__init__()
        self.input_layer = tf.keras.layers.InputLayer(shape=(None, X_train.shape[2]))
        self.lstm_layers = [tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(size_layer, return_sequences=True)) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.dense = tf.keras.layers.Dense(output_size)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    def call(self, inputs, training=False):
        x = inputs
        for lstm in self.lstm_layers:
            x = lstm(x, training=training)
        x = self.dropout(x, training=training)
        return self.dense(x)

In [84]:
def calculate_accuracy(real, predict):
    real = np.array(real) + 1
    predict = np.array(predict) + 1
    percentage = 1 - np.sqrt(np.mean(np.square((real - predict) / real)))
    return percentage * 100

In [85]:
def forecast(model, X_train, y_train, X_test, y_test, epoch):
    model.compile(optimizer=model.optimizer, loss='mean_squared_error')
    
    pbar = tqdm(range(epoch), desc='Training')
    for _ in pbar:
        model.fit(X_train, y_train, epochs=1, batch_size=32, validation_data=(X_test, y_test), verbose=0)
        mse_train = model.evaluate(X_train, y_train, verbose=0)
        mse_test = model.evaluate(X_test, y_test, verbose=0)
        pbar.set_postfix(train_mse=mse_train, test_mse=mse_test)
    
    y_pred = model.predict(X_test)
    mse = np.mean(np.square(y_test - y_pred))
    mae = np.mean(np.abs(y_test - y_pred))
    
    print("Test MSE:", mse)
    print("Test MAE:", mae)
    
    return y_pred


In [69]:
# Instantiate the model
model = Model(learning_rate=0.001, num_layers=4, size_layer=64, output_size=1, dropout_rate=0.8)

# Compile the model
model.compile(optimizer=model.optimizer, loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

Epoch 1/5
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 63ms/step - loss: 0.0153 - val_loss: 0.3478
Epoch 2/5
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - loss: 0.0128 - val_loss: 0.3475
Epoch 3/5
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - loss: 0.0113 - val_loss: 0.3492
Epoch 4/5
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 49ms/step - loss: 0.0112 - val_loss: 0.3479
Epoch 5/5
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - loss: 0.0112 - val_loss: 0.3412


In [72]:
# Train the model and make predictions
y_pred = forecast(model, X_train, y_train, X_test, y_test, epoch=5)

Training: 100%|██████████| 5/5 [00:24<00:00,  4.92s/it, test_mse=0.122, train_mse=0.0664]


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Test MSE: 0.12245288316940997
Test MAE: 0.3313141804479647


In [90]:
# Specify the number of iterations
num_iterations = 5

# Create traces for actual and predicted stock prices
trace_actual = go.Scatter(
    x=data['Date'],
    y=data['Close'],
    mode='lines',
    name='Actual Price',
    line=dict(color='blue')
)

traces_predicted = []
for i in range(num_iterations):
    # Train the model
    model = Model(learning_rate=0.001, num_layers=4, size_layer=64, output_size=1, dropout_rate=0.8)
    y_pred = forecast(model, X_train, y_train, X_test, y_test, epoch=5)
    
    # Reshape the predicted values to match the original shape
    y_pred_reshaped = y_pred.reshape(-1, 1)
    
    # Create a new scaler for inverse transformation
    scaler_pred = MinMaxScaler()
    scaler_pred.min_, scaler_pred.scale_ = scaler.min_[3], scaler.scale_[3]
    
    # Inverse transform the predicted values
    y_pred_inverse = scaler_pred.inverse_transform(y_pred_reshaped).flatten()
    
    # Create a trace for each iteration
    trace_predicted = go.Scatter(
        x=data['Date'][-len(y_pred_inverse):],
        y=y_pred_inverse,
        mode='lines',
        name=f'Predicted Price (Iteration {i+1})',
        line=dict(color=f'rgba(255, 0, 0, {0.2 + i*0.2})', dash='dot')
    )
    traces_predicted.append(trace_predicted)


Training:   0%|          | 0/5 [00:00<?, ?it/s]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x78cb671afe50>>
Traceback (most recent call last):
  File "/home/codespace/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 
Training:  60%|██████    | 3/5 [00:21<00:12,  6.16s/it, test_mse=0.155, train_mse=0.0705]

Training:  60%|██████    | 3/5 [00:23<00:15,  7.95s/it, test_mse=0.155, train_mse=0.0705]


KeyboardInterrupt: 

In [89]:
# Create the layout
layout = go.Layout(
    title='Stock Price Prediction (Multiple Iterations)',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Price'),
    template='plotly_dark'
)

# Create the figure and display it
fig = go.Figure(data=[trace_actual] + traces_predicted, layout=layout)
fig.show()