In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import os
import joblib

# Fetch stock data
stock = "GOOG"
end_date = pd.Timestamp.now()
start_date = end_date - pd.DateOffset(years=10)
data = yf.download(stock, start=start_date, end=end_date)

# Prepare data for training and testing
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[["Close"]])
window_size = 100

# Split data into training and testing sets
train_size = int(len(scaled_data) * 0.7)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

# Prepare LSTM training data
x_train, y_train = [], []
for i in range(window_size, len(train_data)):
    x_train.append(train_data[i - window_size:i])
    y_train.append(train_data[i])

x_train, y_train = np.array(x_train), np.array(y_train)

# Prepare Linear Regression training data
x_train_lr = np.arange(0, len(train_data)).reshape(-1, 1)  # Time as feature
y_train_lr = train_data.reshape(-1, 1)

# Save scaler for future use
if not os.path.exists("saved_models"):
    os.makedirs("saved_models")
np.save("saved_models/scaler_minmax.npy", scaler.min_)
np.save("saved_models/scaler_scale.npy", scaler.scale_)

# Train the Linear Regression model
linear_model = LinearRegression()
linear_model.fit(x_train_lr, y_train_lr)

# Save the regression model
joblib.dump(linear_model, "saved_models/linear_regression_model.pkl")

# Build and train the LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(units=50, return_sequences=True))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(units=1))

lstm_model.compile(optimizer="adam", loss="mean_squared_error")
lstm_model.fit(x_train, y_train, epochs=20, batch_size=32)

# Save the LSTM model
lstm_model.save("saved_models/pretrained_stock_model.h5")
print("Models saved successfully.")

# Evaluate both models on test data
# LSTM evaluation
x_test, y_test = [], []
for i in range(window_size, len(test_data)):
    x_test.append(test_data[i - window_size:i])
    y_test.append(test_data[i])

x_test, y_test = np.array(x_test), np.array(y_test)
lstm_predictions = lstm_model.predict(x_test)
inv_lstm_predictions = scaler.inverse_transform(lstm_predictions)
inv_y_test = scaler.inverse_transform(y_test)

# Linear Regression evaluation
x_test_lr = np.arange(train_size, len(scaled_data)).reshape(-1, 1)
lr_predictions = linear_model.predict(x_test_lr)
inv_lr_predictions = scaler.inverse_transform(lr_predictions)

# Calculate errors
lstm_rmse = np.sqrt(mean_squared_error(inv_y_test, inv_lstm_predictions))
lr_rmse = np.sqrt(mean_squared_error(data["Close"][train_size:].values, inv_lr_predictions))

print(f"LSTM RMSE: {lstm_rmse}")
print(f"Linear Regression RMSE: {lr_rmse}")



[*********************100%%**********************]  1 of 1 completed


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Models saved successfully.


  saving_api.save_model(


LSTM RMSE: 6.318129481718311
Linear Regression RMSE: 23.006723524030292
