# Multivariate LSTM

In [None]:
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.layers import LSTM, BatchNormalization, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
# Learning rate scheduler
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    For epochs 0-9 the incoming rate is returned unchanged. From epoch 10
    onward the incoming rate is multiplied by ``exp(-0.1)`` on every call.
    """
    hold_epochs = 10
    if epoch >= hold_epochs:
        return lr * np.exp(-0.1)
    return lr

# Manual RSI calculation
def compute_rsi(data, periods=14):
    """Compute a Relative Strength Index from simple rolling averages.

    Args:
        data: pandas Series (or DataFrame) of prices.
        periods: Rolling window length used for the average gain and loss.

    Returns:
        An object shaped like ``data`` holding RSI values. Rows that do not
        yet have a full window are NaN.
    """
    change = data.diff()

    # Split the row-to-row change into its upward and downward parts; every
    # entry that is not a strict gain (or strict loss) is replaced by 0.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)

    avg_gain = upward.rolling(window=periods).mean()
    avg_loss = downward.rolling(window=periods).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

# Manual SMA calculation
def compute_sma(data, periods):
    """Return the simple moving average of ``data`` over ``periods`` rows."""
    window = data.rolling(window=periods)
    return window.mean()

In [None]:
# Fetch GOOGL and S&P 500 daily data through April 14, 2025.
# yfinance treats `end` as exclusive, so the end date is the day AFTER the
# last session that should be included.
end_date = '2025-04-15'
start_date = '2022-01-01'
googl_data = yf.download('GOOGL', start=start_date, end=end_date)
sp500_data = yf.download('^GSPC', start=start_date, end=end_date)

# Combine relevant features. Column order matters: Close sits at position 3,
# which is the index the sequence builder and the inverse transform rely on.
data = googl_data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
data['SP500_Close'] = sp500_data['Close']

# Add manual technical indicators
data['RSI'] = compute_rsi(data['Close'], periods=14)
data['SMA_20'] = compute_sma(data['Close'], periods=20)
data['SMA_50'] = compute_sma(data['Close'], periods=50)

# Handle missing values (the rolling indicators leave NaNs in the leading
# rows until their windows are full)
data = data.dropna()

# Normalize the data: each column is scaled independently to [0, 1].
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split, so the test rows influence the scaling -- confirm this is acceptable.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

# Prepare sequences for LSTM (120-day lookback)
def create_sequences(data, seq_length, target_col=3):
    """Build sliding-window samples and next-step targets from a 2-D array.

    Args:
        data: Array-like of shape (n_rows, n_features).
        seq_length: Number of consecutive rows in each input window.
        target_col: Column whose value in the row immediately after each
            window is used as the target. Defaults to 3 (the Close column).

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        (n_rows - seq_length, seq_length, n_features) and ``y`` has shape
        (n_rows - seq_length,). When ``data`` has no more than ``seq_length``
        rows, both arrays are empty but keep those trailing dimensions.
    """
    data = np.asarray(data)
    n_samples = len(data) - seq_length

    if n_samples <= 0:
        # Keep the trailing dimensions so callers can still read X.shape[2].
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_length] for start in range(n_samples)])
    # The target for window `start` is row `start + seq_length`.
    y = data[seq_length:, target_col].copy()
    return X, y

# Window length: each sample holds this many consecutive rows
seq_length = 120
X, y = create_sequences(scaled_data, seq_length)

# Chronological split: the first 80% of samples train, the remaining 20% test
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

[*********************100%***********************]  1 of 1 completed

YF.download() has changed argument auto_adjust default to True



[*********************100%***********************]  1 of 1 completed


In [None]:
# Build LSTM model: three stacked LSTM layers, each followed by batch
# normalization and dropout, then a small dense head with a single output.
# The input shape is declared with an explicit Input layer instead of an
# `input_shape=` argument on the first LSTM, which Keras warns against for
# Sequential models.
model = Sequential([
    Input(shape=(seq_length, X.shape[2])),  # (rows per window, features per row)
    LSTM(100, return_sequences=True),
    BatchNormalization(),
    Dropout(0.3),
    LSTM(100, return_sequences=True),
    BatchNormalization(),
    Dropout(0.3),
    LSTM(50),  # last recurrent layer returns only its final output
    BatchNormalization(),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dense(1)  # single regression output: the scaled Close value
])

  super().__init__(**kwargs)


In [None]:
# Compile model: Adam optimizer with mean-squared-error loss
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer)

# Train the model; the scheduler callback supplies the learning rate per epoch
callbacks = [LearningRateScheduler(scheduler)]
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=40,
    validation_split=0.1,
    verbose=1,
    callbacks=callbacks,
)

Epoch 1/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 63ms/step - loss: 0.5305 - val_loss: 0.4196 - learning_rate: 0.0010
Epoch 2/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.2140 - val_loss: 0.3225 - learning_rate: 0.0010
Epoch 3/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.1715 - val_loss: 0.3528 - learning_rate: 0.0010
Epoch 4/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.1140 - val_loss: 0.2648 - learning_rate: 0.0010
Epoch 5/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0976 - val_loss: 0.1927 - learning_rate: 0.0010
Epoch 6/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0977 - val_loss: 0.2344 - learning_rate: 0.0010
Epoch 7/40
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.0712 - val_loss: 0.1556 - learning_rate: 0.0010

<keras.src.callbacks.history.History at 0x79be7e292210>

In [None]:
# Evaluate model on test set. y_test comes from the scaled data, so both
# errors are expressed in scaled units rather than in dollars.
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print(f"Test MAE: {mae:.4f}, Test RMSE: {rmse:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step
Test MAE: 0.0968, Test RMSE: 0.1250


In [None]:
# Prepare input for predicting the next session's Close price: the most
# recent `seq_length` rows of scaled features, with a leading batch axis.
last_sequence = scaled_data[-seq_length:]
last_sequence = last_sequence.reshape(1, seq_length, last_sequence.shape[1])

# Predict the (scaled) Close price for the next session
predicted_scaled = model.predict(last_sequence)

# Inverse transform to get actual price. The scaler was fit on every feature
# column, so the prediction is placed in the Close slot (index 3) of an
# otherwise-zero row and only that slot is read back.
dummy = np.zeros((1, data.shape[1]))
dummy[0, 3] = predicted_scaled[0, 0]
predicted_price = scaler.inverse_transform(dummy)[0, 3]

# Label the forecast with the business day after the last row in `data`
# instead of a hard-coded date, so the label cannot drift from the data.
# NOTE(review): BDay skips weekends only; market holidays are not considered.
prediction_date = data.index[-1] + pd.offsets.BDay(1)
print(f"Predicted GOOGL Close price for {prediction_date:%B} {prediction_date.day}, {prediction_date.year}: ${predicted_price:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Predicted GOOGL Close price for April 15, 2025: $157.56
