In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional

# Set a random seed for reproducibility
np.random.seed(42)

# Load and preprocess data
data_msft = <provide your cleaned data here>

# Remove columns where the column name contains 'NVDA'
data = data_msft.loc[:, ~data_msft.columns.str.contains('NVDA')]

# Print the cleaned data
print(data.head())

data['date'] = pd.to_datetime(data['date'])

# Extract date-related features
data['day_of_week'] = data['date'].dt.dayofweek
data['month'] = data['date'].dt.month
data['quarter'] = data['date'].dt.quarter
data['year'] = data['date'].dt.year
data['date_unix'] = data['date'].astype('int64') // 10**9

# Scale the data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data[['MSFT_close']])

# Add the scaled values back to the data
data['MSFT_close_scaled'] = data_scaled

# Split data into train and test
split_date = np.quantile(data['date_unix'], 0.8)
train_data = data[data['date_unix'] <= split_date]
test_data = data[data['date_unix'] > split_date]

# Create sequences for LSTM
def create_sequences(data, seq_length):
    """Cut a series into overlapping input windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``, for every ``i`` that leaves room for a target.

    Args:
        data: Array-like whose first axis is time (e.g. shape ``(n,)`` or
            ``(n, n_features)``).
        seq_length: Number of consecutive time steps per input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n - seq_length, seq_length, *data.shape[1:])`` and ``y`` has shape
        ``(n - seq_length, *data.shape[1:])``. When ``data`` has
        ``seq_length`` rows or fewer, both arrays have zero rows but keep the
        trailing dimensions, so callers that index ``X.shape[1]`` still work.
    """
    values = np.asarray(data)
    n_windows = len(values) - seq_length

    if n_windows <= 0:
        # Not enough rows for even one (window, target) pair: return empty
        # arrays that keep the expected rank instead of bare shape-(0,) arrays.
        feature_dims = values.shape[1:]
        return (
            np.empty((0, seq_length) + feature_dims, dtype=values.dtype),
            np.empty((0,) + feature_dims, dtype=values.dtype),
        )

    X = np.stack([values[start:start + seq_length] for start in range(n_windows)])
    # The targets are simply the series shifted by seq_length; copy so the
    # result does not alias the caller's array.
    y = values[seq_length:].copy()
    return X, y

# Number of past time steps fed to the network for each prediction.
seq_length = 90

# Pull the scaled closing price out of each split; the double-bracket
# selection keeps it two-dimensional (one column).
scaled_col = ['MSFT_close_scaled']
train_scaled = train_data[scaled_col].to_numpy()
test_scaled = test_data[scaled_col].to_numpy()

# Slice each split into overlapping windows and their next-step targets.
X_train, y_train = create_sequences(train_scaled, seq_length)
X_test, y_test = create_sequences(test_scaled, seq_length)

# Make the (samples, time steps, 1 feature) layout explicit for the LSTM input.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))



In [None]:
# LSTM

# Define LSTM Model: two stacked LSTM layers with dropout, then a small dense
# head that outputs one value (the next scaled close).
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model (the test windows are passed only to report validation loss
# per epoch).
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Predictions on the test set, mapped back to price units.
# The unscaled targets get their own name: overwriting y_test would leave it in
# price units, so re-running this cell (or running any later cell that uses
# y_test) would validate against the wrong scale and inverse-transform twice.
y_pred = scaler.inverse_transform(model.predict(X_test))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Compute RMSE (in price units)
rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred))
print(f'Test RMSE: {rmse}')

# Plot actual vs predicted values.
# The first seq_length test rows only serve as input context, so the targets
# start at row seq_length.
test_dates = test_data['date'].iloc[seq_length:]
plt.figure(figsize=(12, 6))
sns.lineplot(x=test_dates, y=y_test_actual.flatten(), label='Actual', color='blue')
sns.lineplot(x=test_dates, y=y_pred.flatten(), label='Predicted', color='red')
plt.xlabel("Date")
plt.ylabel("MSFT Close Price")
plt.title("Test Set: Actual vs Predicted Prices")
plt.legend()
plt.xticks(rotation=45)
plt.show()

# Forecast the next 14 days
forecast_horizon = 14
future_predictions = []

# Seed the recursion with the most recent seq_length observations. X_test[-1]
# would stop one step short (the final observation is its target, not part of
# the window), so the first "future" value would be for a day already observed.
input_seq = test_scaled[-seq_length:].reshape(1, seq_length, 1)

for _ in range(forecast_horizon):
    # verbose=0: suppress the per-call progress bar inside the loop.
    pred = model.predict(input_seq, verbose=0)[0, 0]
    future_predictions.append(pred)
    # Slide the window: drop the oldest step, append the new prediction.
    input_seq = np.append(input_seq[:, 1:, :], [[[pred]]], axis=1)

# Convert forecasted values back to original scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)).flatten()

# Create future dates: business days starting the day after the last test date.
date_range = pd.date_range(
    start=pd.to_datetime(test_data['date'].max()) + pd.Timedelta(days=1),
    periods=forecast_horizon,
    freq='B',
)

# Plot forecasted prices
plt.figure(figsize=(12, 6))
sns.lineplot(x=date_range, y=future_predictions, label='Forecasted Prices', color='green')
plt.xlabel("Date")
plt.ylabel("MSFT Close Price")
plt.title("Forecasted MSFT Close Prices for Next 14 Trading Days")
plt.legend()
plt.xticks(rotation=45)
plt.show()

In [None]:
# Bi-LSTM

# Define Bidirectional LSTM Model: same layout as a two-layer stacked LSTM,
# with each recurrent layer wrapped in Bidirectional.
model = Sequential([
    Bidirectional(LSTM(50, return_sequences=True), input_shape=(seq_length, 1)),
    Dropout(0.2),
    Bidirectional(LSTM(50, return_sequences=False)),
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Rebuild the scaled test targets from test_scaled instead of trusting y_test:
# an earlier cell may already have replaced y_test with unscaled prices, which
# would put the validation loss on the wrong scale and cause a second
# inverse-transform below. The target for window i is row i + seq_length.
y_test_scaled = test_scaled[seq_length:]

# Train the model (the test windows are passed only to report validation loss
# per epoch).
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test_scaled))

# Predictions on the test set, mapped back to price units.
y_pred = scaler.inverse_transform(model.predict(X_test))
y_test_actual = scaler.inverse_transform(y_test_scaled.reshape(-1, 1))

# Compute RMSE (in price units)
rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred))
print(f'Test RMSE: {rmse}')

# Plot actual vs predicted values.
# The first seq_length test rows only serve as input context, so the targets
# start at row seq_length.
test_dates = test_data['date'].iloc[seq_length:]
plt.figure(figsize=(12, 6))
sns.lineplot(x=test_dates, y=y_test_actual.flatten(), label='Actual', color='blue')
sns.lineplot(x=test_dates, y=y_pred.flatten(), label='Predicted', color='red')
plt.xlabel("Date")
plt.ylabel("MSFT Close Price")
plt.title("Test Set: Actual vs Predicted Prices")
plt.legend()
plt.xticks(rotation=45)
plt.show()

# Forecast the next 14 days
forecast_horizon = 14
future_predictions = []

# Seed the recursion with the most recent seq_length observations. X_test[-1]
# would stop one step short (the final observation is its target, not part of
# the window), so the first "future" value would be for a day already observed.
input_seq = test_scaled[-seq_length:].reshape(1, seq_length, 1)

for _ in range(forecast_horizon):
    # verbose=0: suppress the per-call progress bar inside the loop.
    pred = model.predict(input_seq, verbose=0)[0, 0]
    future_predictions.append(pred)
    # Slide the window: drop the oldest step, append the new prediction.
    input_seq = np.append(input_seq[:, 1:, :], [[[pred]]], axis=1)

# Convert forecasted values back to original scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)).flatten()

# Create future dates: business days starting the day after the last test date.
date_range = pd.date_range(
    start=pd.to_datetime(test_data['date'].max()) + pd.Timedelta(days=1),
    periods=forecast_horizon,
    freq='B',
)

# Plot forecasted prices
plt.figure(figsize=(12, 6))
sns.lineplot(x=date_range, y=future_predictions, label='Forecasted Prices', color='green')
plt.xlabel("Date")
plt.ylabel("MSFT Close Price")
plt.title("Forecasted MSFT Close Prices for Next 14 Trading Days")
plt.legend()
plt.xticks(rotation=45)
plt.show()
