In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score

In [2]:
# Load dataset
file_path = "C:/Users/hp/OneDrive/Desktop/aapl_dataset.csv"  # Change to actual path
df = pd.read_csv(file_path, parse_dates=["Date"], index_col="Date")
df = df.rename({'close_aapl': 'Close'}, axis=1)
df = df[['Close']]


In [3]:
# Train-test split (before scaling to avoid leakage)
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]


In [4]:
# Apply MinMaxScaler separately on train & test
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

In [5]:
# Convert to supervised learning format
def create_sequences(data, time_steps=10):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i+time_steps])  # Input sequence
        y.append(data[i+time_steps])    # Target value
    return np.array(X), np.array(y)

In [6]:
time_steps = 10  # Define time steps
X_train, y_train = create_sequences(train_scaled, time_steps)
X_test, y_test = create_sequences(test_scaled, time_steps)

In [13]:
# Define LSTM Model
model = Sequential([
    LSTM(150, return_sequences=False, input_shape=(time_steps, 1)),
    Dense(50, activation='relu'),
    Dense(1)  # Output layer
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

# Train LSTM
model.fit(X_train, y_train, epochs=15, batch_size=16, verbose=1, validation_data=(X_test, y_test))

# Extract LSTM features (hidden states) for training XGBoost
train_features = model.predict(X_train)
test_features = model.predict(X_test)


  super().__init__(**kwargs)


Epoch 1/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 21ms/step - loss: 0.0049 - val_loss: 3.1274
Epoch 2/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - loss: 9.6707e-05 - val_loss: 2.4741
Epoch 3/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - loss: 9.3541e-05 - val_loss: 2.0817
Epoch 4/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 20ms/step - loss: 9.7693e-05 - val_loss: 1.3903
Epoch 5/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - loss: 6.8689e-05 - val_loss: 1.1447
Epoch 6/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - loss: 5.1533e-05 - val_loss: 1.0346
Epoch 7/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - loss: 5.2016e-05 - val_loss: 0.7263
Epoch 8/15
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step - loss: 4.8196e-05 - val_loss: 0.4

In [15]:
# Train XGBoost on LSTM features
xgb_model = xgb.XGBRegressor(n_estimators=500, learning_rate=0.01, max_depth=5, objective='reg:squarederror')
xgb_model.fit(train_features, y_train)

# Predict using XGBoost
xgb_predictions = xgb_model.predict(test_features)

# Convert predictions back to original scale
y_test_original = scaler.inverse_transform(y_test.reshape(-1, 1))  # Rescale actual values
xgb_predictions_original = scaler.inverse_transform(xgb_predictions.reshape(-1, 1))  # Rescale predictions


In [17]:
mse = mean_squared_error(y_test_original, xgb_predictions_original)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(y_test_original, xgb_predictions_original) * 100
r2 = r2_score(y_test_original, xgb_predictions_original)

print(f"✅ Hybrid LSTM + XGBoost Model Performance:")
print(f"🔹 MSE: {mse:.4f}")
print(f"🔹 RMSE: {rmse:.4f}")
print(f"🔹 MAPE: {mape:.4f}%")
print(f"🔹 R²: {r2:.4f}")


✅ Hybrid LSTM + XGBoost Model Performance:
🔹 MSE: 10483.6808
🔹 RMSE: 102.3898
🔹 MAPE: 57.0732%
🔹 R²: -1.3974
