In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn import metrics
import matplotlib.pyplot as plt

# Read the raw gold-price table from disk
gold_data = pd.read_csv('gld_price_data_extended_v3.csv')

# 'Date' is not used as a predictor; discard it when the file has one
gold_data = gold_data.drop(columns=['Date'], errors='ignore')

# Keep only fully populated rows
gold_data = gold_data.dropna()

# Separate the target column ('GLD') from the predictor columns
y = gold_data['GLD']
X = gold_data.drop(columns=['GLD'])

# Hold out 20% of the rows for testing; fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

# Level-0 learners for the stack (deliberately ONLY 2 MODELS)
base_models = [
    (
        'rf',
        RandomForestRegressor(n_estimators=30, max_depth=5, random_state=2),
    ),
    (
        'xgb',
        XGBRegressor(
            n_estimators=30, learning_rate=0.2, max_depth=3, random_state=2
        ),
    ),
]

# Combine the base learners under an XGBoost meta-learner
stacking_reg = StackingRegressor(
    estimators=base_models,
    final_estimator=XGBRegressor(
        n_estimators=20, learning_rate=0.3, random_state=2
    ),
)

# Fit on the training split (fit returns the estimator itself), then
# predict the held-out rows
y_pred = stacking_reg.fit(X_train, Y_train).predict(X_test)

# Score the held-out predictions against the true target values
mse = metrics.mean_squared_error(Y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed just above
mae = metrics.mean_absolute_error(Y_test, y_pred)
r2_score = metrics.r2_score(Y_test, y_pred)

# Report every metric on its own line, in a fixed order
print(
    f"Reduced R² Score: {r2_score}",
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {rmse}",
    sep="\n",
)

# FEATURE IMPORTANCE ANALYSIS
# The final estimator of a StackingRegressor is trained on the base models'
# predictions, not on the original columns of X (passthrough is not enabled
# when the stacker is built). Its importances therefore hold one entry per
# base model, so they are labelled with the base-model names; pairing them
# with X.columns would mislabel the scores and break the bar plot below.
feature_importance = stacking_reg.final_estimator_.feature_importances_
feature_names = [name for name, _ in stacking_reg.estimators]

# Plot how much the final model relies on each base model (left disabled)
# plt.figure(figsize=(10, 5))
# plt.barh(feature_names, feature_importance, color='skyblue')
# plt.xlabel("Feature Importance Score")
# plt.ylabel("Base Model")
# plt.title("Feature Importance in Final Model")
# plt.show()


Reduced R² Score: 0.7060989319151031
Mean Absolute Error (MAE): 6.407196159787213
Mean Squared Error (MSE): 161.28185916652524
Root Mean Squared Error (RMSE): 12.699679490700749
