In [5]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Load the California housing data into a labelled feature frame and target.
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then reuse those statistics for
# the test rows so no test-set information reaches the models.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Two base learners plus a linear meta-learner that combines their outputs.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
xgb = XGBRegressor()
meta_model = LinearRegression()
stack = StackingRegressor(
    estimators=[('rf', rf), ('xgb', xgb)],
    final_estimator=meta_model,
)

# Each model is trained on the scaled training set and immediately asked for
# test-set predictions. The base learners are also fitted on their own so
# they can be scored individually next to the stacked ensemble.
rf_pred = rf.fit(X_train_scaled, y_train).predict(X_test_scaled)
xgb_pred = xgb.fit(X_train_scaled, y_train).predict(X_test_scaled)
stack_pred = stack.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Test-set mean squared error per model (lower is better).
rf_mse, xgb_mse, stack_mse = (
    mean_squared_error(y_test, predicted)
    for predicted in (rf_pred, xgb_pred, stack_pred)
)

# Side-by-side summary table of the three scores.
results = pd.DataFrame({
    'Model': ['Random Forest', 'XGBoost', 'Stacked'],
    'MSE': [rf_mse, xgb_mse, stack_mse],
})

print(results)


           Model       MSE
0  Random Forest  0.255170
1        XGBoost  0.222590
2        Stacked  0.217038
