In [1]:
# Import required libraries
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
import pandas as pd

# 1. Load the California housing dataset
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

# 2. Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Create individual regression models
lr = LinearRegression()
rf = RandomForestRegressor(random_state=42)
gbr = GradientBoostingRegressor(random_state=42)

# 4. Create a list of estimators (model name, model object)
estimators = [
    ('lr', lr),
    ('rf', rf),
    ('gbr', gbr)
]

# 5. Create a Voting Regressor
voting_reg = VotingRegressor(estimators=estimators)

# 6. Fit the model
voting_reg.fit(X_train, y_train)

# 7. Make predictions
y_pred = voting_reg.predict(X_test)

# 8. Evaluate performance
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"R² Score: {r2:.3f}")
print(f"RMSE: {rmse:.3f}")

# 9. Optional: Compare individual model performance using cross-validation
for name, model in estimators:
    scores = cross_val_score(model, X, y, cv=5, scoring='r2')
    print(f"{name} Mean R²: {np.round(np.mean(scores), 3)}")

# 10. Voting Regressor cross-validation
voting_scores = cross_val_score(voting_reg, X, y, cv=5, scoring='r2')
print(f"Voting Regressor Mean R²: {np.round(np.mean(voting_scores), 3)}")


R² Score: 0.764
RMSE: 0.556
lr Mean R²: 0.553
rf Mean R²: 0.656
gbr Mean R²: 0.67
Voting Regressor Mean R²: 0.676
