In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from math import sqrt

# Fetch the California housing dataset bundled with scikit-learn.
data = fetch_california_housing()

# Feature matrix (X) and regression target (y).
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Seed shared by every estimator below. All three ensembles are randomized
# (bootstrap sampling / feature or sample subsampling), so without a seed the
# tuned hyperparameters and scores differ from run to run.
RANDOM_STATE = 42

# Candidate ensemble regressors as (name, estimator) pairs. The name is the
# lookup key into `param_grids`.
ensemble_models = [
    ("RandomForest", RandomForestRegressor(random_state=RANDOM_STATE)),
    ("AdaBoost", AdaBoostRegressor(random_state=RANDOM_STATE)),
    ("GradientBoosting", GradientBoostingRegressor(random_state=RANDOM_STATE)),
]

# Hyperparameter grid for each ensemble, keyed by the names used in
# `ensemble_models`. Every combination within a grid is evaluated.
param_grids = {
    "RandomForest": {
        "n_estimators": [100, 200],
        "max_depth": [None, 10],
        "min_samples_split": [2, 5],
    },
    "AdaBoost": {
        "n_estimators": [50, 100],
        "learning_rate": [0.01, 0.1],
    },
    "GradientBoosting": {
        "n_estimators": [50, 100],
        "learning_rate": [0.01, 0.1],
        "max_depth": [3, 4],
    },
}

# Per-model summary: tuned hyperparameters, cross-validated RMSE (used for
# model selection) and RMSE on the held-out test split (used for reporting).
results = {}

# Tune every ensemble with an exhaustive grid search.
for model_name, model in ensemble_models:
    param_grid = param_grids[model_name]

    # 5-fold cross-validation on the training split only. n_jobs=-1 spreads
    # the candidate/fold fits over all available cores; it does not change
    # which candidate wins.
    grid_search = GridSearchCV(
        model,
        param_grid,
        scoring='neg_mean_squared_error',
        cv=5,
        n_jobs=-1,
    )

    # Fit all candidates; with the default refit=True the best one is then
    # retrained on the full training split.
    grid_search.fit(X_train, y_train)

    # best_score_ is the mean negated MSE of the winning candidate across the
    # folds, so flip the sign before taking the root.
    cv_rmse = sqrt(-grid_search.best_score_)

    # Score the refitted best estimator on the untouched test split.
    y_pred = grid_search.predict(X_test)
    rmse = sqrt(mean_squared_error(y_test, y_pred))

    results[model_name] = {
        "best_params": grid_search.best_params_,
        "cv_rmse": cv_rmse,
        "RMSE": rmse,
    }

# Pick the winner by cross-validated error, not test error: choosing on the
# test split would turn it into a tuning set and make the reported RMSE
# optimistically biased.
best_ensemble = min(results, key=lambda name: results[name]["cv_rmse"])
best_params = results[best_ensemble]["best_params"]
# Test-set RMSE of the selected model -- an unbiased estimate because the
# test split played no part in the selection.
best_rmse = results[best_ensemble]["RMSE"]

print(f"Best Ensemble: {best_ensemble}")
print(f"Best Hyperparameters: {best_params}")
print(f"Best RMSE: {best_rmse}")

Best Ensemble: RandomForest
Best Hyperparameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Best RMSE: 0.5018215331774152
