# Random Forest Regression
Using scikit-learn to predict California housing prices.

We load the `fetch_california_housing` dataset, split it into training and test sets, and train a `RandomForestRegressor`. We evaluate using MAE and RMSE.

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd

In [None]:
# Load the California housing data as pandas objects (features in data.data,
# target in data.target).
data = fetch_california_housing(as_frame=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42
)

# Fit a random forest with default hyperparameters and a fixed seed.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows.
preds = model.predict(X_test)
mae = mean_absolute_error(y_test, preds)
# RMSE is computed as sqrt(MSE) explicitly: the `squared=False` shortcut of
# mean_squared_error was deprecated and then removed in newer scikit-learn
# releases, so taking the root ourselves works on old and new versions alike.
rmse = mean_squared_error(y_test, preds) ** 0.5
print('MAE:', mae)
print('RMSE:', rmse)

In [None]:
# Label each of the fitted forest's importance scores with its feature name,
# then show them from most to least important.
importances = pd.Series(
    data=model.feature_importances_,
    index=data.feature_names,
)
_ranked = importances.sort_values(ascending=False)
print(_ranked)

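Optional: compute SHAP values to interpret the model. The cell below is commented out; uncomment it to run (it requires the `shap` package to be installed).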

In [None]:
# import shap
# explainer = shap.Explainer(model.predict, X_train)
# shap_values = explainer(X_test[:100])
# shap.plots.beeswarm(shap_values)