In [None]:
# Imports: data handling, plotting, dataset/modelling utilities, and model persistence

import pandas as pd, numpy as np
import seaborn as sns, matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split    
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import HistGradientBoostingRegressor
import joblib, pathlib

ModuleNotFoundError: No module named 'plotly'

In [None]:
# Fetch the California housing dataset as pandas objects and rename the
# label column to the generic name "Target".
data = fetch_california_housing(as_frame=True)
label_rename = {"MedHouseVal": "Target"}
df = data.frame.rename(columns=label_rename)

In [None]:
# Quick look at the first rows of the frame.
display(df.head())

# Histogram (with KDE overlay) of the target column.
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(df["Target"], kde=True, ax=ax)
ax.set(title="Target Distribution ($100k units)", xlabel="MedHouseVal")
fig.tight_layout()
plt.show()

# Scatter of target against median income with a fitted linear trend line.
# seaborn.regplot draws both the points and the regression line on a
# matplotlib Axes; ci=None skips the bootstrapped confidence band, which is
# not needed here and is slow to compute on the full frame.
fig, ax = plt.subplots(figsize=(8, 4))
sns.regplot(
    data=df,
    x="MedInc",
    y="Target",
    ci=None,
    scatter_kws={"s": 8, "alpha": 0.3},
    line_kws={"color": "crimson"},
    ax=ax,
)
ax.set_title("Target vs Median Income (strongest single predictor)")
fig.tight_layout()
plt.show()

In [None]:
# Separate the label from the feature columns, then hold out 20% of the rows
# for evaluation (fixed seed so the split is reproducible).
y = df["Target"]
X = df.drop(columns=["Target"])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Gradient-boosted tree regressor with a fixed seed; trained on the
# training split only.
model = HistGradientBoostingRegressor(
    learning_rate=0.08,
    max_depth=None,
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out split: MSE, its square root (RMSE),
# and R^2, then print all three together.
pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, pred), r2_score(y_test, pred)
rmse = np.sqrt(mse)
scores = {"mse": mse, "rmse": rmse, "r2": r2}
print(scores)

In [None]:
# Predict for the first held-out row and scale the model output by 100,000
# to report it in USD.
example = X_test.iloc[:1]
example_record = example.to_dict(orient="records")[0]
predicted_units = model.predict(example)[0]
example_pred_usd = float(predicted_units) * 100_000
print("Example input:\n", example_record)
print("Example predicted USD:", round(example_pred_usd, 2))

In [None]:
# Serialize the fitted model with joblib into the parent of the current
# working directory and report the absolute location.
out_path = pathlib.Path("..", "model.joblib")
joblib.dump(model, out_path)
print("Saved model to", out_path.resolve())