In [8]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import HistGradientBoostingRegressor
import joblib

# Load the California housing dataset (features in `x`, regression target in `y`).
housing = datasets.fetch_california_housing()
x = housing.data
y = housing.target

# One seed shared by the train/test split and the model, so that a fresh
# "Restart Kernel -> Run All" reproduces the same model file and the same score.
RANDOM_STATE = 432

# Expand the raw features with polynomial terms (library defaults: degree 2,
# bias column included). The fitted transformer is NOT persisted below, so any
# code that later feeds the saved model must apply PolynomialFeatures with the
# same settings before calling predict().
poly = PolynomialFeatures()
x = poly.fit_transform(x)

# Hold out 20% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Train a gradient boosting model.
# random_state matters here: with the default early_stopping='auto', the
# estimator enables early stopping for training sets above 10,000 rows (which
# this one is) and carves out a random validation split. Without a seed, every
# run would train a slightly different model.
model = HistGradientBoostingRegressor(
    max_iter=350,               # Upper bound on boosting iterations (trees)
    learning_rate=0.05,         # Scales the contribution of each tree
    random_state=RANDOM_STATE,  # Makes the internal validation split repeatable
)
model.fit(x_train, y_train)

# Save the trained regressor only (see the note on PolynomialFeatures above).
joblib.dump(model, 'housing_model.joblib')

# Evaluate on the held-out rows.
y_pred = model.predict(x_test)
r2 = r2_score(y_test, y_pred)
print(f"R^2 score: {r2}")

R^2 score: 0.8444897211096725


In [None]:
import joblib
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

# One district to score. Values follow the California housing feature order:
# MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude.
sample = [8.3252, 41.0, 6.9841, 1.0238, 322.0, 2.5556, 37.88, -122.23]
data = np.array([sample])  # shape (1, 8): predict() expects a 2D array

# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files you produced yourself or otherwise trust.
localmodel = joblib.load('housing_model.joblib')

# The saved artifact holds only the regressor, so the polynomial expansion used
# at training time has to be rebuilt here with the same (default) settings.
# Fitting on a single row is acceptable only because PolynomialFeatures derives
# its output layout from the number of input columns, not from their values.
poly = PolynomialFeatures()
data_transformed = poly.fit_transform(data)

price = localmodel.predict(data_transformed)[0]
print("Predicted House Price:", round(price, 2))
# The dataset's target is the median house value expressed in units of
# $100,000, so also show the prediction in plain dollars to avoid misreading.
print(f"(target is in units of $100,000, i.e. about ${price * 100_000:,.0f})")


Predicted house price: 4.3195614783615675
