<a href="https://colab.research.google.com/github/Rdxsandy/house_prediction/blob/main/Untitled17.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pickle

import numpy as np
import pandas as pd

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


In [6]:
# Synthetic housing dataset: integer features drawn from NumPy's seeded global
# RNG, plus a price built as a linear mix of those features and Gaussian noise.
np.random.seed(42)

n = 1000

# Column name -> (low, high) arguments passed to np.random.randint.
# The dict order is also the draw order, so reordering entries changes the data.
feature_bounds = {
    "area_sqft": (500, 3000),
    "bedrooms": (1, 6),
    "bathrooms": (1, 5),
    "age": (0, 40),
    "location_score": (1, 11),
}

df = pd.DataFrame(
    {
        column: np.random.randint(low, high, n)
        for column, (low, high) in feature_bounds.items()
    }
)

# Drawn after every feature column, so it consumes the RNG stream last.
noise = np.random.normal(0, 10, n)

# Accumulate the price one term at a time, in a fixed left-to-right order.
price = 0.05 * df["area_sqft"]
price = price + 5 * df["bedrooms"]
price = price + 8 * df["bathrooms"]
price = price - 0.3 * df["age"]
price = price + 6 * df["location_score"]
df["price"] = price + noise


In [7]:
# Separate the regression target from the predictor columns.
y = df["price"]
X = df.drop(columns=["price"])


In [8]:
# Hold out a test_size=0.2 share of the rows for evaluation; the fixed
# random_state keeps the partition repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [9]:
# Hyperparameters for the gradient-boosting regressor, gathered in one place.
gbr_params = {
    "n_estimators": 200,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}
gbr = GradientBoostingRegressor(**gbr_params)

# Left as the cell's final bare expression so the fitted estimator is displayed.
gbr.fit(X_train, y_train)


In [10]:
# Predict on both partitions: training rows first, then the held-out rows.
y_train_pred, y_test_pred = (
    gbr.predict(features) for features in (X_train, X_test)
)


[143.63592976 111.711271   195.90786992 217.48820616 210.48465013
  84.32774857 109.35964569 131.37270726 127.41114534 152.85057643
 112.53500244 150.08827899 168.31620292 116.40251341 151.41413433
  94.66000458 198.71705599 130.03814607 127.45463173 171.30711068
  91.99032131 174.31988401  94.20764147 158.92024965 100.59332741
 169.9811796  176.76505407 188.45816228 153.52199247 126.38036276
 222.55049281 118.70178474  83.83999737  95.82969682 128.75286011
 138.15877291 184.25673986 138.12095567 176.61275011 167.3196846
 146.59309412 133.21124384 218.94925874 218.55548186 204.36607541
 108.45518892 175.87607051 104.21879203 181.10529594 100.86250115
 156.15590124 128.88659311 224.28911181 219.90179618 140.08926922
 139.21752474 107.04151869  92.039711   152.17037713 199.90538569
  99.28769001 213.80956096 119.5740828  111.28812336 112.76985816
 110.49983254 179.30558239 157.04856958 148.85463307  85.98403246
 173.42260975 118.40170273 214.88077986 104.10949909 124.86077455
 154.739068

In [12]:
# Score each partition with mean squared error and R^2.
train_mse, train_r2 = (
    mean_squared_error(y_train, y_train_pred),
    r2_score(y_train, y_train_pred),
)
test_mse, test_r2 = (
    mean_squared_error(y_test, y_test_pred),
    r2_score(y_test, y_test_pred),
)

# Labels are padded so the colons line up; print() adds the separating space.
for label, value in (
    ("Train MSE:", train_mse),
    ("Test MSE :", test_mse),
    ("Train R2 :", train_r2),
    ("Test R2  :", test_r2),
):
    print(label, value)


Train MSE: 48.752433745109585
Test MSE : 127.96625108273125
Train R2 : 0.9725712883732213
Test R2  : 0.9164402886092302


In [13]:
# Persist the fitted model to disk so it can be reloaded without retraining.
# `pickle` is imported in the notebook's import cell at the top.
# NOTE: unpickling can execute arbitrary code, so only load this file back
# from a source you trust.
with open("house_price_gb_model.pkl", "wb") as file:
    pickle.dump(gbr, file)


In [14]:
# Offer the saved model as a browser download when running on Google Colab.
# The google.colab import is guarded so that a kernel where it is not
# installed reports where the file is instead of aborting a
# Restart & Run All with ModuleNotFoundError.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download("house_price_gb_model.pkl")
else:
    print("Not running on Google Colab; model file left at house_price_gb_model.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>