In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# --- Data -------------------------------------------------------------------
# Read the CSV and discard the "No" column when it exists (errors="ignore"
# makes the drop a no-op if the column is absent).
# NOTE(review): absolute path ties the notebook to one workspace layout.
data = pd.read_csv("/workspaces/Estate-Price-Prediction/Real estate.csv").drop(
    columns=["No"], errors="ignore"
)

# Every column except the target is used as a feature.
target_column = "Y house price of unit area"
y = data[target_column]
X = data.drop(columns=[target_column])

# --- Train / test split -----------------------------------------------------
# 80 % train, 20 % test; fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Hyper-parameter search -------------------------------------------------
# 3 values for each of 5 parameters -> 243 candidates, each cross-validated
# on 5 folds.
param_grid = dict(
    n_estimators=[100, 300, 500],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[3, 4, 5],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 3, 5],
)

# Base estimator handed to the search; seeded for repeatable fits.
gb = GradientBoostingRegressor(random_state=42)

# Exhaustive search scored by R², using all available cores.
grid_search = GridSearchCV(
    estimator=gb,
    param_grid=param_grid,
    cv=5,
    scoring="r2",
    n_jobs=-1,
    verbose=1,
).fit(X_train, y_train)

# Estimator exposed by the search for its best parameter combination.
best_gb = grid_search.best_estimator_

# --- Hold-out evaluation ----------------------------------------------------
y_pred = best_gb.predict(X_test)

# Apply each metric to the same (truth, prediction) pair.
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse = np.sqrt(mse)

# --- Report -----------------------------------------------------------------
print("📊 Optimized Gradient Boosting Results")
print(f"Best Params: {grid_search.best_params_}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R² Score: {r2:.2f}")


Fitting 5 folds for each of 243 candidates, totalling 1215 fits


📊 Optimized Gradient Boosting Results
Best Params: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 5, 'min_samples_split': 2, 'n_estimators': 300}
Mean Squared Error (MSE): 32.80
Root Mean Squared Error (RMSE): 5.73
Mean Absolute Error (MAE): 4.03
R² Score: 0.80


In [5]:
# Example input (replace with real values).
# Assumes the values are listed in the same order as the training feature
# columns (X_train.columns) — verify against the dataset before relying on it.
sample_values = [[2013.250, 15, 300.5, 5, 24.98, 121.54]]

# The model was fitted on a DataFrame, so give it a DataFrame with the same
# column labels at prediction time. Passing a bare nested list drops the
# feature names, which recent scikit-learn versions report with a
# "X does not have valid feature names" warning.
sample_input = pd.DataFrame(sample_values, columns=X_train.columns)

# Predict house price
prediction = best_gb.predict(sample_input)
print("Predicted Price per unit area:", prediction[0])


Predicted Price per unit area: 47.92069528611005




In [6]:
import joblib

# Persist the tuned estimator (best_gb) to disk with joblib.
# NOTE(review): the ".ipynbb" suffix looks like a typo for a model-file
# extension; confirm nothing loads this exact path before renaming it.
model_path = "/workspaces/Estate-Price-Prediction/model.ipynbb"
joblib.dump(value=best_gb, filename=model_path)


['/workspaces/Estate-Price-Prediction/model.ipynbb']