In [1]:
# Install the third-party packages imported by the next cell.
# NOTE(review): versions are unpinned, so a fresh install may pull different
# releases than the ones that produced the recorded outputs — consider pinning.
%pip install pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# --- Configuration -----------------------------------------------------------
# One seed shared by the train/test split and the estimator so that a
# Restart-and-Run-All reproduces the same search result and test metrics.
RANDOM_STATE = 13

# --- Load data ---------------------------------------------------------------
data = pd.read_csv('imputed_data_7.csv')

# Every column except 'Price' is used as a feature; 'Price' is the target.
X = data.drop(columns=['Price']).values
y = data['Price'].values

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# --- Hyperparameter search ---------------------------------------------------
# 5 * 3 * 3 * 3 = 135 candidates, each cross-validated on 5 folds.
params = {
    "n_estimators": [100, 200, 300, 400, 500],
    "max_depth": [3, 4, 5],
    "min_samples_split": [2, 5, 10],
    "learning_rate": [0.01, 0.05, 0.1],
}

# Seed the estimator: without random_state the fitted trees (and therefore the
# selected hyperparameters) are not guaranteed to be identical between runs.
reg = GradientBoostingRegressor(random_state=RANDOM_STATE)

# n_jobs=-1 spreads the 675 cross-validation fits over all available cores.
grid_search = GridSearchCV(
    reg,
    params,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_

# GridSearchCV defaults to refit=True, so best_estimator_ is already the best
# configuration retrained on all of X_train; fitting a second model by hand
# would only repeat that work.
reg_best = grid_search.best_estimator_

# --- Evaluate on the held-out test set ---------------------------------------
y_pred = reg_best.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# NOTE(review): in the recorded run the search picked the smallest
# learning_rate, max_depth and n_estimators in the grid and R-squared was
# ~0.006, i.e. barely better than predicting the mean. Check the features and
# target before investing in a wider grid.
print("Best hyperparameters:", best_params)
print("Mean squared error (MSE) on test set:", mse)
print("Mean Absolute Error (MAE) on test set:", mae)
print("R-squared on test set:", r_squared)

Best hyperparameters: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_split': 10, 'n_estimators': 100}
Mean squared error (MSE) on test set: 156903.50005272753
Mean Absolute Error (MAE) on test set: 118.12868615671673
R-squared on test set: 0.006328867514570624
