Notebook containing a Gradient Boosting Regressor (GBR) model

In [33]:
# train defualt GBR model on dataset and get accuracy and error metrics

%store -r X_train X_test y_train y_test
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

default_gbr = GradientBoostingRegressor()
default_gbr.fit(X_train, y_train)
y_pred = default_gbr.predict(X_test)

default_error = mean_squared_error(y_test, y_pred)
default_rmse = default_error ** 0.5                 # get square root of error to get RMSE
default_mae = mean_absolute_error(y_test, y_pred)
default_r2 = r2_score(y_test, y_pred)

print(f"Default GBR RMSE: {default_rmse:.4f}")
print(f"Default GBR MAE: {default_mae:.4f}")
print(f"Default GBR R2: {default_r2:.4f}")

Default GBR RMSE: 25.3536
Default GBR MAE: 19.8319
Default GBR R2: 0.9343


Use GridSearchCV to find the optimal values of the 'n_estimators' and 'learning_rate' hyperparameters

In [34]:
from sklearn.model_selection import GridSearchCV

# Candidate grid: 4 values of n_estimators x 6 values of learning_rate = 24 combinations.
hyperparameters = {'n_estimators': [100, 200, 300, 400], 'learning_rate': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5]}

# Fixed seed on the estimator so the search picks the same winner on every run.
param_gbr = GradientBoostingRegressor(random_state=42)

# 5-fold cross-validation on the training split only (24 combinations x 5 folds).
# The scorer is the *negated* MSE because GridSearchCV maximises its score,
# so the best candidate is the one with the lowest mean squared error.
gridsearch_gbr = GridSearchCV(param_gbr, hyperparameters, cv=5, scoring='neg_mean_squared_error')
gridsearch_gbr.fit(X_train, y_train)

# Winning combination, as a dict keyed by parameter name.
gridsearch_best_params = gridsearch_gbr.best_params_
print(f"Best hyperparameters: {gridsearch_best_params}")

Best hyperparameters: {'learning_rate': 0.1, 'n_estimators': 400}


In [39]:

# Retrain a GBR with the hyperparameters selected by the grid search and
# compare its test-set metrics against the default model.

# Take the values straight from the search result instead of copying them by
# hand from printed output, so this model cannot drift out of sync if the
# search is re-run with a different outcome. Fixed seed for reproducibility.
improved_gbr = GradientBoostingRegressor(**gridsearch_best_params, random_state=42)
improved_gbr.fit(X_train, y_train)

y_pred_improved = improved_gbr.predict(X_test)

# Same metrics as for the default model, computed on the held-out test split.
improved_error = mean_squared_error(y_test, y_pred_improved)
improved_rmse = improved_error ** 0.5               # square root of the MSE gives the RMSE
improved_mae = mean_absolute_error(y_test, y_pred_improved)
improved_r2 = r2_score(y_test, y_pred_improved)

print(f"Improved GBR RMSE: {improved_rmse:.4f} vs Default: {default_rmse:.4f}")
print(f"Improved GBR MAE: {improved_mae:.4f} vs Default: {default_mae:.4f}")
print(f"Improved GBR R2: {improved_r2:.4f} vs Default: {default_r2:.4f}")

# A positive value means the tuned model has the lower RMSE.
print("Improvement in RMSE:", default_rmse - improved_rmse)


Improved GBR RMSE: 22.6801 vs Default: 25.3536
Improved GBR MAE: 17.4108 vs Default: 19.8319
Improved GBR R2: 0.9474 vs Default: 0.9343
Improvement in RMSE: 2.67347946956318
