In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Single seed shared by the train/test split and the estimator so that a
# fresh Restart & Run All reproduces the same search result.
RANDOM_STATE = 13

# Load the imputed dataset; 'Price' is the regression target and every other
# column is used as a feature.
data = pd.read_csv('../Data/imputed_data_7.csv')

X = data.drop(columns=['Price']).values
y = data['Price'].values

# Hold out 20% of the rows for the final evaluation below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Hyperparameter grid: 3 * 3 * 3 * 3 = 81 candidates, each scored with 5-fold CV.
params = {
    "n_estimators": [100, 200, 300],
    "max_depth": [3, 4, 5],
    "min_samples_split": [2, 5, 10],
    "learning_rate": [0.01, 0.05, 0.1],
}

# Seed the estimator: without random_state the fitted trees (and therefore the
# selected hyperparameters) are not guaranteed to be identical between runs.
reg = GradientBoostingRegressor(random_state=RANDOM_STATE)
grid_search = GridSearchCV(
    reg,
    params,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,  # the candidate fits are independent, so run them in parallel
)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_

# GridSearchCV (refit=True by default) has already refit the winning
# configuration on all of X_train, so reuse that model instead of constructing
# and fitting a second, identical regressor.
reg_best = grid_search.best_estimator_

# Held-out test metrics. They are kept as variables for later inspection; this
# cell only prints the selected hyperparameters.
y_pred = reg_best.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print("Best hyperparameters:", best_params)

Best hyperparameters: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_split': 5, 'n_estimators': 300}


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Single seed shared by the train/test split and the estimator so that the
# cross-validation scores and test metrics are reproducible on a fresh kernel.
RANDOM_STATE = 13

# Load the imputed dataset; 'Price' is the regression target and every other
# column is used as a feature.
data = pd.read_csv('../Data/imputed_data_7.csv')
X = data.drop(columns=['Price']).values
y = data['Price'].values

# Hold out 20% of the rows for the final evaluation below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# NOTE(review): these hyperparameters are hard-coded and differ from the
# grid-search output recorded earlier in this notebook
# (min_samples_split=5, n_estimators=300) — confirm which set is intended.
best_params = {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 200}
# Seed the estimator: without random_state the fitted trees are not guaranteed
# to be identical between runs, so the reported numbers could drift.
reg_best = GradientBoostingRegressor(random_state=RANDOM_STATE, **best_params)

# 5-fold CV on the training split only. The scorer returns negated MSE, so the
# sign is flipped again when the scores are printed.
cv_scores = cross_val_score(reg_best, X_train, y_train, cv=5, scoring='neg_mean_squared_error')

# Fit on the full training split and evaluate once on the held-out test split.
reg_best.fit(X_train, y_train)
y_pred = reg_best.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # square root of the test MSE
r_squared = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Best hyperparameters:", best_params)
print(f"Cross-validation scores (MSE): {-cv_scores}")
print(f"Mean cross-validation score (MSE): {(-cv_scores).mean()}")
print(f"Test set RMSE: {rmse}")
print(f"Test set R-squared: {r_squared}")
print(f"Test set MAE: {mae}")

Best hyperparameters: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 200}
Cross-validation scores (MSE): [ 918.84146579 1010.31577119  747.26634661  922.96270874  863.00846611]
Mean cross-validation score (MSE): 892.4789516909266
Test set RMSE: 31.628331560502076
Test set R-squared: 0.9052225048824823
Test set MAE: 20.755624359545113
