In [1]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
# Build a synthetic single-target regression problem. The fixed seed makes
# the generated data identical on every Restart & Run All.
X, y = make_regression(
    n_samples=1000,     # number of rows
    n_features=20,      # total number of columns in X
    n_informative=10,   # number of informative features requested
    n_targets=1,        # single regression target
    noise=0.5,          # noise level passed to the generator
    random_state=42,    # seed for reproducibility
)

In [3]:
# Baseline gradient-boosting model with explicitly spelled-out settings.
# It is only constructed here; fitting happens in a later cell.
gbr = GradientBoostingRegressor(
    # boosting settings
    loss="squared_error",
    learning_rate=0.1,
    n_estimators=100,
    # per-tree size limits
    max_depth=3,
    min_samples_split=3,
    min_samples_leaf=2,
    # seed for reproducibility
    random_state=42,
)

In [4]:
# Hold out 20% of the rows for testing; the seed keeps the split stable
# across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [5]:
# Train the baseline model on the training split only.
gbr.fit(X=X_train, y=y_train)

In [6]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the fitted baseline model on the held-out test split.
y_pred = gbr.predict(X_test)

print("R2 Score :", r2_score(y_true=y_test, y_pred=y_pred))
print("Mean Squared Error :", mean_squared_error(y_true=y_test, y_pred=y_pred))

R2 Score : 0.9241233179676238
Mean Squared Error : 2937.313745650557


In [7]:
# Overfitting / underfitting check: compare R2 on the data the model was
# trained on against R2 on the held-out test split.
y_train_pred, y_test_pred = (gbr.predict(split) for split in (X_train, X_test))

print(
    f"Training R2 score : {r2_score(y_train,y_train_pred)}",
    f"Testing R2 score : {r2_score(y_test, y_test_pred)}",
    sep="\n",
)

Training R2 score : 0.9868715106797162
Testing R2 score : 0.9241233179676238


In [8]:
# Hyperparameter tuning
from sklearn.model_selection import GridSearchCV

# Fresh, unfitted estimator for the search; the fixed seed keeps every
# candidate fit reproducible.
model = GradientBoostingRegressor(random_state=42)

# 4 x 4 x 4 = 64 parameter combinations are evaluated exhaustively.
parameters = {
    "learning_rate": [0.1, 0.01, 0.2, 0.02],
    "n_estimators": [100, 110, 115, 120],
    "max_depth": [3, 4, 5, 6],
}

# With cv=5 the search performs 64 * 5 = 320 fits. They are independent of
# each other, so run them in parallel on all available cores; because the
# estimator's random_state is fixed, the selected parameters and scores do
# not depend on n_jobs.
gridsearchcv = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
    scoring="r2",
    n_jobs=-1,
)

In [9]:
# Run the cross-validated search on the training split only, so the test
# split stays untouched for the final evaluation.
gridsearchcv.fit(X=X_train, y=y_train)

In [10]:
# Summarise the search: winning parameter set, its mean cross-validated
# score, and the corresponding estimator.
print(
    f"Best Parameters : {gridsearchcv.best_params_}",
    f"Best Score : {gridsearchcv.best_score_}",
    f"Best Estimators : {gridsearchcv.best_estimator_}",
    sep="\n",
)

Best Parameters : {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 120}
Best Score : 0.9198434576909793
Best Estimators : GradientBoostingRegressor(learning_rate=0.2, n_estimators=120, random_state=42)


In [11]:
# Evaluate the estimator selected by the grid search. The search was fitted
# on the training split only, so the test-split score below is computed on
# data the search never saw.
best_model = gridsearchcv.best_estimator_

y_train_grid_pred = best_model.predict(X_train)
y_test_grid_pred = best_model.predict(X_test)

# Label typo fixed ("Grig" -> "Grid") so both lines read consistently.
print(f"Grid Training R2 score : {r2_score(y_train, y_train_grid_pred)}")
print(f"Grid Testing R2 score : {r2_score(y_test, y_test_grid_pred)}")

Grid Training R2 score : 0.9947710867994619
Grig Testing R2 score : 0.9324582798101274
