<a href="https://colab.research.google.com/github/Davron030901/Scikit_learning/blob/main/Mastering_Gradient_Boosting_Regressor_A_Comprehensive_Tutorial1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
# Fetch scikit-learn's bundled diabetes dataset, then unpack the feature
# matrix (X) and the regression target (y) from the returned object.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

In [4]:
# Hold out 20% of the samples as a test set; the fixed random_state makes
# the train/test partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=17,
)

In [5]:
# Baseline model: a Gradient Boosting Regressor with default hyperparameters,
# fitted on the training split.
# random_state is pinned (same seed as the train/test split) because the
# estimator randomly permutes the candidate features at every split; without
# a seed the fitted trees -- and therefore the reported metrics -- can change
# from one run to the next.
gbr = GradientBoostingRegressor(random_state=17)
gbr.fit(X_train, y_train)

In [6]:
# Predict targets for the held-out test split using the baseline model
# (the default-parameter regressor fitted on the training split).
y_pred = gbr.predict(X_test)

In [7]:
# Score the baseline predictions against the true test targets.
# Each (label, metric) pair is evaluated and printed in turn:
# MAE and MSE are errors (lower is better), R-squared is a fit score
# (higher is better).
for label, metric in (
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
    ("R-squared Score:", r2_score),
):
    print(label, metric(y_test, y_pred))

Mean Absolute Error: 48.95266086848329
Mean Squared Error: 3736.2265748015043
R-squared Score: 0.36730152832417406


In [8]:
# Hyperparameter search space: every combination of the values below is a
# candidate (3 values for each of 5 parameters -> 3**5 = 243 candidates).
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.1, 0.01, 0.2],
    max_depth=[3, 4, 5],
    min_samples_split=[2, 3, 4],
    min_samples_leaf=[1, 2, 3],
)

In [9]:
# Exhaustively evaluate every combination in param_grid with 3-fold
# cross-validation on the training split.
# - random_state is pinned on the estimator so every candidate is fitted
#   under the same seed; otherwise the random feature permutation inside the
#   trees can change which combination wins from one run to the next.
# - scoring is the *negative* MSE because GridSearchCV always maximises its
#   score, so the best candidate is the one with the lowest MSE.
# - n_jobs=-1 runs the candidate fits in parallel on all available cores.
gbr_cv = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=17),
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    scoring='neg_mean_squared_error',
)
gbr_cv.fit(X_train, y_train)

In [10]:
# Show the hyperparameter combination that achieved the best
# cross-validated score during the grid search.
best_params = gbr_cv.best_params_
print("\nBest Parameters:", best_params)


Best Parameters: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 3, 'min_samples_split': 3, 'n_estimators': 300}


In [11]:
# Take the winning estimator from the grid search and predict on the same
# held-out test split that was used for the baseline model.
# NOTE(review): best_estimator_ is only available when GridSearchCV refits
# the best candidate (refit=True, which is the library default) -- confirm
# if the search configuration is ever changed.
best_gbr = gbr_cv.best_estimator_
y_pred_best = best_gbr.predict(X_test)

In [12]:
# Score the tuned model's predictions against the true test targets:
# MAE and MSE are errors (lower is better), R-squared is a fit score
# (higher is better).
print("Best Model Metrics:")
for label, metric in (
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
    ("R-squared Score:", r2_score),
):
    print(label, metric(y_test, y_pred_best))

Best Model Metrics:
Mean Absolute Error: 47.35225724152144
Mean Squared Error: 3297.207852066771
R-squared Score: 0.44164564781218874


In [13]:
# Display the selected hyperparameters as the cell's output value
# (the notebook renders the dict itself rather than printed text).
gbr_cv.best_params_

{'learning_rate': 0.01,
 'max_depth': 3,
 'min_samples_leaf': 3,
 'min_samples_split': 3,
 'n_estimators': 300}