In [9]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report


import warnings
warnings.filterwarnings('ignore')

X,y = make_classification(n_samples=1000 , n_features=10 , n_classes=2 , random_state=1)
X_train,X_test , y_train,y_test = train_test_split(X,y , test_size=0.2 , random_state=1)
model = GradientBoostingClassifier()
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

In [10]:
print("current model performance")
print(f'accuracy is {accuracy_score(y_test,y_pred)}\n')
print("classification report")
print(f' {classification_report(y_test,y_pred)}')
print("confusion metrics")
print(f' {confusion_matrix(y_test,y_pred)}')

current model performance
accuracy is 0.895

classification report
               precision    recall  f1-score   support

           0       0.91      0.89      0.90       105
           1       0.88      0.91      0.89        95

    accuracy                           0.90       200
   macro avg       0.89      0.90      0.89       200
weighted avg       0.90      0.90      0.90       200

confusion metrics
 [[93 12]
 [ 9 86]]


In [13]:
param_grid = {
    'n_estimators': [100, 200, 300],  # Number of boosting stages
    'learning_rate': [0.01, 0.1, 0.2], # Learning rate shrinks the contribution of each tree
    'max_depth': [3, 4, 5],          # Maximum depth of the individual regression estimators
    'min_samples_split': [2, 4],     # The minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2],      # The minimum number of samples required to be at a leaf node
    'subsample': [0.8, 1.0]          # The fraction of samples to be used for fitting the individual base learners.
}

gbc = GradientBoostingClassifier()
model1 = GridSearchCV(estimator=gbc,param_grid=param_grid , cv = 5 , verbose = 3)
model1.fit(X_train,y_train)
print(f'Best esitimator is {model1.best_estimator_}')
y_pred1 = model1.best_estimator_.predict(X_test)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
[CV 1/5] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100, subsample=0.8;, score=0.925 total time=   0.7s
[CV 2/5] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100, subsample=0.8;, score=0.844 total time=   0.8s
[CV 3/5] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100, subsample=0.8;, score=0.856 total time=   1.2s
[CV 4/5] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100, subsample=0.8;, score=0.863 total time=   0.6s
[CV 5/5] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100, subsample=0.8;, score=0.825 total time=   0.6s
[CV 1/5] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100, subsample=1.0;, score=0.919 total time=   0.7s
[CV 2/5] END learning_ra

In [15]:
print("GridSearchCV model performance")
print(f'accuracy is {accuracy_score(y_test,y_pred1)}\n')
print("classification report")
print(f' {classification_report(y_test,y_pred1)}')
print("confusion metrics")
print(f' {confusion_matrix(y_test,y_pred1)}')

GridSearchCV model performance
accuracy is 0.885

classification report
               precision    recall  f1-score   support

           0       0.91      0.87      0.89       105
           1       0.86      0.91      0.88        95

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.88       200
weighted avg       0.89      0.89      0.89       200

confusion metrics
 [[91 14]
 [ 9 86]]
