In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Gradient Boosting

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier


In [8]:
x,y= make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)

In [9]:
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size=0.2, random_state=1)

In [10]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((800, 20), (200, 20), (800,), (200,))

In [12]:
clf = GradientBoostingClassifier()
clf

In [13]:
clf.fit(x_train,y_train)

In [14]:
y_pred = clf.predict(x_test)

In [15]:
y_pred

array([0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1])

In [17]:
# Evaluate the model

print('Current model performance')
print(classification_report(y_pred,y_test))
print('Classification Report')
print(classification_report(y_test,y_pred))
print('Confusion Matrix')
print(confusion_matrix(y_test,y_pred))

Current model performance
              precision    recall  f1-score   support

           0       0.84      0.85      0.85        89
           1       0.88      0.87      0.88       111

    accuracy                           0.86       200
   macro avg       0.86      0.86      0.86       200
weighted avg       0.87      0.86      0.87       200

Classification Report
              precision    recall  f1-score   support

           0       0.85      0.84      0.85        90
           1       0.87      0.88      0.88       110

    accuracy                           0.86       200
   macro avg       0.86      0.86      0.86       200
weighted avg       0.86      0.86      0.86       200

Confusion Matrix
[[76 14]
 [13 97]]


In [22]:
# Hyperparameter tuning

param_grid={
    'n_estimators':[100,200,300],
    'learning_rate':[0.01,0.1,0.05,0.2],

}

In [23]:
from sklearn.model_selection import GridSearchCV

In [24]:
gbc = GradientBoostingClassifier()
grid_search= GridSearchCV(estimator=gbc, param_grid=param_grid, cv=5, verbose=3)

In [25]:
grid_search

In [26]:
grid_search.fit(x_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.825 total time=   1.5s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.881 total time=   1.0s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.875 total time=   0.9s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.850 total time=   0.9s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.844 total time=   0.9s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.825 total time=   1.9s
[CV 2/5] END learning_rate=0.01, n_estimators=200;, score=0.887 total time=   1.8s
[CV 3/5] END learning_rate=0.01, n_estimators=200;, score=0.894 total time=   1.8s
[CV 4/5] END learning_rate=0.01, n_estimators=200;, score=0.838 total time=   2.4s
[CV 5/5] END learning_rate=0.01, n_estimators=200;, score=0.863 total time=   2.5s
[CV 1/5] END learning_rate=0.01, n_estimators=300;, score=0.831 total time=   2.7s
[CV 2/5] END learning_rate

In [27]:
grid_search.best_params_

{'learning_rate': 0.05, 'n_estimators': 300}

In [29]:
y_pred = grid_search.predict(x_test)

In [30]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [32]:
print('Current_model_performance')
print(classification_report(y_pred,y_test))
print('Confusion Matrix')
print(confusion_matrix(y_test,y_pred))
print('Accuracy_score')
print(accuracy_score(y_test,y_pred))

Current_model_performance
              precision    recall  f1-score   support

           0       0.86      0.88      0.87        88
           1       0.90      0.88      0.89       112

    accuracy                           0.88       200
   macro avg       0.88      0.88      0.88       200
weighted avg       0.88      0.88      0.88       200

Confusion Matrix
[[77 13]
 [11 99]]
Accuracy_score
0.88


In [33]:
# Gradient Boosting Regressor

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [35]:
x,y=make_regression(n_samples=1000,n_features=10,n_targets=1,random_state=1)

In [36]:
x

array([[-2.02220122,  0.31563495,  0.82797464, ..., -0.50446586,
         0.23009474,  0.76201118],
       [-0.24910301, -0.70141147,  0.1297875 , ..., -0.22284591,
        -0.57869807,  1.39874983],
       [-0.14323775, -0.20364226, -0.43754648, ..., -0.16786598,
        -0.64113613,  0.11192949],
       ...,
       [ 0.79452824, -1.02188594,  0.92061512, ...,  1.76795995,
        -0.03536792,  2.11060505],
       [-1.09989127,  1.13376944, -0.87785842, ..., -2.06014071,
         0.04221375,  0.58281521],
       [ 0.00762745, -0.5886461 , -0.03455115, ...,  1.69101002,
        -0.2068231 , -1.10938582]])

In [37]:
y

array([-9.41044562e+01,  1.40332263e+02, -1.88115733e+02, -3.43487703e+02,
        8.59332588e+01,  7.93987069e+01,  2.68994453e+01,  2.97040296e+02,
       -5.87118226e+01, -2.46500771e+02,  1.79712303e+02,  4.53145300e+01,
        7.14864544e+01, -7.05712059e+01,  3.38185027e+01, -3.25369237e+01,
       -8.47001948e+01, -1.89758379e+01, -1.69165434e+02,  1.20928706e+02,
        1.10794496e+02, -1.05414828e+02,  1.20347144e+02, -3.11316272e+02,
        1.50160386e+02, -1.04648070e+02,  1.31396567e+02,  1.72674661e+02,
       -1.46983251e+02, -1.84182567e+02, -1.76970648e+02, -5.35043830e+01,
       -2.58489866e+02,  3.57219091e+02, -1.41442561e+02, -3.66954735e+02,
        2.05221624e+01, -3.02037240e+02,  1.99020189e+02,  6.64012165e+01,
        3.19937482e+01, -2.11161192e+02,  9.31535832e+01,  5.72599538e+01,
       -3.70766263e+02,  2.73069483e+01,  1.42401863e+02,  2.77505150e+02,
        2.59382586e+02, -1.94479092e+02, -2.18951383e+02, -4.26316069e+01,
        2.68323438e+02,  

In [39]:
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size=0.3, random_state=1)

In [40]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((700, 10), (300, 10), (700,), (300,))

In [41]:
regressor = GradientBoostingRegressor()
regressor.fit(x_train,y_train)

In [42]:
y_pred = regressor.predict(x_test)

In [44]:
# Evaluation

print('current model performance')
print('r2_score')
print(r2_score(y_test,y_pred))
print('mean_squared_error')
print(mean_squared_error(y_test,y_pred))
print('mean_absolute_error')
print(mean_absolute_error(y_test,y_pred))

current model performance
r2_score
0.9272594849458738
mean_squared_error
2345.26860571743
mean_absolute_error
36.96934624310438


In [45]:
# Hyperparameter tuning

param_grid={
    'n_estimators':[100,200,300],
    'learning_rate':[0.01,0.1,0.05,0.2],

}

gbr = GradientBoostingRegressor()

In [47]:
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, cv=5,verbose=3)

In [48]:
grid_search.fit(x_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.497 total time=   0.5s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.557 total time=   0.6s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.482 total time=   0.7s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.489 total time=   1.8s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.475 total time=   1.6s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.703 total time=   1.4s
[CV 2/5] END learning_rate=0.01, n_estimators=200;, score=0.735 total time=   0.8s
[CV 3/5] END learning_rate=0.01, n_estimators=200;, score=0.678 total time=   0.9s
[CV 4/5] END learning_rate=0.01, n_estimators=200;, score=0.667 total time=   0.8s
[CV 5/5] END learning_rate=0.01, n_estimators=200;, score=0.672 total time=   0.9s
[CV 1/5] END learning_rate=0.01, n_estimators=300;, score=0.794 total time=   1.2s
[CV 2/5] END learning_rate

In [49]:
grid_search.best_params_

{'learning_rate': 0.1, 'n_estimators': 300}

In [51]:
best_model = grid_search.best_estimator_

In [53]:
y_pred_tuned = best_model.predict(x_test)

# Evaluation

print('Tuned model performance')
print(f'r2_score: {r2_score(y_test,y_pred_tuned)}')
print(f'mean_squared_error: {mean_squared_error(y_test,y_pred_tuned)}')
print(f'mean_absolute_error: {mean_squared_error(y_test,y_pred_tuned)}')

Tuned model performance
r2_score: 0.9492565183371892
mean_squared_error: 1636.0496540344182
mean_absolute_error: 1636.0496540344182
