In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [2]:
x,y = make_classification(n_samples = 1000,n_features = 20,n_classes = 2,random_state = 1)
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state = 1,test_size = 0.30)

In [3]:
model = GradientBoostingClassifier()
model.fit(x_train,y_train)

In [4]:
y_pred = model.predict(x_test)

In [5]:
y_pred

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1])

In [6]:
print("accuracy:",accuracy_score(y_test,y_pred))
print("classification_report:\n",classification_report(y_test,y_pred))
print("confusion_matrix:\n",confusion_matrix(y_test,y_pred))

accuracy: 0.8633333333333333
classification_report:
               precision    recall  f1-score   support

           0       0.84      0.88      0.86       139
           1       0.89      0.85      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

confusion_matrix:
 [[122  17]
 [ 24 137]]


In [7]:
#hyperparameter tuning.
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators':[100,200,300],
    'learning_rate':[0.01,0.1,0.05,0.2],
#     'max_depth':[3,4,5],
#     'sub_samples':[0.8,0.9,1.0],
#     'min_samples_split':[2,5,10],
#     'min_samples_leaf':[1,2,4]
}

In [8]:
gbc = GradientBoostingClassifier()
grid_search = GridSearchCV(estimator = gbc,param_grid = param_grid,cv=5,verbose = 3)

In [9]:
grid_search

In [10]:
grid_search.fit(x_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.857 total time=   0.3s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.907 total time=   0.3s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.857 total time=   0.3s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.850 total time=   0.3s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.821 total time=   0.3s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.857 total time=   0.8s
[CV 2/5] END learning_rate=0.01, n_estimators=200;, score=0.893 total time=   0.8s
[CV 3/5] END learning_rate=0.01, n_estimators=200;, score=0.864 total time=   0.8s
[CV 4/5] END learning_rate=0.01, n_estimators=200;, score=0.836 total time=   0.8s
[CV 5/5] END learning_rate=0.01, n_estimators=200;, score=0.829 total time=   0.8s
[CV 1/5] END learning_rate=0.01, n_estimators=300;, score=0.850 total time=   1.2s
[CV 2/5] END learning_rate

In [11]:
grid_search.best_params_

{'learning_rate': 0.1, 'n_estimators': 100}

In [12]:
best_model = grid_search.best_estimator_

In [13]:
best_model.fit(x_train,y_train)

In [14]:
y_pred_tuned = best_model.predict(x_test)
y_pred_tuned

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1])

In [15]:
print("accuracy:",accuracy_score(y_test,y_pred_tuned))
print("classification_report:\n",classification_report(y_test,y_pred_tuned))
print("confusion_matrix:\n",confusion_matrix(y_test,y_pred_tuned))

accuracy: 0.8633333333333333
classification_report:
               precision    recall  f1-score   support

           0       0.83      0.88      0.86       139
           1       0.89      0.84      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.87      0.86      0.86       300

confusion_matrix:
 [[123  16]
 [ 25 136]]


In [1]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [2]:
x,y = make_regression(n_samples=1000,n_features=20,n_targets = 1)

In [3]:
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state = 1,test_size = 0.30)

In [4]:
clf = GradientBoostingRegressor()
clf.fit(x_train,y_train)

In [5]:
y_pred = clf.predict(x_test)

In [6]:
print(r2_score(y_pred,y_test))
print(mean_absolute_error(y_test,y_pred))
print(mean_squared_error(y_test,y_pred))

0.899560745173785
33.03879980758793
2162.8763196639525


In [9]:
#now hyper parameter tuning.
from sklearn.model_selection import GridSearchCV
param_grid = {
     'n_estimators':[100,200,300],
    'learning_rate':[0.01,0.1,0.05,0.2],
#     'max_depth':[3,4,5],
#     'sub_samples':[0.8,0.9,1.0],
#     'min_samples_split':[2,5,10],
#     'min_samples_leaf':[1,2,4]
    
}
model = GridSearchCV(estimator = clf,param_grid = param_grid,cv=5,verbose = 3)

In [10]:
model.fit(x_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.514 total time=   0.3s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.503 total time=   0.3s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.521 total time=   0.3s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.489 total time=   0.3s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.491 total time=   0.3s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.697 total time=   0.7s
[CV 2/5] END learning_rate=0.01, n_estimators=200;, score=0.710 total time=   0.7s
[CV 3/5] END learning_rate=0.01, n_estimators=200;, score=0.697 total time=   0.7s
[CV 4/5] END learning_rate=0.01, n_estimators=200;, score=0.688 total time=   0.7s
[CV 5/5] END learning_rate=0.01, n_estimators=200;, score=0.693 total time=   0.7s
[CV 1/5] END learning_rate=0.01, n_estimators=300;, score=0.783 total time=   1.1s
[CV 2/5] END learning_rate

In [12]:
model.best_params_

{'learning_rate': 0.05, 'n_estimators': 300}

In [15]:
best_model = model.best_estimator_

In [16]:
best_model.fit(x_train,y_train)

In [19]:
y_pred_tuned = best_model.predict(x_test)

In [20]:
print(r2_score(y_pred_tuned,y_test))
print(mean_absolute_error(y_test,y_pred_tuned))
print(mean_squared_error(y_test,y_pred_tuned))

0.9208030505960442
30.8123663477532
1796.0568364591184
