##  Boosting Methods

Ada boosting

Gradient descent

XGBoost

#### import the libraries

In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

from sklearn import metrics
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report

#### load the input dataset

In [2]:
breast_cancer = load_breast_cancer()
X = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)
y = pd.Categorical.from_codes(breast_cancer.target, breast_cancer.target_names)

#### Data Wrangling

In [3]:
encoder = LabelEncoder()
binary_encoded_y = pd.Series(encoder.fit_transform(y))

#### Data Split

In [4]:
train_X, test_X, train_y, test_y = train_test_split(X, binary_encoded_y, random_state=1)

##  Adaboost classifier

In [5]:
classifier = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200
)
classifier.fit(train_X, train_y)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=DecisionTreeClassifier(class_weight=None,
                                                         criterion='gini',
                                                         max_depth=1,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort=False,
                                                         random_state=None,
                             

In [6]:
predictions = classifier.predict(test_X)

#### Model Evaluation

In [7]:
confusion_matrix(test_y, predictions)

array([[86,  2],
       [ 3, 52]], dtype=int64)

In [8]:
# Model Accuracy, how well the model performs
print("Accuracy:",metrics.accuracy_score(test_y, predictions))

Accuracy: 0.965034965034965


## Gradient Descent

In [14]:
from sklearn.ensemble import GradientBoostingClassifier

In [19]:
lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]

for learning_rate in lr_list:
    gb_clf = GradientBoostingClassifier(n_estimators=20, learning_rate=learning_rate, max_features=2, max_depth=2, random_state=0)
    gb_clf.fit(train_X, train_y)
    
    print("Learning rate: ", learning_rate)
    print("Accuracy score (training): {0:.3f}".format(gb_clf.score(train_X, train_y)))
    print("Accuracy score (validation): {0:.3f}".format(gb_clf.score(test_X, test_y)))

Learning rate:  0.05
Accuracy score (training): 0.953
Accuracy score (validation): 0.916
Learning rate:  0.075
Accuracy score (training): 0.965
Accuracy score (validation): 0.916
Learning rate:  0.1
Accuracy score (training): 0.969
Accuracy score (validation): 0.916
Learning rate:  0.25
Accuracy score (training): 0.988
Accuracy score (validation): 0.930
Learning rate:  0.5
Accuracy score (training): 0.998
Accuracy score (validation): 0.937
Learning rate:  0.75
Accuracy score (training): 1.000
Accuracy score (validation): 0.951
Learning rate:  1
Accuracy score (training): 1.000
Accuracy score (validation): 0.930


In [20]:
## Slect the learning rate from the above observation, we considered 0.5

In [25]:
gb_clf2 = GradientBoostingClassifier(n_estimators=20, learning_rate=0.5, max_features=2, max_depth=2, random_state=0)
gb_clf2.fit(train_X, train_y)
predictions = gb_clf2.predict(test_X)

print("Confusion Matrix:")
print(confusion_matrix(test_y, predictions))

print();print("Accuracy:",metrics.accuracy_score(test_y, predictions))

print();print("Classification Report")
print();print(classification_report(test_y, predictions))

Confusion Matrix:
[[84  4]
 [ 5 50]]

Accuracy: 0.9370629370629371

Classification Report

              precision    recall  f1-score   support

           0       0.94      0.95      0.95        88
           1       0.93      0.91      0.92        55

    accuracy                           0.94       143
   macro avg       0.93      0.93      0.93       143
weighted avg       0.94      0.94      0.94       143



##  XG Boost classifier

In [9]:
#!pip install xgboost

In [10]:
from xgboost import XGBClassifier

classifier = XGBClassifier()
classifier.fit(train_X, train_y)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [11]:
y_pred = classifier.predict(test_X)
y_pred

array([0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0])

In [12]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_pred, test_y)
cm

array([[86,  6],
       [ 2, 49]], dtype=int64)

In [13]:
# Model Accuracy, how well the model performs
print("Accuracy:",metrics.accuracy_score(test_y, y_pred))

Accuracy: 0.9440559440559441


### Inference :

Adaptive boosting gives high accuracy of 96% 