# Boosting Ensemble Techniques

# <font color = 'orange'> AdaBoost, Gradient Boosting and XGBoost </font>

---

## Classification

In [1]:
!pip install xgboost

Defaulting to user installation because normal site-packages is not writeable


---

### <font color = 'Blue'> 1. Create the classification dataset

In [2]:
from sklearn.datasets import make_classification

# Synthetic classification data: 1000 samples with 4 features, of which
# 2 are informative and none are redundant. The fixed seed and
# shuffle=False keep the generated arrays identical from run to run.
x, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=0,
    shuffle=False,
)

---

### <font color = 'Blue'> 2. Train test split

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for evaluation; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

---

### <font color = 'Blue'> 3. Model Building

In [4]:
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.ensemble import GradientBoostingClassifier 
import xgboost as xb

---

In [5]:
# AdaBoost classifier with scikit-learn's default hyper-parameters.
# random_state is pinned so the fitted ensemble, and the metrics computed
# from it, are reproducible after a kernel restart.
classifier1 = AdaBoostClassifier(random_state=42)
classifier1.fit(x_train, y_train)

AdaBoostClassifier()

In [6]:
# Class predictions of the AdaBoost model for the held-out test rows.
y_pred = classifier1.predict(X=x_test)

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Confusion matrix, accuracy and per-class report, each separated by one
# blank line in the printed output.
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n\n",
)

[[153  13]
 [ 12 152]]

0.9242424242424242

              precision    recall  f1-score   support

           0       0.93      0.92      0.92       166
           1       0.92      0.93      0.92       164

    accuracy                           0.92       330
   macro avg       0.92      0.92      0.92       330
weighted avg       0.92      0.92      0.92       330



---

In [8]:
# Gradient boosting classifier with scikit-learn's default hyper-parameters.
# random_state is pinned so the fitted trees, and the metrics computed from
# them, are reproducible after a kernel restart.
classifier2 = GradientBoostingClassifier(random_state=42)
classifier2.fit(x_train, y_train)

GradientBoostingClassifier()

In [9]:
# Class predictions of the gradient boosting model for the held-out test rows.
y_pred = classifier2.predict(X=x_test)

In [10]:
# Confusion matrix, accuracy and per-class report, each separated by one
# blank line in the printed output.
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n\n",
)

[[153  13]
 [  7 157]]

0.9393939393939394

              precision    recall  f1-score   support

           0       0.96      0.92      0.94       166
           1       0.92      0.96      0.94       164

    accuracy                           0.94       330
   macro avg       0.94      0.94      0.94       330
weighted avg       0.94      0.94      0.94       330



---

In [11]:
# XGBoost classifier with the library's default hyper-parameters.
classifier3 = xb.XGBClassifier()
classifier3.fit(X=x_train, y=y_train)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, ...)

In [12]:
# Class predictions of the XGBoost model for the held-out test rows.
y_pred = classifier3.predict(X=x_test)

In [13]:
# Confusion matrix, accuracy and per-class report, each separated by one
# blank line in the printed output.
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n\n",
)

[[155  11]
 [ 11 153]]

0.9333333333333333

              precision    recall  f1-score   support

           0       0.93      0.93      0.93       166
           1       0.93      0.93      0.93       164

    accuracy                           0.93       330
   macro avg       0.93      0.93      0.93       330
weighted avg       0.93      0.93      0.93       330



---

### <font color = '#AA00FF'> Observation: </font>
* All three boosting algorithms perform well on the test set, with accuracy between roughly 0.92 and 0.94.

---

## Regression

In [14]:
from sklearn.datasets import make_regression

# Synthetic regression data with 4 features, 2 of them informative.
# The fixed seed and shuffle=False keep the generated arrays identical
# from run to run.
x, y = make_regression(
    n_features=4,
    n_informative=2,
    random_state=0,
    shuffle=False,
)

In [15]:
# Train/test split: hold out 33% of the samples, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

In [16]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

---

In [17]:
# AdaBoostRegressor model training.
# random_state is pinned so the fitted ensemble, and the R^2 score computed
# from it, are reproducible after a kernel restart.
ada_regressor = AdaBoostRegressor(random_state=42)
ada_regressor.fit(x_train, y_train)

AdaBoostRegressor()

In [18]:
# Predicted targets of the AdaBoost regressor for the held-out test rows.
y_pred = ada_regressor.predict(X=x_test)

In [19]:
# Performance calculation: R^2 of the predictions against the test targets.
from sklearn.metrics import r2_score

print(r2_score(y_true=y_test, y_pred=y_pred))

0.9263825656799889


---

In [20]:
# GradientBoostingRegressor model training.
# random_state is pinned so the fitted trees, and the R^2 score computed
# from them, are reproducible after a kernel restart.
gb_regressor = GradientBoostingRegressor(random_state=42)
gb_regressor.fit(x_train, y_train)

GradientBoostingRegressor()

In [21]:
# Predicted targets of the gradient boosting regressor for the held-out test rows.
y_pred = gb_regressor.predict(X=x_test)

In [22]:
# Performance calculation: R^2 of the predictions against the test targets.
from sklearn.metrics import r2_score

print(r2_score(y_true=y_test, y_pred=y_pred))

0.937820850160989


---

In [23]:
# Extreme Gradient Boosting (XGBoost) regressor with default hyper-parameters.
# NOTE: the variable name `xgb_regessor` (sic) is kept as-is because the
# prediction cell refers to it by this spelling.
xgb_regessor = XGBRegressor()
xgb_regessor.fit(X=x_train, y=y_train)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)

In [24]:
# Predicted targets of the XGBoost regressor for the held-out test rows.
y_pred = xgb_regessor.predict(X=x_test)

In [25]:
# Performance calculation: R^2 of the predictions against the test targets.
from sklearn.metrics import r2_score

print(r2_score(y_true=y_test, y_pred=y_pred))

0.9085731911474559


---