In [None]:
# Ensemble Learning – Boosting
Practical demonstration of AdaBoost and Gradient Boosting.


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score


In [3]:
# Synthetic binary-classification data: 500 samples, 6 features of which 4
# are informative. The fixed seed keeps the dataset reproducible.
dataset_params = dict(n_samples=500, n_features=6, n_informative=4, random_state=42)
X, y = make_classification(**dataset_params)


In [4]:
# Hold out 20% of the samples for testing; the seed fixes the shuffle.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split


In [5]:
# Baseline: a single depth-1 decision tree (a stump), scored on the test split.
dt = DecisionTreeClassifier(max_depth=1, random_state=42).fit(X_train, y_train)

dt_pred = dt.predict(X_test)
baseline_acc = accuracy_score(y_test, dt_pred)
baseline_acc


0.61

In [6]:
# AdaBoost over 100 depth-1 trees with a learning rate of 0.5.
stump = DecisionTreeClassifier(max_depth=1)
ada = AdaBoostClassifier(estimator=stump, n_estimators=100, learning_rate=0.5, random_state=42)
ada.fit(X_train, y_train)

# Score on the held-out test split.
ada_pred = ada.predict(X_test)
ada_acc = accuracy_score(y_test, ada_pred)
ada_acc


0.73

In [7]:
# Gradient boosting: 100 trees of depth 3 with a learning rate of 0.1.
gb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}
gb = GradientBoostingClassifier(**gb_params)
gb.fit(X_train, y_train)

# Score on the held-out test split.
gb_pred = gb.predict(X_test)
gb_acc = accuracy_score(y_test, gb_pred)
gb_acc


0.81

In [8]:
# Side-by-side test accuracy of the three models, one line per model.
for label, score in (
    ("Decision Tree Accuracy:", baseline_acc),
    ("AdaBoost Accuracy:", ada_acc),
    ("Gradient Boosting Accuracy:", gb_acc),
):
    print(label, score)


Decision Tree Accuracy: 0.61
AdaBoost Accuracy: 0.73
Gradient Boosting Accuracy: 0.81


In [9]:
# Display the fitted gradient-boosting model's per-feature importances: one
# value for each of the 6 input features (presumably in the column order of X).
gb.feature_importances_


array([0.10001042, 0.20890372, 0.1519652 , 0.11915166, 0.2552999 ,
       0.16466911])

In [None]:
## Key Takeaways
- On this dataset, both boosted ensembles outperform the single decision stump (0.73 and 0.81 vs. 0.61 test accuracy)
- Boosting reduces bias
- Learning rate scales each estimator's contribution; smaller values usually need more estimators but help limit overfitting
- Gradient Boosting is very powerful on tabular data
