## Implementing the AdaBoost Algorithm From Scratch

**AdaBoost means Adaptive Boosting which is a ensemble learning technique that combines multiple weak classifiers to create a strong classifier. It works by sequentially adding classifiers to correct the errors made by previous models giving more weight to the misclassified data points. Lets implement AdaBoost algorithm from scratch.**

### 1. Import Libraries

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score

### 2. Defining the AdaBoost Class

In [2]:
class AdaBoost:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators
        self.alphas = []
        self.models = []

### 3. Training the AdaBoost Model

In [3]:
def fit(self, X, y):
        n_samples, n_features = X.shape  
        w = np.ones(n_samples) / n_samples 

        for _ in range(self.n_estimators):
            model = DecisionTreeClassifier(max_depth=1)  
            model.fit(X, y, sample_weight=w)  
            predictions = model.predict(X)  

            err = np.sum(w * (predictions != y)) / np.sum(w)

            alpha = 0.5 * np.log((1 - err) / (err + 1e-10))

            self.models.append(model) 
            self.alphas.append(alpha)  

            w *= np.exp(-alpha * y * predictions)  
            w /= np.sum(w)

### 4. Defining Predict Method

In [4]:
def predict(self, X):
        strong_preds = np.zeros(X.shape[0])  

        for model, alpha in zip(self.models, self.alphas):
            predictions = model.predict(X)  
            strong_preds += alpha * predictions  

        return np.sign(strong_preds).astype(int)

### 5. Example Usage

In [9]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Base estimator
base_clf = DecisionTreeClassifier(max_depth=1, random_state=42)

# AdaBoost model
adaboost = AdaBoostClassifier(
    estimator=base_clf,
    n_estimators=50,
    random_state=42
)

# Train
adaboost.fit(X_train, y_train)

# Predict
predictions = adaboost.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
print(f"Accuracy: {accuracy * 100}%")

Accuracy: 0.8466666666666667
Accuracy: 84.66666666666667%


In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


In [11]:
y_pred = adaboost.predict(X_test)
y_prob = adaboost.predict_proba(X_test)[:, 1]   # required for ROC-AUC


In [12]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)


In [13]:
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")


Accuracy: 84.67%
Precision: 0.8609
Recall: 0.8387
F1 Score: 0.8497
ROC-AUC: 0.9164


## The End !! 