## AdaBoost Algorithm
What is it? The term ‘Boosting’ refers to a family of algorithms which converts weak learner to strong learners.
How it works:
- trains multiple weak models sequentially, with each *subsequent model* focusing on *correcting the errors made by the previous models*
- After each model is trained, the **algorithm adjusts the weights of the training examples**. **Incorrectly predicted examples are given higher weights** (they are "focused on"), so the next model is forced to pay more attention to them.
- The final prediction is made by combining the predictions of all models, often with weighted averaging

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
class AdaBoost:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators # number of weak learners 
        self.alphas = []                 # stores the weights of each model, relative to its performance
        self.models = []                 # stores the weak classifiers in AdaBoost
    
    # training
    def fit(self, X, y):
        n_samples, n_features = X.shape    # Retrieves the number of samples and features from the dataset
        w = np.ones(n_samples) / n_samples # Initializes sample weights uniformly
        
        for _ in range(self.n_estimators):
            model = DecisionTreeClassifier(max_depth=1)
            model.fit(X, y, sample_weight=w)
            predictions = model.predict(X)
            err = np.sum(w * (predictions != y)) / np.sum(w) # computes the weighted error to penalize misclassified samples more
            alpha = 0.5 * np.log((1 - err) / (err + 1e-10))  # calculates the model weight based on its error; models with lower error receive higher weight (alpha)
            self.alphas.append(alpha)                        # appends the model’s weight to the list
            self.models.append(model)                        # appends the trained weak classifier to the list
            w = w * np.exp(-alpha * y * predictions)         # updates the sample weights
            w = w / np.sum(w)
    
    # making predictions    
    def predict(self, X):
        strong_preds = np.zeros(X.shape[0])                  # stores the aggregated predictions from all weak classifiers
        for model, alpha in zip(self.models, self.alphas):
            strong_preds += alpha * model.predict(X)
        return np.sign(strong_preds).astype(int)
    
    

In [None]:
# sample dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
adaboost = AdaBoost(n_estimators=50)
adaboost.fit(X_train, y_train)

In [18]:
predictions = adaboost.predict(X_test)

accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
try:
    roc_auc = roc_auc_score(y_test, predictions)
except ValueError:
    roc_auc = 'Undefined (requires probability scores)'

# Print results
print(f"Accuracy: {accuracy * 100}%")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"ROC-AUC: {roc_auc}")

Accuracy: 84.0%
Precision: 0.8364779874213837
Recall: 0.8580645161290322
F1 Score: 0.8471337579617835
ROC-AUC: 0.839377085650723
