In [2]:
import numpy as np

class AdaBoostClassifier:
    """
    Binary AdaBoost classifier using decision stumps as weak learners.

    Labels equal to 0 form the negative class; every other label value is
    treated as the positive class. Predictions are always returned as 0 / 1.

    Parameters
    ----------
    n_estimators : int, default=50
        Maximum number of boosting rounds (one stump is trained per round).

    Attributes
    ----------
    alphas : list of float
        Vote weight of each trained stump, in training order.
    stumps : list
        Trained weak learners, aligned index-for-index with ``alphas``.
    """

    # Keeps the weighted error strictly inside (0, 1) so the log-ratio that
    # defines alpha stays finite.
    _ERR_EPS = 1e-10

    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators
        self.alphas = []
        self.stumps = []

    def fit(self, X, y):
        """
        Train the ensemble on ``X`` / ``y``.

        Any previously trained stumps are discarded, so calling ``fit`` again
        retrains from scratch instead of growing the old ensemble.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of length n_samples
            Class labels; 0 is mapped to -1 internally, anything else to +1.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` has a different length.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        # Start from a clean slate so a refit does not accumulate old learners.
        self.alphas = []
        self.stumps = []

        # Ensure y is {-1, 1}
        y_ = np.where(y == 0, -1, 1)
        # Initialize weights uniformly
        w = np.ones(n_samples) / n_samples

        for _ in range(self.n_estimators):
            stump = DecisionStump()
            stump.fit(X, y_, w)
            pred = stump.predict(X)

            # Weighted misclassification rate of this round's stump
            raw_err = np.sum(w * (pred != y_)) / np.sum(w)
            # Clip so that alpha is finite even for a perfect / fully wrong stump
            err = np.clip(raw_err, self._ERR_EPS, 1 - self._ERR_EPS)
            alpha = 0.5 * np.log((1 - err) / err)

            self.alphas.append(alpha)
            self.stumps.append(stump)

            # A perfect stump scales every weight by the same factor, so after
            # normalisation the weights are unchanged and every later round
            # would refit the identical stump. Stop here; predictions are the
            # same as with the redundant copies.
            if raw_err <= 0:
                break

            # Up-weight the samples this stump got wrong, then renormalise
            w *= np.exp(-alpha * y_ * pred)
            w /= np.sum(w)

    def predict(self, X):
        """
        Predict 0 / 1 class labels for the rows of ``X``.

        Each stump votes -1 / +1 scaled by its alpha; a negative total gives
        class 0, otherwise class 1 (an exact tie of 0 therefore yields 1).

        Raises
        ------
        RuntimeError
            If the classifier has not been fitted yet.
        """
        if not self.stumps:
            raise RuntimeError("AdaBoostClassifier must be fitted before calling predict")
        X = np.asarray(X)
        clf_preds = np.array([alpha * stump.predict(X) for stump, alpha in zip(self.stumps, self.alphas)])
        score = np.sum(clf_preds, axis=0)
        return np.where(score < 0, 0, 1)

class DecisionStump:
    """
    One-level decision rule on a single feature, used as AdaBoost's weak learner.

    A sample is labelled -1 when ``polarity * x < polarity * threshold`` for the
    selected feature value ``x``, and +1 otherwise.
    """

    def __init__(self):
        self.feature_index = None
        self.threshold = None
        self.polarity = 1

    @staticmethod
    def _vote(column, threshold, polarity):
        """Apply the threshold rule to one feature column; returns floats in {-1, +1}."""
        return np.where(polarity * column < polarity * threshold, -1.0, 1.0)

    def fit(self, X, y, sample_weight):
        """
        Exhaustively search (feature, threshold, polarity) for the rule with the
        smallest weighted misclassification.

        Candidate thresholds are the distinct values of each feature column.
        The first rule reaching the minimum is kept (strict comparison), with
        candidates visited feature by feature, thresholds ascending, polarity
        +1 before -1.
        """
        _, n_features = X.shape
        lowest = float('inf')
        winner = None
        for col_idx in range(n_features):
            column = X[:, col_idx]
            for cut in np.unique(column):
                for sign in (1, -1):
                    missed = self._vote(column, cut, sign) != y
                    weighted_miss = np.sum(sample_weight * missed)
                    if weighted_miss < lowest:
                        lowest = weighted_miss
                        winner = (col_idx, cut, sign)
        # Leave the attributes untouched when there was no candidate at all.
        if winner is not None:
            self.feature_index, self.threshold, self.polarity = winner

    def predict(self, X):
        """Return the stump's -1 / +1 vote (as floats) for every row of ``X``."""
        return self._vote(X[:, self.feature_index], self.threshold, self.polarity)

> ## Example usage:

In [3]:
# Toy binary problem: three points with small feature values (class 0)
# and three with large feature values (class 1).
X = np.array([
    [1, 2],
    [2, 1],
    [3, 5],
    [6, 8],
    [7, 8],
    [8, 6],
])
y = np.array([0, 0, 0, 1, 1, 1])  # labels are 0 / 1

# Train the ensemble with a budget of five boosting rounds.
clf = AdaBoostClassifier(n_estimators=5)
clf.fit(X, y)

# Query points: one near each cluster and one in between.
X_test = np.array([[1, 2], [8, 7], [4, 4]])
print("AdaBoost predictions:", clf.predict(X_test))

AdaBoost predictions: [0 1 0]
