In [6]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

In [9]:
class AdaBoost:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators
        self.weights = []
        self.models = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        w = np.ones(n_samples)/n_samples
        for _ in range(self.n_estimators):
            model = DecisionTreeClassifier(max_depth=1)
            model.fit(X, y, sample_weight=w)
            predictions = model.predict(X)
            error = np.sum(w*(predictions!=y))/np.sum(w)
            weight = 0.5 * np.log((1-error) / (error+1e-10))
            self.weights.append(weight)
            self.models.append(model)
            w = w * np.exp(-weight*y*predictions)
            w = w/np.sum(w)

    def predict(self, X):
        strong_preds = np.zeros(X.shape[0])
        for model, weight in zip(self.models, self.weights):
            strong_preds += model.predict(X)*weight
        return np.sign(strong_preds).astype(int)

In [13]:
if __name__ == "__main__":

    X, y = make_classification(n_samples=10000, n_features=200, n_classes=2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    adaboost = AdaBoost(n_estimators=200)
    adaboost.fit(X_train, y_train)

    predictions = adaboost.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy * 100}%")


Accuracy: 88.36666666666667%
