In [4]:
import numpy as np

In [8]:
class DecisionStump:
    def __init__(self):
        self.feature_idx=None
        self.threshold=None
        self.alpha=None
        self.polarity=1
        
    def predict(self, X):
        n_samples = X.shape[0]
        X_column = X[:, self.feature_idx]
        predictions = np.ones(n_samples)
        if self.polarity == 1:
            predictions[X_column < self.threshold] = -1
        else:
            predictions[X_column > self.threshold] = -1
            
        return predictions

In [15]:
class Adaboost:
    def __init__(self, n_clf=5):
        self.n_clf = n_clf
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        w = np.full(n_samples, 1/n_samples)
        self.clfs = []
        
        for _ in range(self.n_clf):
            
            clf = DecisionStump()
            min_error = float('inf')
            
            for f in range(n_features):
                X_column = X[:,f]
                thresholds = np.unique(X_column)
                
                for t in thresholds:
                    p=1
                    predictions = np.ones(n_samples)
                    predictions[X_column < t] = -1
                    
                    misclassified = w[y!=predictions]
                    error = sum(misclassified)
                    
                    if error > 0.5:
                        error = 1-error
                        p=-1
                        
                    if error < min_error:
                        clf.polarity = p
                        clf.threshold = t
                        clf.feature_idx = f
                        min_error = error
                        
            EPS = 1e-10
            clf.alpha = 0.5 * np.log((1.0 - min_error + EPS) / (min_error + EPS))
            
            predictions = clf.predict(X)
            
            w *= np.exp(-clf.alpha * y * predictions)
            w /= np.sum(w)
            
            self.clfs.append(clf)
            
    def predict(self, X):
        clf_preds = [clf.alpha * clf.predict(X) for clf in self.clfs]
        ypred = np.sum(clf_preds, axis=0)
        print(ypred)
        ypred = np.sign(ypred)
        return ypred
        

In [16]:
if __name__ == "__main__":
    # Imports
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    def accuracy(y_true, y_pred):
        accuracy = np.sum(y_true == y_pred) / len(y_true)
        return accuracy

    data = datasets.load_breast_cancer()
    X, y = data.data, data.target

    y[y == 0] = -1

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=5
    )

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy(y_test, y_pred)
    print("Accuracy:", acc)

[-4.13373664  2.55039001  1.12396037  2.55039001  2.55039001 -4.13373664
  1.39537603  4.13373664  1.12396037  2.97872265  2.55039001  2.55039001
 -4.13373664  2.55039001  2.27897436  4.13373664  0.52437067  4.13373664
  2.55039001  4.13373664 -2.55039001  4.13373664  4.13373664  2.97872265
  2.97872265  4.13373664  0.69562773 -4.13373664  0.69562773 -2.55039001
 -2.55039001 -4.13373664 -1.33039161 -2.55039001  4.13373664  2.55039001
 -0.09603803  4.13373664  2.27897436 -2.55039001  2.55039001  4.13373664
  4.13373664 -2.55039001  4.13373664  1.39537603 -4.13373664 -1.67938466
  1.39537603 -4.13373664  2.27897436  4.13373664  4.13373664  2.97872265
  0.69562773 -4.13373664 -4.13373664 -2.55039001  4.13373664 -2.55039001
 -1.67938466 -2.91373823  2.55039001  4.13373664  2.97872265  2.27897436
  1.39537603  1.39537603  4.13373664 -2.55039001  4.13373664 -4.13373664
  2.97872265  2.55039001  1.39537603  4.13373664  0.52437067  4.13373664
 -4.13373664 -1.67938466 -4.13373664  4.13373664 -4