# AdaBoost Classification
- This notebook implements the AdaBoost algorithm for binary classification from scratch, using depth-1 decision trees (stumps) as base estimators and labels encoded as -1 / +1

## Importing Libraries

In [7]:
# data manipulation
import numpy as np
import pandas as pd

# tree
from sklearn.tree import DecisionTreeClassifier

# dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

## Custom Class

In [112]:
class AdaBoostClassifier:
  """
  AdaBoost for binary classification, using depth-1 decision trees (stumps)
  as base estimators.

  Labels must be encoded as -1 / +1: the weight update multiplies the true
  label by the predicted label, and the final prediction is the sign of the
  alpha-weighted sum of the stump predictions.

  Attributes set by `fit`:
  ------------------------

  alphas_: np.ndarray of shape (n_estimators,)
           The vote weight of each base estimator

  base_estimators_: list
                    The fitted stumps, in training order

  base_estimator_: the most recently fitted stump
                   (kept for backward compatibility)
  """

  def __init__(self,
               n_estimators=100,
               epsilon=1e-4,
               random_state=None):
    """
    Parameters:
    -----------

    n_estimators: int
                  The total no. of base estimators to include in the ensemble

    epsilon: float
             Added to the numerator and denominator of the error ratio so that
             the log stays finite when a stump has a weighted error of 0 or 1

    random_state: int or None
                  Seed passed to `np.random.seed` at the start of `fit` for
                  reproducible results across multiple runs; None leaves the
                  global random state untouched
    """
    self.n_estimators = n_estimators
    self.epsilon = epsilon
    self.random_state = random_state


  def fit(self, X, y):
    """
    This method will train an AdaBoost classifier model

    Parameters:
    -----------

    X: array-like of shape (n_samples, n_features)
       The training samples

    y: array-like of shape (n_samples,)
       The training labels, every value must be -1 or +1

    Returns:
    --------

    self: the fitted classifier

    Raises:
    -------

    ValueError: if `y` contains anything other than -1 and +1
    """

    # a plain ndarray keeps the weight arithmetic positional even when y is
    # a pandas Series with a non-default index
    y = np.asarray(y)
    if not np.all((y == 1) | (y == -1)):
      raise ValueError("AdaBoostClassifier expects labels encoded as -1 and +1")

    # `is not None` instead of a truthiness test, so a seed of 0 is honoured
    if self.random_state is not None:
      np.random.seed(self.random_state)

    n = np.shape(X)[0]
    weights = np.ones(n) / n
    alphas = []
    self.base_estimators_ = []

    for _ in range(self.n_estimators):
      # training a base estimator on the current sample weights
      stump = DecisionTreeClassifier(max_depth=1)
      stump.fit(X, y, sample_weight=weights)
      self.base_estimators_.append(stump)
      # kept for backward compatibility: always the most recently fitted stump
      self.base_estimator_ = stump

      # computing the weight of the base estimator from its weighted error
      y_pred = stump.predict(X)
      err = weights[y != y_pred].sum()
      alpha = 0.5 * np.log((1 - err + self.epsilon) / (err + self.epsilon))
      alphas.append(alpha)

      # updating the weights: y * y_pred is +1 for a correct prediction and
      # -1 for a wrong one, so (for alpha > 0) mistakes gain weight
      weights = weights * np.exp(-alpha * y * y_pred)
      weights = weights / weights.sum()

    self.alphas_ = np.array(alphas)
    print("> Succesfully trained an AdaBoost Classifier model\n")

    return self


  def predict(self, X):
    """
    This method will return the predicted labels

    The prediction is the sign of the alpha-weighted sum of the base
    estimators' predictions. An exact tie (sum of 0) is resolved to +1 so
    that the output only ever contains the labels -1 and +1.
    """
    result = np.zeros(np.shape(X)[0])
    for alpha, tree in zip(self.alphas_, self.base_estimators_):
      result += alpha * tree.predict(X)
    # np.sign would map a sum of exactly 0 to the invalid label 0
    return np.where(result >= 0, 1, -1)


  def score(self, X, y):
    """
    This method will return the accuracy of the model on the given data,
    i.e. the fraction of samples whose predicted label equals `y`
    """
    y_pred = self.predict(X)
    return float(np.mean(np.asarray(y) == y_pred))

## Getting the Data

In [113]:
# load Iris and recode the target for binary classification:
# class 0 becomes +1, every other class becomes -1
X, y = load_iris(return_X_y=True)
y = np.where(y != 0, -1, 1)

# 80/20 train/test split, stratified on the recoded labels, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=7
)

# shapes of the resulting splits
print(f"{X_train.shape} {y_train.shape}")
print(f"{X_test.shape} {y_test.shape}")

(120, 4) (120,)
(30, 4) (30,)


## Training the Model

In [114]:
# a fixed seed makes the fitted stumps identical from run to run
ada = AdaBoostClassifier(random_state=7)
ada.fit(X_train, y_train)

> Succesfully trained an AdaBoost Classifier model



<__main__.AdaBoostClassifier at 0x7a9742e137c0>

In [None]:
# one vote weight (alpha) per fitted base estimator
ada.alphas_

## Evaluating the Model

In [115]:
# accuracy on the training split
ada.score(X_train, y_train)

1.0

In [116]:
# accuracy on the held-out test split
ada.score(X_test, y_test)

1.0

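- The model reaches 100% accuracy on both the train and the test split. This is expected rather than surprising: the task here is class 0 (setosa) versus the rest of Iris, and that class is cleanly separable from the other two, so the perfect score says little about how the implementation behaves on harder problems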