# Bagging Regressor
- This notebook contains the code to implement the Bagging Regressor algorithm from scratch using `Majority Voting`

## Importing Libraries

In [5]:
# data manipulation
import numpy as np
import pandas as pd

# decision tree
from sklearn.tree import DecisionTreeRegressor

# dataset
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

## Custom Class

In [6]:
class BaggingRegressor:

  def __init__(self,
               estimator,
               n_estimators=100,
               random_state=None,
               **estimator_params):
    """
    Parameters:
    -----------

    estimator: estimator
               Any learning algorithm to use as the base estimator

    n_estimators: int
                  The total no. of base estimators to include in the ensemble

    random_state: int
                  To control randomness and reproducibility of results

    **estimator_params: dict
                        Key-word parameters specific to the base estimator
    """
    self.estimator = estimator
    self.n_estimators = n_estimators
    self.random_state = random_state
    self.estimator_params = estimator_params


  def _rmse(self, y, y_pred):
    return np.sqrt(np.mean((y - y_pred) ** 2))


  def fit(self, X, y):
    """
    This method will train a Bagging Classifier model
    """

    if self.random_state:
      np.random.seed(self.random_state)

    n, p = X.shape
    self.oob_score_ = 0
    self.oob_scores_ = []
    self.estimators_ = []
    indices = np.arange(n)

    for _ in range(self.n_estimators):
      bag_indices = np.random.choice(X.shape[0],
                                     size=n,
                                     replace=True)
      oob_indices = np.setdiff1d(indices, bag_indices)
      X_bag, y_bag = X[bag_indices], y[bag_indices]
      X_oob, y_oob = X[oob_indices], y[oob_indices]

      self.estimator_ = self.estimator.set_params(**self.estimator_params)
      self.estimator_.fit(X_bag, y_bag)
      oob_score = self._rmse(y_oob, self.estimator_.predict(X_oob))
      self.oob_score_ += oob_score
      self.oob_scores_.append(oob_score)
      self.estimators_.append(self.estimator_)

      return self


  def predict(self, X):
    return np.array([
        np.mean([estimator.predict(x.reshape(1, -1))[0] for estimator in self.estimators_])
        for x in X
    ])


  def evaluate(self, X, y):
    y_pred = self.predict(X)
    return self._rmse(y, y_pred)

## Getting the Data

In [4]:
X, y = fetch_california_housing(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=7)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(16512, 8) (16512,)
(4128, 8) (4128,)


## Training the Model

In [7]:
bag_reg = BaggingRegressor(estimator=DecisionTreeRegressor(),
                            n_estimators=50,
                            random_state=7)
bag_reg.fit(X_train, y_train)

<__main__.BaggingRegressor at 0x797e72b173a0>

In [8]:
bag_reg.oob_score_

0.7518512118017553

## Evaluating the Model

In [9]:
bag_reg.evaluate(X_train, y_train)

0.4538601682709338

In [10]:
bag_reg.evaluate(X_test, y_test)

0.7874742803138025