In [None]:
import numpy as np
# Draw a bootstrap sample: 50 indices sampled *with replacement* from 0..49.
# A dedicated, seeded RandomState keeps the displayed sample (and the
# out-of-bag set derived from it) identical on every Restart & Run All.
demo_rng = np.random.RandomState(4190)
bag = demo_rng.choice(np.arange(50), size=50, replace=True)
# Sorting is only for display: it makes the repeated indices easy to spot.
np.sort(bag)


In [None]:
# Out-of-bag (OOB) indices: every index in 0..49 that was never drawn into
# the bootstrap sample `bag`. np.setdiff1d returns them sorted and unique.
oob = np.setdiff1d(np.arange(50), bag, assume_unique=False)
oob


## Implementing bagging

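1. Generate a bootstrap sample from the original data set.
2. Fit a base estimator to the bootstrap sample.
3. Repeat steps 1–2 once per base estimator; at prediction time, combine the individual predictions by majority vote.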


In [None]:
import numpy as np
from scipy.stats import mode
from sklearn.tree import DecisionTreeClassifier

# Seeded random source shared by this notebook. bagging_fit draws from it by
# default, and the data-generation cell below passes it to scikit-learn.
rng = np.random.RandomState(seed=4190)


def bagging_fit(X, y, n_estimators, max_depth=5, max_samples=200,
                random_state=None):
    """Train a bagging ensemble of decision trees.

    Each tree is fit on its own bootstrap sample: ``max_samples`` row indices
    drawn with replacement from the ``len(y)`` training examples.

    Parameters
    ----------
    X : indexable as ``X[rows, :]``
        Training features, one row per example.
    y : indexable as ``y[rows]``
        Training labels; ``len(y)`` defines the number of examples.
    n_estimators : int
        Number of trees to train.
    max_depth : int, default 5
        Maximum depth passed to every ``DecisionTreeClassifier``.
    max_samples : int, default 200
        Size of each bootstrap sample. It may exceed ``len(y)`` because the
        draw is with replacement.
    random_state : None, int or numpy.random.RandomState, default None
        Source of randomness. ``None`` uses the module-level seeded ``rng``;
        a ``RandomState`` is used as is; anything else is treated as a seed
        for a new ``RandomState``.

    Returns
    -------
    list of DecisionTreeClassifier
        The fitted trees, in training order.
    """
    # The original drew from the unseeded global np.random state, which made
    # every run produce a different ensemble despite the seeded `rng` above.
    if random_state is None:
        sampler = rng
    elif isinstance(random_state, np.random.RandomState):
        sampler = random_state
    else:
        sampler = np.random.RandomState(random_state)

    n_examples = len(y)
    estimators = []
    for _ in range(n_estimators):
        bag = sampler.choice(n_examples, max_samples, replace=True)
        # The tree shares the sampler so that its internal feature
        # permutation / tie-breaking is repeatable as well.
        tree = DecisionTreeClassifier(max_depth=max_depth, random_state=sampler)
        tree.fit(X[bag, :], y[bag])
        estimators.append(tree)

    return estimators

def bagging_predict(X, estimators):
    """Predict labels by majority vote over a fitted ensemble.

    Parameters
    ----------
    X : array-like
        Examples to classify; passed unchanged to each estimator's
        ``predict``.
    estimators : iterable
        Fitted estimators, each exposing ``predict(X)``.

    Returns
    -------
    numpy.ndarray
        The most common prediction per example, taken across estimators
        (ties are resolved by ``scipy.stats.mode``). For 1-D per-estimator
        predictions the result has shape ``(n_examples,)``, including when
        ``n_examples == 1``.

    Raises
    ------
    ValueError
        If ``estimators`` is empty.
    """
    estimators = list(estimators)
    if not estimators:
        raise ValueError("bagging_predict requires at least one fitted estimator")

    # Rows are estimators, columns are examples.
    all_predictions = np.array([tree.predict(X) for tree in estimators])
    # keepdims=False already removes the estimator axis; the former
    # np.squeeze was redundant and turned a single-example result into a
    # 0-d array.
    ypred, _ = mode(all_predictions, axis=0, keepdims=False)
    return ypred


In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic two-class "moons" data set; the shared `rng` drives both the
# sample noise and the train/test split.
X, y = make_moons(n_samples=300, noise=0.25, random_state=rng)
Xtrn, Xtst, ytrn, ytst = train_test_split(
    X, y, test_size=0.33, random_state=rng
)

# Train 500 depth-12 trees, each on a 300-row bootstrap sample. That is more
# rows than the training split holds, so every sample repeats some rows.
bag_ens = bagging_fit(
    Xtrn,
    ytrn,
    n_estimators=500,
    max_depth=12,
    max_samples=300,
)

# Majority-vote predictions on the held-out split, scored by accuracy.
ypred = bagging_predict(Xtst, bag_ens)
print(accuracy_score(ytst, ypred))