In [1]:
# 0. basics
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# 1. random seed: a single seeded RandomState, passed as `random_state` to the
#    dataset generation and the train/test split further down.
rng = np.random.RandomState(101)

def bagging_fit(X, y, n_estimators, max_depth=5, max_samples=200,
                random_state=None):
    """Train a bagging ensemble of decision trees on bootstrap samples.

    Each tree is fit on ``max_samples`` row indices drawn uniformly *with
    replacement* from the training set, so every tree sees a different
    resampling of the data.

    Parameters
    ----------
    X : indexable as ``X[idx, :]``
        Training features, one row per example.
    y : indexable as ``y[idx]``
        Training labels, same length as the number of rows in ``X``.
    n_estimators : int
        Number of trees to train.
    max_depth : int, default 5
        Depth limit passed to every ``DecisionTreeClassifier``.
    max_samples : int, default 200
        Size of each bootstrap sample (may exceed ``len(y)`` because the
        draw is with replacement).
    random_state : None, int, or object with a ``choice`` method, default None
        Source of randomness for the bootstrap draws. ``None`` keeps the
        historical behaviour of using NumPy's global random state; an int
        seeds a fresh ``np.random.RandomState``; an existing
        ``RandomState``/``Generator`` is used as-is so that callers can make
        the resampling reproducible.

    Returns
    -------
    list
        The fitted ``DecisionTreeClassifier`` objects, in training order.
    """
    n_examples = len(y)

    # Resolve where the bootstrap indices come from. Previously the draw
    # always used the global NumPy state, so a seeded generator created by
    # the caller had no effect on the ensemble.
    if random_state is None:
        sampler = np.random
    elif hasattr(random_state, "choice"):
        sampler = random_state
    else:
        sampler = np.random.RandomState(random_state)

    estimators = []
    for _ in range(n_estimators):
        # Bootstrap sample: row indices drawn with replacement.
        bag = sampler.choice(n_examples, max_samples, replace=True)

        tree = DecisionTreeClassifier(max_depth=max_depth)
        tree.fit(X[bag, :], y[bag])
        estimators.append(tree)

    return estimators

'''
This function will return a list of DecisionTreeClassifier objects. We can use this
ensemble for prediction, which is implemented in the following listing.
'''

# 2. prediction
from scipy.stats import mode

def bagging_predict(X, estimators):
    """Predict by majority vote over an ensemble of fitted estimators.

    Parameters
    ----------
    X : array-like
        Samples to classify; passed unchanged to each estimator's
        ``predict`` method.
    estimators : sequence
        Fitted estimators, each exposing ``predict(X)``.

    Returns
    -------
    numpy.ndarray
        The most frequent predicted label for each sample, one entry per
        sample (a single-sample input yields a length-1 array, not a 0-d
        scalar array).

    Raises
    ------
    ValueError
        If ``estimators`` is empty, since there are no votes to aggregate.
    """
    if len(estimators) == 0:
        raise ValueError("bagging_predict requires at least one estimator")

    # Row i holds estimator i's predictions for every sample.
    all_predictions = np.array([tree.predict(X) for tree in estimators])

    # Vote down the estimator axis. keepdims=False already drops that axis,
    # so no further squeezing is needed; squeezing would also collapse the
    # sample axis when only one sample is predicted.
    y_pred, _ = mode(all_predictions, axis=0, keepdims=False)

    return y_pred

# 3. test
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Noisy two-moons toy problem. The shared rng is consumed first by the data
# generation and then by the split, in that order.
X, y = make_moons(n_samples=300, noise=0.25, random_state=rng)
Xtrn, Xtst, ytrn, ytst = train_test_split(
    X, y, test_size=0.33, random_state=rng
)

# Fit 500 trees of depth <= 12, each on a bootstrap sample of 300 draws,
# then take the majority vote on the held-out 33%.
bag_ens = bagging_fit(
    Xtrn,
    ytrn,
    n_estimators=500,
    max_depth=12,
    max_samples=300,
)
ypred = bagging_predict(Xtst, bag_ens)

test_accuracy = accuracy_score(ytst, ypred)
print(test_accuracy)

0.9393939393939394
