In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import random
from scipy import stats

In [2]:
# Load the iris dataset and pull out its feature matrix and target vector.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# shuffled split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=42
)

# Show the shape of every split, one per line.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)), sep="\n")

(105, 4)
(105,)
(45, 4)
(45,)


In [3]:
class Bagging:
    """Majority-vote ensemble that fits caller-supplied models on random subsamples.

    Parameters
    ----------
    boostrap : int
        Intended number of resampled training sets. It is stored for callers,
        but the number of estimators actually fitted and polled is
        ``len(models)``, so the two should be equal.
    boostrap_ratio : float
        Size of each resampled training set as a fraction of the rows passed
        to ``fit`` (truncated to an integer).
    tree_params : dict
        Hyper-parameters the caller used to build ``models``. Stored only;
        this class never reads it.
    models : list
        Unfitted estimators exposing ``fit(X, y)`` and ``predict(X)``.
    with_no_replacement : bool, default True
        When True every row appears at most once in a resampled set; when
        False rows are drawn with replacement.
    random_state : int or None, default None
        Seed for a private ``random.Random`` used for the resampling. With
        None the module-level ``random`` generator is used, so a prior
        ``random.seed(...)`` call still controls the draw.
    """

    def __init__(self, boostrap, boostrap_ratio, tree_params, models,
                 with_no_replacement=True, random_state=None):
        self.boostrap = boostrap
        self.boostrap_ratio = boostrap_ratio
        self.tree_params = tree_params
        self.models = models
        self.with_no_replacement = with_no_replacement
        self.random_state = random_state

    def fit(self, X, y):
        """Fit every model on its own resampled set and report out-of-bag accuracy.

        Prints the out-of-bag accuracy of each model and stores the mean in
        ``self.avg_oob_score`` (NaN when no model had any out-of-bag rows).

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of rows, if the
            requested sample is empty, or if more rows are requested without
            replacement than ``X`` contains.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        m = X.shape[0]
        if y.shape[0] != m:
            raise ValueError(
                "X and y must have the same number of rows, got %d and %d"
                % (m, y.shape[0])
            )

        sample_size = int(self.boostrap_ratio * m)
        if sample_size < 1:
            raise ValueError(
                "boostrap_ratio=%r selects no rows out of %d"
                % (self.boostrap_ratio, m)
            )
        if self.with_no_replacement and sample_size > m:
            # The previous rejection loop would spin forever in this case.
            raise ValueError(
                "cannot draw %d rows without replacement from %d rows"
                % (sample_size, m)
            )

        # A private generator keeps seeded runs independent of global state.
        rng = random if self.random_state is None else random.Random(self.random_state)

        oob_scores = []
        print("_______Bagging score for each tree_______")
        for model in self.models:
            if self.with_no_replacement:
                index = rng.sample(range(m), sample_size)
            else:
                index = [rng.randrange(m) for _ in range(sample_size)]

            # Fancy indexing keeps the label dtype instead of casting to float.
            model.fit(X[index], y[index])

            # Rows that were never drawn form this model's out-of-bag set.
            oob_mask = np.ones(m, dtype=bool)
            oob_mask[index] = False
            if not oob_mask.any():
                print("Tree ", "no out-of-bag samples")
                continue

            score = accuracy_score(y[oob_mask], model.predict(X[oob_mask]))
            oob_scores.append(score)
            print("Tree ", score)

        if oob_scores:
            self.avg_oob_score = sum(oob_scores) / len(oob_scores)
        else:
            self.avg_oob_score = float("nan")
        print("________Average out of bag score________")
        print(self.avg_oob_score)

    def predict(self, X):
        """Return the majority-vote label for every row of ``X``.

        Each model in ``self.models`` votes once per row. Ties go to the
        smallest label, which is how ``scipy.stats.mode`` breaks them.

        Returns
        -------
        numpy.ndarray
            One predicted label per row of ``X``.

        Raises
        ------
        ValueError
            If the ensemble holds no models.
        """
        if not self.models:
            raise ValueError("Bagging has no models to predict with")

        # One row of votes per model; sized by the models actually present so
        # that no all-zero placeholder row can vote for class 0.
        predictions = np.asarray([model.predict(X) for model in self.models])

        # Depending on the SciPy version the reduced axis is kept (1, n) or
        # dropped (n,); flattening yields one label per sample either way.
        majority = stats.mode(predictions, axis=0)[0]
        return np.asarray(majority).reshape(-1)



In [4]:
# Bagging draws its training rows with the stdlib `random` module, so seed it
# here to make a Restart & Run All produce the same samples and scores.
SEED = 42
random.seed(SEED)

boostrap = 5
boostrap_ratio = 0.8

tree_params = {'max_depth': 2, 'max_features': 'sqrt'}
# max_features='sqrt' makes every split consider a random subset of features,
# so each tree also gets its own fixed seed.
models = [DecisionTreeClassifier(**tree_params, random_state=SEED + i) for i in range(boostrap)]

model = Bagging(boostrap, boostrap_ratio, tree_params, models)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
print(classification_report(y_test, yhat))

_______Bagging score for each tree_______
Tree  0.9047619047619048
Tree  0.8571428571428571
Tree  0.7619047619047619
Tree  0.8571428571428571
Tree  0.9523809523809523
________Average out of bag score________
0.8666666666666666
              precision    recall  f1-score   support

           0       1.00      0.84      0.91        19
           1       0.81      1.00      0.90        13
           2       1.00      1.00      1.00        13

    accuracy                           0.93        45
   macro avg       0.94      0.95      0.94        45
weighted avg       0.95      0.93      0.93        45

