# Bagging

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
import numpy as np


class BaggingClassifier:
    """Bootstrap-aggregating (bagging) ensemble around a user-supplied model.

    Every ensemble member is an independent copy of ``base_classifier``
    trained on a bootstrap sample (drawn with replacement) of the training
    data. The final prediction is the per-sample majority vote of all
    members.

    Parameters
    ----------
    base_classifier : object
        Template estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
        never fitted itself; each member is a deep copy of it, so the
        hyperparameters configured on the template carry over.
    n_estimators : int
        Number of ensemble members to train.
    random_state : int or None, default=None
        Seed for the bootstrap draws. ``None`` draws from NumPy's global
        random state (so ``np.random.seed`` still controls it).
    """

    def __init__(self, base_classifier, n_estimators, random_state=None):
        self.base_classifier = base_classifier
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.classifiers = []

    def fit(self, X, y):
        """Train ``n_estimators`` members, each on its own bootstrap sample.

        Parameters
        ----------
        X : array-like
            Training samples, indexable along the first axis.
        y : array-like
            Training labels, one per sample in ``X``.

        Returns
        -------
        list
            The freshly trained members (also stored in ``self.classifiers``).

        Raises
        ------
        ValueError
            If ``X`` and ``y`` contain a different number of samples.
        """
        from copy import deepcopy

        # Fancy indexing below needs real arrays, not lists.
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(X)
        if n_samples != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {len(y)}")

        # ``np.random`` and ``RandomState`` both expose ``choice``; using the
        # module when no seed is given keeps the global-RNG behaviour.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Build into a new list so a repeated ``fit`` replaces the previous
        # ensemble instead of appending to it.
        members = []
        for _ in range(self.n_estimators):
            indices = rng.choice(n_samples, n_samples, replace=True)
            # Deep copy (rather than ``type(base)()``) keeps the template's
            # hyperparameters and works for estimators whose constructor
            # has required arguments.
            clf = deepcopy(self.base_classifier)
            clf.fit(X[indices], y[indices])
            members.append(clf)

        self.classifiers = members
        return self.classifiers

    def predict(self, X):
        """Predict labels for ``X`` by majority vote over all members.

        Ties are resolved in favour of the smallest label (in sorted order).

        Parameters
        ----------
        X : array-like
            Samples to classify; passed unchanged to each member's
            ``predict``.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample.

        Raises
        ------
        ValueError
            If the ensemble has not been fitted yet.
        """
        if not self.classifiers:
            raise ValueError(
                "BaggingClassifier is not fitted yet; "
                "call fit() before predict().")

        # One row per member, one column per sample.
        votes = np.asarray([clf.predict(X) for clf in self.classifiers])

        # Count votes per distinct label instead of using ``np.bincount``,
        # which only accepts non-negative integers.
        labels = np.unique(votes)
        counts = np.array([(votes == label).sum(axis=0) for label in labels])

        # ``labels`` is sorted and ``argmax`` returns the first maximum, so a
        # tie goes to the smallest label.
        return labels[counts.argmax(axis=0)]
    

# Testing


In [11]:
# Load the scikit-learn digits dataset: feature matrix X and label vector y.
digits = load_digits()
X, y = digits.data, digits.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [12]:
# Bag ten default-configured decision trees and fit them on the training split.
# NOTE: no random seed is set in this cell, so the printed accuracy can vary
# between runs.
base_clf = DecisionTreeClassifier()
model = BaggingClassifier(base_classifier=base_clf, n_estimators=10)
model.fit(X_train, y_train)

# Score the majority-vote predictions of the ensemble on the held-out split.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9388888888888889


In [13]:
# Score each ensemble member on its own against the same test split, for
# comparison with the combined (voted) accuracy printed above.
for i, clf in enumerate(model.classifiers):
    y_pred_i = clf.predict(X_test)
    acc_i = accuracy_score(y_test, y_pred_i)
    # Report members 1-based.
    print(f"Accuracy of classifier {i+1}: {acc_i:.4f}")

Accuracy of classifier 1: 0.8694
Accuracy of classifier 2: 0.8750
Accuracy of classifier 3: 0.8194
Accuracy of classifier 4: 0.8389
Accuracy of classifier 5: 0.8556
Accuracy of classifier 6: 0.8444
Accuracy of classifier 7: 0.8639
Accuracy of classifier 8: 0.8278
Accuracy of classifier 9: 0.8583
Accuracy of classifier 10: 0.8361
