In [6]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.utils import resample
from Decision_Tree import DecisionTree

class Bagging:
    """Bootstrap-aggregating (bagging) ensemble for classification.

    Each estimator is trained on a bootstrap sample (drawn with replacement)
    of the training data; predictions are combined by plurality vote.

    Parameters
    ----------
    base_model : callable
        Zero-argument factory that returns a fresh, unfitted estimator
        exposing ``fit(X, y)`` and ``predict(X)``.
    n_estimators : int, default=10
        Number of estimators to train.
    sample_size : int or None, default=None
        Number of rows drawn for each bootstrap sample. ``None`` (or any
        falsy value) means "as many rows as the training set".
    random_state : int or None, default=42
        Seed for the bootstrap sampling. It is used to build a private
        generator inside ``fit``; NumPy's global random state is left alone.
    """

    def __init__(self, base_model, n_estimators=10, sample_size=None, random_state=42):
        self.base_model = base_model
        self.n_estimators = n_estimators
        self.sample_size = sample_size
        # Store the seed instead of calling np.random.seed(): seeding the
        # global generator from a constructor silently changes the behaviour
        # of unrelated code and makes results depend on call order.
        self.random_state = random_state
        self.models = []

    def fit(self, X, y):
        """Train ``n_estimators`` models, each on its own bootstrap sample.

        Parameters
        ----------
        X : array-like with a ``shape`` attribute
            Training features.
        y : array-like
            Training labels, aligned with ``X``.

        Returns
        -------
        self : Bagging
            The fitted ensemble.
        """
        n_draws = self.sample_size or X.shape[0]
        # A fresh generator per fit() makes repeated fits with the same seed
        # reproduce the same bootstrap samples. Sharing one generator across
        # the loop still gives every estimator a different sample.
        rng = np.random.RandomState(self.random_state)

        fitted = []
        for _ in range(self.n_estimators):
            X_sample, y_sample = resample(X, y, n_samples=n_draws, random_state=rng)
            model = self.base_model()
            model.fit(X_sample, y_sample)
            fitted.append(model)

        # Publish the list only once every estimator trained successfully.
        self.models = fitted
        return self

    def predict(self, X):
        """Predict labels by plurality vote over all fitted estimators.

        Ties are resolved in favour of the smallest label (for 0/1 labels a
        perfectly split vote therefore yields 0).

        Parameters
        ----------
        X : array-like
            Samples to classify; passed unchanged to each estimator.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample. Integer-valued float labels are
            returned as integers.

        Raises
        ------
        RuntimeError
            If the ensemble has no fitted estimators.
        """
        if not self.models:
            raise RuntimeError("Bagging has no fitted models; call fit() before predict().")

        # Shape: (n_estimators, n_samples).
        all_preds = np.asarray([np.asarray(model.predict(X)) for model in self.models])

        # Encode labels as 0..k-1 so votes can be counted with bincount; this
        # works for arbitrary label sets, not only {0, 1}.
        labels, encoded = np.unique(all_preds, return_inverse=True)
        # The shape of the inverse differs across NumPy versions; normalise it.
        encoded = encoded.reshape(all_preds.shape)
        votes = np.apply_along_axis(np.bincount, 0, encoded, minlength=labels.size)

        if labels.dtype.kind == "f" and np.all(np.mod(labels, 1) == 0):
            # Estimators that emit 0.0/1.0 still produce integer predictions.
            labels = labels.astype(int)

        # argmax returns the first maximum, i.e. the smallest label on ties.
        return labels[votes.argmax(axis=0)]


In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Titanic passenger data, fetched from the Data Science Dojo datasets repository.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"

# Build the modelling frame in one non-mutating chain: keep the model columns,
# drop incomplete rows, then encode Sex numerically. Avoiding
# `inplace=True` / column assignment on a column subset prevents pandas'
# SettingWithCopyWarning and keeps the cell idempotent on re-run.
data = (
    pd.read_csv(url)
    .loc[:, ["Pclass", "Sex", "Age", "SibSp", "Fare", "Survived"]]
    .dropna()
    .assign(Sex=lambda frame: frame["Sex"].map({"male": 0, "female": 1}))
)

# `.map` turns any value outside the mapping into NaN; fail loudly rather than
# feeding NaNs into the model.
assert data["Sex"].notna().all(), "unexpected value in 'Sex' column"

X = data.drop(columns="Survived")
y = data["Survived"]

# 80/20 hold-out split; fixed seed so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
def make_tree():
    """Return a fresh, unfitted depth-4 decision tree for the ensemble."""
    return DecisionTree(max_depth=4)


# Train a ten-tree bagging ensemble on the training split.
bagging = Bagging(base_model=make_tree, n_estimators=10)
bagging.fit(X_train, y_train)

# Score the ensemble on the held-out split.
y_pred_bag = bagging.predict(X_test)
acc_bag = accuracy_score(y_test, y_pred_bag)

print(f"Bagging Accuracy: {acc_bag:.3f}")

Bagging Accuracy: 0.769
