In [6]:
from Decision_Tree import DecisionTree
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


class AdaBoost:
    def __init__(self, base_model, n_estimators=10):
        self.base_model = base_model
        self.n_estimators = n_estimators
        self.models = []
        self.alphas = []

    def fit(self, X, y):
        n = len(y)
        w = np.ones(n) / n

        for i in range(self.n_estimators):
            model = self.base_model()
            model.fit(X, y)
            y_pred = model.predict(X)

            # Compute weighted error
            err = np.sum(w * (y_pred != y)) / np.sum(w)
            if err == 0 or err >= 0.5:
                break

            # Compute model weight (alpha)
            alpha = 0.5 * np.log((1 - err) / (err + 1e-10))
            self.models.append(model)
            self.alphas.append(alpha)

            # Update sample weights
            w = w * np.exp(-alpha * y * (2 * y_pred - 1))  # y, y_pred ∈ {0,1}
            w /= np.sum(w)

    def predict(self, X):
        preds = np.zeros(X.shape[0])
        for model, alpha in zip(self.models, self.alphas):
            preds += alpha * (2 * model.predict(X) - 1)
        return (preds > 0).astype(int)

In [7]:

url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
data = pd.read_csv(url)
data = data[["Pclass", "Sex", "Age", "SibSp", "Fare", "Survived"]]
data.dropna(inplace=True)


data["Sex"] = data["Sex"].map({"male": 0, "female": 1})

X = data.drop("Survived", axis=1)
y = data["Survived"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
boost = AdaBoost(base_model=lambda: DecisionTree(max_depth=1), n_estimators=20)
boost.fit(X_train, y_train)
y_pred_boost = boost.predict(X_test)

acc_boost = accuracy_score(y_test, y_pred_boost)
print(f"AdaBoost Accuracy: {acc_boost:.3f}")

AdaBoost Accuracy: 0.734
