<a href="https://colab.research.google.com/github/Sameersah/decision-trees-ensemble/blob/main/AdaBoost_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


AdaBoost Implementation

AdaBoost updates weights for misclassified samples to focus on harder examples in subsequent iterations.

Steps:
Initialize Weights: Assign equal weights to all samples.
Train Weak Learner: Use a weak learner (e.g., decision stump).
Update Weights: Increase weights for misclassified samples.
Combine Weak Learners: Aggregate predictions using weighted voting.

AdaBoost (Adaptive Boosting) combines multiple weak learners to create a strong classifier. At each iteration:




In [1]:
import numpy as np

class AdaBoost:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators
        self.alphas = []
        self.models = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        weights = np.ones(n_samples) / n_samples  # Initialize weights equally

        for _ in range(self.n_estimators):
            # Train weak learner
            tree = DecisionTreeClassifier(max_depth=1)  # Decision stump
            tree.fit(X, y, sample_weight=weights)
            predictions = tree.predict(X)

            # Calculate weighted error
            err = np.sum(weights * (predictions != y)) / np.sum(weights)

            # Compute alpha (amount of say for this weak learner)
            alpha = 0.5 * np.log((1 - err) / (err + 1e-10))
            self.alphas.append(alpha)
            self.models.append(tree)

            # Update weights
            weights *= np.exp(-alpha * y * predictions)
            weights /= np.sum(weights)  # Normalize

    def predict(self, X):
        # Weighted majority vote
        final_prediction = np.zeros(X.shape[0])
        for alpha, model in zip(self.alphas, self.models):
            final_prediction += alpha * model.predict(X)
        return np.sign(final_prediction)

# Example usage with a classification dataset
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=100, n_features=2, n_classes=2, random_state=42)
y = np.where(y == 0, -1, 1)  # Convert labels to -1 and 1 for AdaBoost

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train AdaBoost
adaboost = AdaBoost(n_estimators=50)
adaboost.fit(X_train, y_train)

# Predict and evaluate
y_pred = adaboost.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


ValueError: Number of informative, redundant and repeated features must sum to less than the number of total features