# AdaBoost

#### What is AdaBoost?

AdaBoost is an iterative algorithm that:

1. Trains a sequence of weak learners (e.g., decision stumps).

2. Assigns higher weights to misclassified samples in each iteration.

3. Combines the weak learners into a strong learner by weighting their predictions.

#### Key Terms:

1. Weak Learner: A model that performs slightly better than random guessing (e.g., a decision stump, which is a decision tree with only one split).

2. Sample Weights: Each training sample is assigned a weight, which determines its importance during training.

3. Model Weight: Each weak learner is assigned a weight based on its accuracy; more accurate learners get a larger say in the final vote.

#### How AdaBoost Works:

1. Initialize equal weights for all training samples.

2. For each iteration:

    1. Train a weak learner on the weighted data.

    2. Compute the weighted error of the weak learner (the total weight of the samples it misclassifies).

    3. Update the sample weights: Increase weights for misclassified samples and decrease weights for correctly classified samples.

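    4. Assign a weight to the weak learner based on its accuracy (a lower weighted error gives a larger weight). In practice this weight is computed before the sample weights are updated, because it also controls how strongly they change.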

3. Combine all weak learners into a strong learner using weighted voting.



In [None]:
import numpy as np

class DecisionStump:
    """A weak learner: a decision stump (one-level decision tree).

    The stump compares a single feature column against a threshold and
    outputs a label of -1.0 or 1.0 for every sample.

    Attributes:
        feature_index: Column of ``X`` the stump splits on.
        threshold: Value the selected column is compared against.
        polarity: When ``1``, samples with value <= threshold are labelled
            -1; for any other value, samples with value > threshold are
            labelled -1.
        alpha: Vote weight of this stump inside an ensemble. ``None`` until
            the training code assigns it.
    """

    def __init__(self):
        self.feature_index = None
        self.threshold = None
        self.polarity = None  # To handle flipping predictions
        # Declared here so the attribute always exists, even before an
        # ensemble trainer assigns the stump its vote weight.
        self.alpha = None

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features). Plain nested
                lists are accepted as well as NumPy arrays.

        Returns:
            A float NumPy array of length n_samples containing -1.0 or 1.0.
        """
        # Coerce so that nested lists work, not only ndarrays.
        X = np.asarray(X)
        column = X[:, self.feature_index]
        if self.polarity == 1:
            negative = column <= self.threshold
        else:
            negative = column > self.threshold
        return np.where(negative, -1.0, 1.0)

class AdaBoost:
    """AdaBoost binary classifier built from decision stumps.

    Training labels must be encoded as -1 / +1: the sample-weight update
    multiplies each label by the stump's prediction.

    Attributes:
        n_estimators: Number of boosting rounds (weak learners) to train.
        models: Fitted weak learners, in training order.
        alphas: Vote weight of each weak learner, aligned with ``models``.
    """

    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators  # Number of weak learners
        self.models = []  # List to store weak learners
        self.alphas = []  # List to store model weights

    def fit(self, X, y):
        """Train the ensemble on ``X`` and ``y``.

        Any learners left over from a previous call are discarded first, so
        calling ``fit`` again retrains from scratch instead of stacking a
        second ensemble on top of the first.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples labels, each -1 or +1.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]

        # Reset state so a refit does not keep learners from an earlier fit.
        self.models = []
        self.alphas = []

        # Every sample starts out equally important.
        weights = np.ones(n_samples) / n_samples

        for _ in range(self.n_estimators):
            stump, min_error = self._best_stump(X, y, weights)

            # Vote weight of the stump; the small constant keeps the ratio
            # finite when the stump makes no weighted error at all.
            stump.alpha = 0.5 * np.log((1 - min_error) / (min_error + 1e-10))

            # y * prediction is +1 for a hit and -1 for a miss, so misses are
            # scaled up and hits are scaled down before renormalising.
            predictions = stump.predict(X)
            weights *= np.exp(-stump.alpha * y * predictions)
            weights /= np.sum(weights)

            self.models.append(stump)
            self.alphas.append(stump.alpha)

    @staticmethod
    def _best_stump(X, y, weights):
        """Return the stump with the lowest weighted error and that error."""
        stump = DecisionStump()
        min_error = float('inf')

        for feature_index in range(X.shape[1]):
            column = X[:, feature_index]
            # Only values that occur in the data can change the split.
            for threshold in np.unique(column):
                predictions = np.where(column <= threshold, -1.0, 1.0)
                error = np.sum(weights * (predictions != y))

                # A split that is wrong more often than right becomes a good
                # one once its output is inverted; record that as polarity.
                if error > 0.5:
                    error = 1 - error
                    polarity = -1
                else:
                    polarity = 1

                if error < min_error:
                    min_error = error
                    stump.feature_index = feature_index
                    stump.threshold = threshold
                    stump.polarity = polarity

        return stump, min_error

    def predict(self, X):
        """Predict labels by a weighted vote of all fitted weak learners.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A float NumPy array with the sign of each sample's weighted vote
            total: 1.0 or -1.0, and 0.0 where the total is exactly zero
            (which includes every sample when no learner has been fitted).
        """
        X = np.asarray(X)
        scores = np.zeros(X.shape[0])

        for model, alpha in zip(self.models, self.alphas):
            scores += alpha * model.predict(X)

        return np.sign(scores)

In [9]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Build a small two-feature binary classification problem
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    random_state=42,
)
# Remap class 0 to -1 so the labels are in {-1, 1}
y = np.where(y == 0, -1, 1)

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the boosted ensemble on the training split
model = AdaBoost(n_estimators=50)
model.fit(X_train, y_train)

# Predict the held-out split
y_pred = model.predict(X_test)

# Report the fraction of held-out labels predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.95
