# AdaBoost Regressor From Scratch

AdaBoost for regression (AdaBoost.R2) follows a similar principle to the classifier: it trains a sequence of weak learners where each subsequent learner focuses more on the samples that were difficult for the previous ones.

## Key Concepts:
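- **Weighted Error**: Each sample's absolute error is divided by the largest error (giving a loss in [0, 1]) and then averaged using the current sample weights
- **Beta (Inverse Confidence)**: `beta = avg_loss / (1 - avg_loss)`; a smaller beta means a more reliable learner, and each learner votes with weight `ln(1 / beta)`
- **Weight Update**: Each weight is multiplied by `beta ** (1 - loss)` and renormalized, so harder-to-predict samples gain relative weight
- **Final Prediction**: Weighted median of the individual learners' predictions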

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor as SklearnAda

## 1. AdaBoost.R2 Implementation

In [None]:
class AdaBoostRegressor:
    """AdaBoost.R2 regressor that boosts depth-limited decision trees.

    Each round fits a ``DecisionTreeRegressor`` using the current sample
    weights, measures its linear loss (absolute error divided by the largest
    absolute error), and re-weights the samples so that poorly predicted ones
    matter more in the next round. Predictions are the weighted median of the
    individual trees' predictions.

    Attributes set by ``fit``:
        estimators: list of fitted trees, in boosting order.
        betas: list of floats, one per tree; the tree's vote weight is
            ``ln(1 / beta)``, so a smaller beta means a stronger vote.
    """

    # Lower bound for beta. A learner with zero weighted loss would otherwise
    # get beta == 0 and an infinite vote weight ln(1 / beta).
    _MIN_BETA = np.finfo(float).eps

    def __init__(self, n_estimators=50, max_depth=3):
        """Store hyper-parameters.

        Args:
            n_estimators: maximum number of boosting rounds.
            max_depth: depth limit passed to every decision tree.
        """
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.estimators = []
        self.betas = []

    def fit(self, X, y):
        """Fit the boosted ensemble on ``X`` and ``y``.

        Boosting stops early when a tree fits the training data exactly or
        when a tree's weighted average loss reaches 0.5. At least one tree is
        always retained so that ``predict`` is usable afterwards.

        Args:
            X: feature matrix with one row per sample.
            y: target values; flattened to one dimension.

        Returns:
            self

        Raises:
            ValueError: if ``X`` is empty or ``X`` and ``y`` disagree on the
                number of samples.
        """
        X = np.asarray(X)
        # Flatten so a column vector cannot broadcast against the 1-D
        # predictions and produce an (n, n) error matrix.
        y = np.asarray(y, dtype=float).ravel()

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit AdaBoostRegressor on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        w = np.full(n_samples, 1.0 / n_samples)

        self.estimators = []
        self.betas = []

        for _ in range(self.n_estimators):
            # Train the weak learner on the current sample weights.
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, y, sample_weight=w)

            error_abs = np.abs(tree.predict(X) - y)
            max_error = np.max(error_abs)
            if max_error == 0:
                # Perfect fit on the training data: keep this learner with the
                # strongest finite vote and stop, since no sample is left to
                # up-weight.
                self.estimators.append(tree)
                self.betas.append(float(self._MIN_BETA))
                break

            # Linear loss in [0, 1] and its average under the sample weights.
            L = error_abs / max_error
            avg_L = np.sum(w * L)

            if avg_L >= 0.5:
                # The learner is too weak to be useful. Discard it unless it
                # is the only one; in that case keep it with beta == 1 (vote
                # weight 0) so the model can still predict.
                if not self.estimators:
                    self.estimators.append(tree)
                    self.betas.append(1.0)
                break

            beta = max(float(avg_L / (1 - avg_L)), float(self._MIN_BETA))
            self.betas.append(beta)
            self.estimators.append(tree)

            # Well-predicted samples (L near 0) are scaled by ~beta < 1, badly
            # predicted ones (L near 1) are left almost unchanged; after
            # renormalisation the hard samples carry relatively more weight.
            w *= beta ** (1 - L)
            w /= np.sum(w)

        return self

    def predict(self, X):
        """Return the weighted-median prediction for every row of ``X``.

        For each sample the trees' predictions are sorted, their vote weights
        ``ln(1 / beta)`` are accumulated in that order, and the prediction at
        which the running total first reaches half of the total weight is
        returned.

        Args:
            X: feature matrix with one row per sample.

        Returns:
            1-D numpy array with one prediction per row of ``X``.

        Raises:
            ValueError: if the model holds no fitted estimators.
        """
        if not self.estimators:
            raise ValueError(
                "AdaBoostRegressor has no fitted estimators; call fit() first."
            )

        X = np.asarray(X)
        n_samples = X.shape[0]

        # Vote weight of each estimator: ln(1 / beta) == -ln(beta).
        weights = -np.log(np.asarray(self.betas, dtype=float))

        # Shape (n_samples, n_estimators): one row of candidate predictions
        # per sample.
        predictions = np.column_stack(
            [tree.predict(X) for tree in self.estimators]
        )

        # Sort each row, accumulate the matching weights, and locate the first
        # position where the cumulative weight reaches half of the row total.
        order = np.argsort(predictions, axis=1)
        cumulative_weight = np.cumsum(weights[order], axis=1)
        reached_half = cumulative_weight >= 0.5 * cumulative_weight[:, -1:]
        median_pos = reached_half.argmax(axis=1)

        rows = np.arange(n_samples)
        return predictions[rows, order[rows, median_pos]]

    def score(self, X, y):
        """Return the coefficient of determination R^2 of the predictions.

        Args:
            X: feature matrix with one row per sample.
            y: true target values; flattened to one dimension.

        Returns:
            ``1 - SS_res / SS_tot`` as a float. When ``y`` has zero variance
            the ratio is undefined; 1.0 is returned if the predictions are
            exact and 0.0 otherwise.
        """
        y = np.asarray(y, dtype=float).ravel()
        residual_ss = np.sum((y - self.predict(X)) ** 2)
        total_ss = np.sum((y - np.mean(y)) ** 2)
        if total_ss == 0:
            return 1.0 if residual_ss == 0 else 0.0
        return float(1 - residual_ss / total_ss)

## 2. Testing and Comparison

In [None]:
# Synthetic single-feature regression problem; 20% of it is held out for scoring.
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# From-scratch implementation.
ada = AdaBoostRegressor(n_estimators=50, max_depth=3)
ada.fit(X_train, y_train)
our_r2 = ada.score(X_test, y_test)
print(f"Our AdaBoost R2: {our_r2:.4f}")

# scikit-learn's implementation, trained and scored on the same split.
sk_ada = SklearnAda(n_estimators=50, random_state=42)
sk_ada.fit(X_train, y_train)
sklearn_r2 = sk_ada.score(X_test, y_test)
print(f"Sklearn AdaBoost R2: {sklearn_r2:.4f}")

## 3. Visualizing Fit

In [None]:
# Evaluate the fitted model on 100 evenly spaced points spanning the data range.
x_low, x_high = X.min(), X.max()
X_line = np.linspace(x_low, x_high, 100)[:, np.newaxis]
y_line = ada.predict(X_line)

# Raw samples as translucent points, model output as a line on top.
plt.scatter(X, y, alpha=0.5, color="blue")
plt.plot(X_line, y_line, linewidth=2, color="red")
plt.title("AdaBoost Regression Line")
plt.show()