# Gradient Boosting Classifier From Scratch

Gradient Boosting for classification is slightly more involved than regression. Instead of predicting raw targets, we predict **log-odds** and use the **logistic loss** (log-loss).

## Key Concepts:
- **Log-Odds**: $L = \log(\frac{p}{1-p})$
- **Probability**: $p = \frac{1}{1 + e^{-L}}$
- **Pseudo-Residuals**: $y_i - p_i$
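- **Gamma Calculation**: In a full gradient-boosting classifier, each terminal node $R_j$ of a tree is assigned the leaf value $\gamma_j$ that minimizes the log-loss over the samples falling in that node. This has no closed form, so it is usually approximated with a single Newton step: $\gamma_j = \frac{\sum_{i \in R_j} (y_i - p_i)}{\sum_{i \in R_j} p_i (1 - p_i)}$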

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

## 1. Implementation

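We use `DecisionTreeRegressor` as the weak learner even for classification, fitting each tree to the pseudo-residuals. A full GBM would then recompute the leaf values (gamma) to minimize log-loss; to keep this educational implementation short, we skip that step and simply add each tree's own predictions (the mean residual in each leaf), scaled by the learning rate, to the running log-odds.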

In [None]:
class GradientBoostingClassifier:
    """Binary gradient-boosting classifier built on regression trees.

    The model keeps one running log-odds score per sample. Every boosting
    round fits a ``DecisionTreeRegressor`` to the pseudo-residuals
    ``y - p`` and adds the tree's prediction, scaled by the learning rate,
    to that score. Probabilities are obtained by passing the final score
    through the sigmoid.

    Simplification: the leaf values are the tree's own predictions; they
    are *not* re-estimated (the "gamma" step) to minimize log-loss.

    Args:
        n_estimators: Number of boosting rounds (trees) to fit.
        learning_rate: Shrinkage factor applied to every tree's output.
        max_depth: Maximum depth of each regression tree.

    Attributes:
        trees: Fitted regression trees, in boosting order.
        init_log_odds: Prior log-odds of the positive class, or ``None``
            until ``fit`` has been called.
    """

    # Log-odds are clipped to this magnitude before exponentiation so that
    # np.exp never overflows float64. Inside the range the sigmoid is
    # computed exactly as before; beyond it the result is already 0 or 1
    # to within float precision.
    _MAX_ABS_LOG_ODDS = 500.0

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.lr = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.init_log_odds = None

    def _sigmoid(self, x):
        """Map log-odds to probabilities without overflow warnings."""
        x = np.clip(x, -self._MAX_ABS_LOG_ODDS, self._MAX_ABS_LOG_ODDS)
        return 1 / (1 + np.exp(-x))

    def fit(self, X, y):
        """Fit the boosted ensemble on binary targets.

        Args:
            X: Feature matrix with one row per sample; passed unchanged to
                the regression trees.
            y: Binary targets encoded as 0/1, one per row of ``X``.

        Returns:
            The fitted estimator (``self``).

        Raises:
            ValueError: If ``y`` is empty.
        """
        # Flatten so that a column vector cannot broadcast `y - probs`
        # into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()
        if y.size == 0:
            raise ValueError("Cannot fit on an empty target array.")

        # Start from a clean slate so refitting does not stack new trees
        # on top of a previous fit.
        self.trees = []

        # Initial log-odds: log(P(y=1) / P(y=0)). The prior is clipped away
        # from 0 and 1 so a single-class target yields a finite value.
        eps = np.finfo(float).eps
        p_mean = np.clip(np.mean(y), eps, 1 - eps)
        self.init_log_odds = np.log(p_mean / (1 - p_mean))

        current_log_odds = np.full(len(y), self.init_log_odds)

        for _ in range(self.n_estimators):
            # 1. Current probability estimates.
            probs = self._sigmoid(current_log_odds)

            # 2. Pseudo-residuals (y - p).
            residuals = y - probs

            # 3. Fit a regression tree to the residuals.
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)

            # 4. Update the log-odds with the shrunken tree output.
            # Simplification: a proper GBM would recalculate the leaf
            # values (gamma) to minimize log-loss; here the raw tree
            # predictions are used.
            current_log_odds += self.lr * tree.predict(X)
            self.trees.append(tree)

        return self

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of X.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.init_log_odds is None:
            raise RuntimeError(
                "GradientBoostingClassifier is not fitted; call fit() first."
            )
        # np.shape also works for plain nested lists; X itself is passed
        # to the trees unchanged.
        n_samples = np.shape(X)[0]
        log_odds = np.full(n_samples, self.init_log_odds)
        for tree in self.trees:
            log_odds += self.lr * tree.predict(X)
        return self._sigmoid(log_odds)

    def predict(self, X):
        """Return hard 0/1 labels using a 0.5 probability threshold."""
        return (self.predict_proba(X) >= 0.5).astype(int)

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the labels ``y``."""
        return np.mean(self.predict(X) == np.asarray(y).ravel())

## 2. Testing and Comparison

In [None]:
# Toy two-moons dataset with a fixed-seed 80/20 train/test split.
X, y = make_moons(n_samples=200, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Both models are trained with the same boosting hyperparameters.
gb_params = dict(n_estimators=50, learning_rate=0.1, max_depth=2)

# From-scratch implementation defined above.
gbc = GradientBoostingClassifier(**gb_params)
gbc.fit(X_train, y_train)
our_accuracy = gbc.score(X_test, y_test)
print(f"Our GB Classifier Accuracy: {our_accuracy:.4f}")

# Reference implementation from scikit-learn, aliased to avoid clashing
# with the class of the same name defined in this notebook.
from sklearn.ensemble import GradientBoostingClassifier as SklearnGBC
sk_gbc = SklearnGBC(random_state=42, **gb_params)
sk_gbc.fit(X_train, y_train)
sklearn_accuracy = sk_gbc.score(X_test, y_test)
print(f"Sklearn GB Classifier Accuracy: {sklearn_accuracy:.4f}")