In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

In [27]:
class CustomGradientBoostingClassifier:
    
    def __init__(self, learning_rate, n_estimators, max_depth=1):
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.trees = []
        
    def fit(self, X, y):
        
        F0 = np.log(y.mean()/(1-y.mean()))  # log-odds values
        self.F0 = np.full(len(y), F0)  # converting to array with the input length
        Fm = self.F0.copy()
        
        for _ in range(self.n_estimators):
            p = np.exp(Fm) / (1 + np.exp(Fm))  # converting back to probabilities
            r = y - p  # residuals
            tree = DecisionTreeRegressor(max_depth=self.max_depth, random_state=0)
            tree.fit(X, r)
            ids = tree.apply(x)  # getting the terminal node IDs

            # looping through the terminal nodes 
            for j in np.unique(ids):
                fltr = ids == j

                # getting gamma using the formula (Σresiduals/Σp(1-p))
                num = r[fltr].sum()
                den = (p[fltr]*(1-p[fltr])).sum()
                gamma = num / den

                # updating the prediction
                Fm[fltr] += self.learning_rate * gamma

                # replacing the prediction value in the tree
                tree.tree_.value[j, 0, 0] = gamma

            self.trees.append(tree)
            
    def predict_proba(self, X):
        
        Fm = self.F0
        
        for i in range(self.n_estimators):
            Fm += self.learning_rate * self.trees[i].predict(X)
            
        return np.exp(Fm) / (1 + np.exp(Fm))  # converting back to probabilities

In [28]:
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)
gd = CustomGradientBoostingClassifier(learning_rate=0.1, n_estimators=20)
gd.fit(X,y)
gd.predict_proba(X)
print('f(0) is :: ',gd.F0)

[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
[1 2]
f(0) is ::  [-1.58252068 -0.31657724 -1.58252068 -1.58252068 -0.86905818 -0.86905818
  0.14345481 -0.31657724 -1.58252068 -1.58252068  0.14345481 -0.31657724
 -1.299584   -1.58252068 -0.86905818 -0.86905818 -1.58252068 -1.58252068
 -1.58252068 -1.58252068 -1.58252068 -0.48267714 -1.58252068 -1.58252068
 -1.58252068 -1.58252068 -1.58252068 -1.58252068 -1.58252068  0.14345481
 -1.58252068 -0.86905818  0.14345481 -0.31657724 -0.31657724 -1.58252068
 -1.58252068  0.14345481 -1.58252068 -1.58252068 -0.14650689 -1.58252068
  0.14345481 -0.31657724 -1.58252068 -1.58252068 -1.58252068 -1.58252068
 -1.58252068 -0.86905818 -0.31657724 -1.58252068 -0.31657724 -0.31657724
 -1.58252068 -0.31657724  0.14345481 -1.13734113  0.14345481 -1.58252068
 -1.58252068 -1.58252068 -1.299584   -1.58252068 -1.58252068 -1.58252068
 -0.86905818 -1.58252068  0.14345481 -0.31657724 -1.58252068 -1.5

In [17]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss

# ... CustomGradientBoostingClassifier implementation ...

np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = (3 * X[:, 0]**2 + 0.05 * np.random.randn(100)) > 0.2  # Convert to binary labels

custom_gbm = CustomGradientBoostingClassifier(
    n_estimators=20, 
    learning_rate=0.1, 
    max_depth=1
)
custom_gbm.fit(X, y)
custom_gbm_log_loss = log_loss(y, custom_gbm.predict_proba(X))
print(f"Custom GBM Log-Loss: {custom_gbm_log_loss:.15f}")

sklearn_gbm = GradientBoostingClassifier(
    n_estimators=20, 
    learning_rate=0.1, 
    max_depth=1
)
sklearn_gbm.fit(X, y)
sklearn_gbm_log_loss = log_loss(y, sklearn_gbm.predict_proba(X))
print(f"Scikit-learn GBM Log-Loss: {sklearn_gbm_log_loss:.15f}")


Custom GBM Log-Loss: 0.284047360116223
Scikit-learn GBM Log-Loss: 0.310336960055931
