https://towardsdatascience.com/all-you-need-to-know-about-gradient-boosting-algorithm-part-2-classification-d3ed8f56541e

In [34]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

class CustomGradientBoostingClassifier:
    """Binary gradient-boosting classifier on the log-loss, built from regression trees.

    The model keeps a raw score F(x) in log-odds space. It starts from the
    log-odds of the positive-class rate and, at every boosting round, fits a
    regression tree to the residuals ``y - p`` and overwrites each leaf value
    with ``gamma = sum(residuals) / sum(p * (1 - p))`` computed over the
    training samples that fall into that leaf.

    Parameters
    ----------
    learning_rate : float
        Shrinkage applied to every tree's contribution.
    n_estimators : int
        Number of boosting rounds (trees) fitted by ``fit``.
    max_depth : int, default=1
        Depth limit passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    trees : list
        Fitted trees in boosting order; rebuilt on every ``fit`` call.
    y_hat : float or None
        Initial log-odds prediction; ``None`` until ``fit`` has run.
    F0 : numpy.ndarray
        ``y_hat`` repeated to the training-set length (set by ``fit``).
    """

    def __init__(self, learning_rate, n_estimators, max_depth=1):
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.trees = []
        self.y_hat = None

    @staticmethod
    def _sigmoid(raw_scores):
        """Map log-odds to probabilities without overflowing for large |scores|."""
        # 1 / (1 + exp(-F)) == exp(-log(1 + exp(-F))). logaddexp evaluates the
        # inner log stably, whereas exp(F) / (1 + exp(F)) becomes inf / inf = nan
        # as soon as exp(F) overflows.
        return np.exp(-np.logaddexp(0.0, -raw_scores))

    def fit(self, X, y):
        """Fit ``n_estimators`` boosted trees on ``X`` against 0/1 labels ``y``.

        Parameters
        ----------
        X : array-like
            Training features, passed unchanged to ``DecisionTreeRegressor``.
        y : array-like
            Binary labels encoded as 0 and 1, one per row of ``X``.

        Raises
        ------
        ValueError
            If ``y`` is empty or its mean is not strictly between 0 and 1
            (the initial log-odds would be undefined or infinite).
        """
        y = np.asarray(y, dtype=float)
        if y.size == 0:
            raise ValueError("y must not be empty")
        positive_rate = y.mean()
        if not 0.0 < positive_rate < 1.0:
            raise ValueError(
                "y must contain both classes encoded as 0 and 1; "
                f"got a mean label of {positive_rate}"
            )

        F0 = np.log(positive_rate / (1 - positive_rate))  # log-odds of the positive class
        self.y_hat = F0
        self.F0 = np.full(len(y), F0)  # same prior for every training sample
        Fm = self.F0.copy()

        # Start from an empty ensemble so that calling fit again does not leave
        # trees from a previous run at the front of the list.
        self.trees = []

        for _ in range(self.n_estimators):
            p = self._sigmoid(Fm)  # current probabilities
            r = y - p  # residuals (negative gradient of the log-loss)
            tree = DecisionTreeRegressor(max_depth=self.max_depth, random_state=0)
            tree.fit(X, r)
            ids = tree.apply(X)  # terminal node ID of every training sample

            for j in np.unique(ids):
                fltr = ids == j

                # gamma = sum(residuals) / sum(p * (1 - p)) over the leaf
                num = r[fltr].sum()
                den = (p[fltr] * (1 - p[fltr])).sum()
                # Saturated probabilities make the denominator vanish; such a
                # leaf contributes nothing instead of producing inf/nan.
                gamma = num / den if den > 1e-150 else 0.0

                # updating the running prediction for the samples in this leaf
                Fm[fltr] += self.learning_rate * gamma

                # storing gamma in the tree so that tree.predict returns it
                tree.tree_.value[j, 0, 0] = gamma

            self.trees.append(tree)

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for every row of ``X``.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if self.y_hat is None:
            raise ValueError(
                "This CustomGradientBoostingClassifier is not fitted yet; call fit first"
            )

        Fm = np.full(len(X), self.y_hat, dtype=float)
        # Iterate over the trees that were actually fitted rather than indexing
        # by n_estimators, which may have been changed after fit.
        for tree in self.trees:
            Fm += self.learning_rate * tree.predict(X)

        return self._sigmoid(Fm)  # converting back into probabilities

In [39]:
# Imports
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both arguments may be any array-like (lists, tuples, numpy arrays); they
    are converted to arrays first so the comparison is always element-wise.
    Comparing two plain lists with ``==`` would yield a single bool instead.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    float
        Share of matching labels in [0, 1]; ``nan`` for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    if y_true.size == 0:
        # 0 / 0: accuracy is undefined without any samples
        return float("nan")
    return np.mean(y_true == y_pred)

# Load the breast-cancer dataset; the target stays encoded as 0/1 because the
# classifier derives its initial log-odds from the mean label.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5
)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# 200 boosting rounds of depth-1 trees (stumps) with a shrinkage of 0.1.
custom_gbm = CustomGradientBoostingClassifier(
    learning_rate=0.1,
    n_estimators=200,
    max_depth=1,
)
custom_gbm.fit(X_train, y_train)

# Log-loss measured on the training set itself.
prob_train = custom_gbm.predict_proba(X_train)
custom_gbm_log_loss = log_loss(y_train, prob_train)
print(f"Custom GBM Log-Loss:{custom_gbm_log_loss:.15f}")
print(prob_train)

(455, 30) (455,)
(114, 30) (114,)
Custom GBM Log-Loss:0.033312747890304
[9.98995535e-01 9.83374221e-01 4.54413984e-02 9.19879858e-01
 2.10518715e-03 9.95901821e-01 9.99602707e-01 9.94978062e-01
 4.58403683e-03 9.96496671e-01 4.76836524e-03 9.99690879e-01
 2.69796044e-03 9.98530703e-01 9.90947265e-01 7.53311861e-02
 9.43232422e-01 9.95792904e-01 9.95897917e-01 9.49683990e-01
 9.84194435e-01 2.31542984e-02 9.88058999e-01 9.97962012e-01
 9.96468424e-01 9.99652401e-01 9.88314899e-01 9.55074325e-01
 9.00035547e-01 6.09232507e-03 9.98254096e-01 8.38435963e-01
 9.86830159e-01 1.82240901e-03 9.92804245e-01 9.99662945e-01
 9.87254367e-01 4.13923246e-02 9.98925285e-01 9.94249837e-01
 9.99178148e-01 5.15200902e-04 9.98729141e-01 9.46276090e-01
 8.05199276e-01 9.70499731e-01 9.98797073e-01 9.97791107e-01
 9.99497260e-01 9.99487618e-01 1.12854388e-03 2.69796044e-03
 9.86832476e-01 9.98854146e-01 9.36029745e-01 9.99120184e-01
 9.96047409e-01 1.18065579e-01 9.99487618e-01 3.83733488e-02
 9.99521128e-

In [40]:
# Turn the training probabilities into hard 0/1 labels with a 0.5 cut-off.
y_pred = (prob_train > 0.5).astype(int).tolist()
print(y_pred)

# Share of training samples whose thresholded prediction matches the label.
acc = accuracy(y_train, y_pred)
print("Accuracy:", acc)

[1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 

In [41]:
# Score the held-out split with the fitted model.
prob_test = custom_gbm.predict_proba(X_test)

# Probabilities strictly above 0.5 become class 1, everything else class 0.
y_pred = np.where(prob_test > 0.5, 1, 0).tolist()
print(y_pred)

acc = accuracy(y_test, y_pred)
print("Accuracy:", acc)

[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0]
Accuracy: 0.9736842105263158
