In [1]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold
import copy

In [2]:
# Scorer name passed to cross_val_score. scikit-learn scorers are
# "higher is better", so the mean squared error is reported negated.
metric = "neg_mean_squared_error"

# 10-fold splitter; shuffling with a fixed seed keeps the folds repeatable
# from one run to the next.
kf = KFold(
    n_splits=10,
    random_state=0,
    shuffle=True,
)

In [3]:
def performance(model):
    """Cross-validate ``model`` and return its mean per-fold root score.

    The evaluation uses the notebook-level ``X_train`` / ``y_train`` data,
    the ``kf`` splitter and the ``metric`` scorer name. Each fold score is
    negated and square-rooted before averaging, so with
    ``metric = 'neg_mean_squared_error'`` the result is the mean RMSE
    over the folds (lower is better).

    Parameters
    ----------
    model : estimator
        Unfitted estimator accepted by ``cross_val_score``.

    Returns
    -------
    float
        Mean over the folds of ``sqrt(-fold_score)``.
    """
    fold_scores = cross_val_score(
        model,
        X_train,
        y_train,
        scoring=metric,
        cv=kf,
        n_jobs=4,
    )
    fold_roots = np.sqrt(-fold_scores)
    return fold_roots.mean()

In [4]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and its import) only runs on older releases.
boston = load_boston()

# Feature matrix and regression target used by performance().
X_train = boston.data
y_train = boston.target

In [5]:
# Display the dimensions of the feature matrix and the target vector.
X_train.shape, y_train.shape

((506, 13), (506,))

In [6]:
class MyGradientBoosting(BaseEstimator):
    """Minimal gradient boosting regressor for squared-error loss.

    The ensemble starts from a constant zero prediction. At every stage a
    fresh copy of ``base_learner`` is fitted to the current residuals and its
    prediction, scaled by ``learning_rate``, is added to the running fit.

    Parameters
    ----------
    base_learner : estimator, default=DecisionTreeRegressor(max_depth=2)
        Template regressor exposing ``fit(X, y)`` and ``predict(X)``. It is
        only used as a prototype: ``fit`` trains deep copies of it and never
        modifies the object itself.
    n_estimators : int, default=100
        Number of boosting stages.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every stage's contribution.

    Attributes
    ----------
    learners : list
        Fitted stage estimators in training order; empty until ``fit`` runs.
    """

    def __init__(self, base_learner=DecisionTreeRegressor(max_depth=2), n_estimators=100, learning_rate=0.1):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.base_learner = base_learner
        # Kept here so predict() on an unfitted model still returns zeros,
        # exactly as before; fit() rebuilds this list on every call.
        self.learners = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        Any stages left over from an earlier ``fit`` call are discarded, so
        fitting the same instance twice yields ``n_estimators`` stages rather
        than accumulating ``2 * n_estimators``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs; must expose ``.shape``.
        y : array-like of shape (n_samples,)
            Regression targets.

        Returns
        -------
        MyGradientBoosting
            The fitted estimator.
        """
        n_samples = X.shape[0]
        # Start from a clean slate; without this a second fit() would append
        # to the previous stages and predict() would sum both ensembles.
        self.learners = []
        learned = np.zeros(n_samples)
        for _ in range(self.n_estimators):
            # For squared error the negative gradient is the plain residual.
            residual = y - learned
            # Train an independent deep copy instead of the template itself:
            # the shared default-argument instance stays untouched, and the
            # stored stage cannot be altered by later stages even if a learner
            # updates its fitted state in place.
            learner = copy.deepcopy(self.base_learner)
            learner.fit(X, residual)
            self.learners.append(learner)
            learned += self.learning_rate * learner.predict(X)
        return self

    def predict(self, X):
        """Return the sum of all stage predictions, each scaled by ``learning_rate``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Inputs to predict; must expose ``.shape``.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted targets (all zeros when no stage has been fitted).
        """
        y_pred = np.zeros(X.shape[0])
        for learner in self.learners:
            y_pred += self.learning_rate * learner.predict(X)
        return y_pred

In [7]:
# Cross-validated score of the custom booster with its default hyper-parameters.
performance(MyGradientBoosting())

3.6172784422660422