In [1]:
import pandas as pd

In [None]:
# Load the dataset from CSV with pandas.
# NOTE(review): 'some/path/' looks like a placeholder (it ends with a slash,
# like a directory) — confirm the real file path before running.
df = pd.read_csv('some/path/')

In [None]:
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

class MyBoosting(BaseEstimator):
    """Additive ensemble in which every stage is fitted to the running residual.

    Each stage is built as ``base_clf(**kwargs)`` and trained on the current
    residual target. The residual is then reduced by that stage's prediction
    multiplied by ``learning_rate``. The ensemble prediction is the sum of all
    stage predictions, each multiplied by ``learning_rate``.

    Parameters
    ----------
    base_clf : callable, default=DecisionTreeRegressor
        Called as ``base_clf(**kwargs)`` to create each stage. The created
        object must provide ``fit(X, y)`` returning the fitted model and
        ``predict(X)``.
    n_estimators : int, default=5
        Number of stages trained by ``fit``.
    learning_rate : float, default=0.01
        Factor applied to every stage prediction, both when updating the
        residual during ``fit`` and when summing in ``predict``.
    **kwargs
        Extra keyword arguments forwarded to ``base_clf`` when a stage is
        created.

    Attributes
    ----------
    clf_collection : list
        Fitted stages in training order; reset at the start of every ``fit``.
    kwargs : dict
        The extra keyword arguments forwarded to ``base_clf``.
    """

    def __init__(self, base_clf=DecisionTreeRegressor, n_estimators: int = 5, learning_rate: float = 0.01, **kwargs):
        self.base_clf = base_clf
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.clf_collection = []
        self.kwargs = kwargs

    def fit(self, X, y):
        """Train ``n_estimators`` stages sequentially on the shrinking residual.

        Parameters
        ----------
        X
            Training features, passed unchanged to every stage's ``fit`` and
            ``predict``.
        y
            Training target. Must provide ``.copy()`` and support subtraction
            of the stage predictions.

        Returns
        -------
        self
            The fitted ensemble, returned for sklearn compatibility.
        """
        residual = y.copy()
        # Start from an empty list so that calling fit again does not keep
        # stages from a previous run.
        self.clf_collection = []

        for _ in range(self.n_estimators):
            model = self.base_clf(**self.kwargs).fit(X, residual)
            # Remove the scaled contribution of this stage from the target
            # that the next stage will be trained on.
            residual = residual - model.predict(X) * self.learning_rate
            self.clf_collection.append(model)

        # TODO: add linear regression training. Presumably it should be fitted
        # on the per-stage scaled predictions
        # (model.predict(X) * self.learning_rate) — compute them here once that
        # step actually exists, instead of predicting and discarding them.

        return self  # Return self for sklearn compatibility

    def predict(self, X):
        """Return the sum of all stage predictions scaled by ``learning_rate``.

        When ``clf_collection`` is empty (no stage has been fitted) the result
        is the integer ``0``, because the sum runs over no terms.
        """
        return sum(
            model.predict(X) * self.learning_rate
            for model in self.clf_collection
        )

    def fit_predict(self, X, y):
        """Fit on ``(X, y)`` and return the predictions for the same ``X``."""
        self.fit(X, y)
        return self.predict(X)

    def get_params(self, deep=True):
        """Return the constructor parameters as a flat dict.

        The entries of ``self.kwargs`` are placed at the same level as
        ``base_clf``, ``n_estimators`` and ``learning_rate``. ``deep`` is
        accepted for signature compatibility but is not used.
        """
        return {
            'base_clf': self.base_clf,
            'n_estimators': self.n_estimators,
            'learning_rate': self.learning_rate,
            **self.kwargs
        }

    def set_params(self, **params):
        """Update parameters in place and return ``self``.

        ``base_clf``, ``n_estimators`` and ``learning_rate`` are set as
        attributes; every other key is stored in ``self.kwargs`` and therefore
        forwarded to ``base_clf`` on the next ``fit``.
        """
        for key, value in params.items():
            if key in ['base_clf', 'n_estimators', 'learning_rate']:
                setattr(self, key, value)
            else:
                self.kwargs[key] = value
        return self