I tried to follow these recommendations: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator <br>
I could not get the BinningClassifier to fullfill all the requirements that are checked by scikits check_estimator-test-suite. <br>
Read more on this here: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.check_estimator.html#sklearn.utils.estimator_checks.check_estimator <br>

In [10]:
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.utils.validation import check_is_fitted, check_array, check_X_y
from sklearn.exceptions import NotFittedError
import numpy as np

class BinningClassifier(BaseEstimator, ClassifierMixin):
    """
    A wrapper around a regression-estimator that converts the output of the regressor into a classification-statement.
    This is done by separating the output-range of the regressor into bins (given by intervals as a user input). 
    The BinningClassifier returns the number/ index of the bin in which the regressor-output lies, starting with 0.
    This wrapper-class around a regressor is necessary to be able to use the regressors in grid-search etc.:
    e.g. from the GridSearchCV documentation:
    [https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html]
    class sklearn.model_selection.GridSearchCV(estimator, param_grid, *, ...)
    Parameters:
        estimator: estimator object
        This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a score function, or scoring must be passed.
    """

    # scikit-learns check_estimator from sklearn.utils.estimator_checks seems to have a problem with this constructor's kwargs argument:
    """ 
    
    def __init__(self, regressor=None, intervals=None, **regressor_params):
        if regressor and regressor_params:
            regressor.set_params(**regressor_params)
                  
        self.regressor = regressor
        self.intervals = intervals
    """

    def __init__(self, regressor=None, intervals=None):
        self.regressor = regressor
        self.intervals = intervals


    def fit(self, X, y):

        # Validate and check X and y for consistency,deny sparse X, 
        X, y = check_X_y(X, y, accept_sparse=False, y_numeric=True, ensure_2d=True)
        
        # Check if the intervals are provided
        if self.intervals is None or not isinstance(self.intervals, np.ndarray) or not len(self.intervals) >= 2:
            raise ValueError("Intervals must be provided as a numpy array.")
        
        # Check if the regressor is provided
        if self.regressor is None or not isinstance(self.regressor, RegressorMixin):
            raise ValueError("A valid scikit-learn regressor must be provided.")
                
        # Fit the regressor
        self.regressor.fit(X, y)
        
        # Define the array of possible class-attributes:
        self.classes_ = np.arange(len(self.intervals) - 1)
        
        return self


    def predict(self, X):

        # Ensure that the fit was successful
        try:
            check_is_fitted(self, ['regressor', 'intervals', 'classes_'])
        except NotFittedError as exc:
            print("Model is not fitted yet.")

        # Ensure X is a 2d array (i.e. has 2 axises) and denies sparse data
        X = check_array(X, accept_sparse=False, ensure_2d=True)

        # Predict using the regressor
        reg_predictions = self.regressor.predict(X)
        
        # Determine the interval index for each prediction - subtract 1 because np.digitize counts from 1 not 0
        classes = np.digitize(reg_predictions, bins=self.intervals, right=True) - 1
        
        return classes
    

    def predict_proba(self, X):

        # Ensure that the fit was successful
        try:
            check_is_fitted(self, ['regressor', 'intervals', 'classes_'])
        except NotFittedError as exc:
            print("Model is not fitted yet.")
        
        binned_predictions = self.predict(X)
        
        # Create the probability array:
        num_samples = len(binned_predictions)
        num_classes = len(self.classes_)
        proba = np.zeros((num_samples, num_classes))
        
        # Assign probabilities based on class distribution
        for i in range(num_classes):
            #print(f"i: {i} - proba[:,i]: {(binned_predictions.ravel() == i).astype(float)}")
            proba[:, i] = (binned_predictions.ravel() == i).astype(float)
        
        # Normalize probabilities row-wise to sum to 1
        proba = proba / proba.sum(axis=1, keepdims=True)
        
        return proba
    
    def score(self, X, y, sample_weight=None):

        # Ensure that the fit was successful
        try:
            check_is_fitted(self, ['regressor', 'intervals', 'classes_'])
        except NotFittedError as exc:
            print("Model is not fitted yet.")

        # Ensure X is a 2d array (i.e. has 2 axises) and denies sparse data
        X = check_array(X, accept_sparse=False, ensure_2d=True)

        # Validate and check X and y for consistency,deny sparse X, 
        X, y = check_X_y(X, y, accept_sparse=False, y_numeric=True, ensure_2d=True)

        return self.regressor.score(X, y, sample_weight)


    def get_params(self, deep=True):
        #return {"regressor": self.regressor, "intervals": self.intervals, "regressor_params": self.regressor.get_params()}
        return {"regressor": self.regressor, "intervals": self.intervals}


    def set_params(self, **params):
        for key, value in params.items():
            setattr(self, key, value)
        return self

In [1]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression(**{"fit_intercept": True})

In [11]:
# Adding some debug prints to understand the input shapes:

from sklearn.linear_model import LinearRegression
from sklearn.utils.estimator_checks import check_estimator # scikits api-check 

intervals = np.array([-1000, 10, 15, 20, 25, 30, 1000])
clf = BinningClassifier(regressor=LinearRegression(), intervals=intervals)

check_estimator(clf)

TypeError: BinningClassifier.__init__() got an unexpected keyword argument 'regressor_params'

In [7]:
# comment out the init method without kwargs and comment in the init method with kwargs in BinningClassifier and run this example - seems to work -
# so the problem obviously lies with the check_estimator...

import numpy as np

X = np.array([1,2,3,4,5,6,7,8,9]).reshape(-1,1)
#X = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(-1,1)
y = 2 * X + (X * X / 100)

print(f"y: {y}")

intervals = np.array([-1000,1,3,5,7,9,11,13,19, 1000])

bsc_clf = BinningClassifier(regressor=LinearRegression(), intervals=intervals, fit_intercept=True)
bsc_clf.fit(X,y)
preds = bsc_clf.predict(X)
#print(f"preds: {preds}")
probs = bsc_clf.predict_proba(X)
print(f"probs: \n {probs}")

y: [[ 2.01]
 [ 4.04]
 [ 6.09]
 [ 8.16]
 [10.25]
 [12.36]
 [14.49]
 [16.64]
 [18.81]]
probs: 
 [[0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]]


  y = column_or_1d(y, warn=True)


In [10]:
# test example using kwargs - works as it should:

def tt(regressor=None, intervals=None, **regressor_params):
    print(f"regressor: {regressor}")
    print(f"intervals: {intervals}")
    if(regressor_params):
        for k,v in regressor_params.items():
            print(f"key: {k} -- value:{v}")

tt(regressor="reggi", intervals=[0,4,8], fit_intercept=True)

regressor: reggi
intervals: [0, 4, 8]
key: fit_intercept -- value:True
