# Custom regressors
Module with custom regressor models built on top of scikit-learn's base classes.

In [None]:
from functools import partial

import pandas as pd
import numpy as np

from sklearn.base import RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

## Static Regressor
A simple regressor where predictions are based on a single static value calculated from the target values.

In [1]:
def _get_np_func(method: str, q: float, interpolation: str):
    """ Return a function from numpy to calculate statistics from training
    set. https://numpy.org/doc/stable/reference/routines.statistics.html

    Parameters
    ----------
    method : {mean, median, max, min, quantile}
        Name of the function to look up on the numpy namespace.
    q : float, [0, 1]
        Quantile to compute, which must be between 0 and 1 inclusive.
        Only used when method='quantile'.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Interpolation method to use when the desired quantile lies between
        two data points. Only used when method='quantile'.

    Returns
    -------
    func : callable
        The numpy function itself, or for method='quantile' a partial of
        ``np.quantile`` with ``q`` and the interpolation option bound.

    Raises
    ------
    AttributeError
        If numpy has no attribute called `method`.
    """

    if method != 'quantile':
        return getattr(np, method)

    # NumPy 1.22 renamed the `interpolation` keyword of np.quantile to
    # `method` and deprecated the old spelling; bind whichever keyword the
    # installed version expects so no DeprecationWarning is raised.
    # TODO: see if this can be replaced with using **kwargs
    if np.lib.NumpyVersion(np.__version__) >= '1.22.0':
        keyword = 'method'
    else:
        keyword = 'interpolation'

    return partial(np.quantile, q=q, **{keyword: interpolation})

In [None]:
class StaticRegression(RegressorMixin):
    """ Simple regressor where predictions are based on a single static value
    calculated from the target values.

    Parameters
    ----------
    method : str, default='mean'
        Name of the numpy statistics function used to summarise the targets
        (resolved by the module-level ``_get_np_func`` helper).

    q : float, default=0.5
        Quantile to compute, which must be between 0 and 1 inclusive.
        Only used when method='quantile'.

    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Interpolation method to use when the desired quantile lies between
        two data points. Only used when method='quantile'. Default 'linear'.

    Attributes
    ----------
    y_ : float
        Statistic computed by ``fit`` from the non-NaN training targets;
        returned by ``predict`` for every sample.

    Examples
    --------
    >>> import numpy as np
    >>> from modelutils.regressors import StaticRegression
    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    >>> # y = 1 * x_0 + 2 * x_1 + 3
    >>> y = np.dot(X, np.array([1, 2])) + 3
    >>> sr = StaticRegression().fit(X, y)
    >>> sr.score(X, y)
    0.0
    >>> sr.predict(np.array([[3, 5]]))
    array([8.5])
    """

    def __init__(self, *, method: str = 'mean', q: float = 0.5,
                 interpolation: str = 'linear') -> None:
        self.method = method
        self.q = q
        self.interpolation = interpolation

    def __repr__(self) -> str:
        # q and interpolation are only shown when they influence the result
        if self.method != 'quantile':
            target = self.method
        else:
            target = f'{self.method}, q={self.q}, interpolation={self.interpolation}'

        return f'StaticRegression(method={target})'

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'StaticRegression':
        """ Fitting function using simple statistic from training set

        NaN targets are ignored; the statistic is computed from the
        remaining values only.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples. Only validated, never used to
            compute the statistic.
        y : array-like, shape (n_samples,) or (n_samples, 1)
            The target values (real numbers). May contain NaN.

        Returns
        -------
        self : 'StaticRegression' object

        Raises
        ------
        ValueError
            If y is not one-dimensional, its length differs from the number
            of rows in X, it contains infinity, or it holds only NaN.
        """

        X = check_array(X)  # Input validation for X

        # y is validated by hand: check_X_y rejects NaN targets outright,
        # which would make the NaN filtering below unreachable.
        y = np.asarray(y, dtype=np.float64)
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()  # accept a single column as a 1d target
        if y.ndim != 1:
            raise ValueError(
                f'y should be a 1d array, got an array of shape {y.shape} instead.'
            )
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                'Found input variables with inconsistent numbers of samples: '
                f'X has {X.shape[0]}, y has {y.shape[0]}.'
            )
        if np.isinf(y).any():
            raise ValueError('Input y contains infinity.')

        y = y[~np.isnan(y)]  # remove nan values
        if y.size == 0:
            raise ValueError('y contains no non-NaN values to fit on.')

        target_function = _get_np_func(self.method, self.q, self.interpolation)
        self.y_ = target_function(y)

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """ Predict the value to each sample in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples. Only the number of rows is used.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            The fitted statistic ``y_`` repeated once per sample.
        """

        check_is_fitted(self)  # Check that fit has been called
        X = check_array(X)  # Input validation
        return np.full(X.shape[0], self.y_, dtype=np.float64)

    def get_params(self, deep=True):
        """ Return the constructor parameters as a dict.

        `deep` is accepted for scikit-learn API compatibility and ignored,
        as this estimator holds no nested estimators.
        """
        return {'method': self.method, 'q': self.q, 'interpolation': self.interpolation}

    def set_params(self, **parameters):
        """ Set constructor parameters and return the estimator itself.

        When the resulting method is not 'quantile', `q` and `interpolation`
        are reset to their defaults, even if passed in the same call.

        Raises
        ------
        ValueError
            If a name is not one of the parameters listed by `get_params`;
            nothing is modified in that case.
        """
        valid_names = self.get_params(deep=False)
        # validate everything first so a bad name leaves the estimator untouched
        for parameter in parameters:
            if parameter not in valid_names:
                raise ValueError(
                    f'Invalid parameter {parameter!r} for estimator {self}. '
                    f'Valid parameters are: {sorted(valid_names)}.'
                )

        for parameter, value in parameters.items():
            setattr(self, parameter, value)

        # reset these in case of change of method from quantile
        if self.method != 'quantile':
            self.q, self.interpolation = 0.5, 'linear'

        return self

## Grouping Regressor

In [None]:
class GroupRegression(RegressorMixin):
    """ Work-in-progress regressor; fitting and prediction are not
    implemented yet.

    NOTE(review): presumably intended to compute the configured statistic
    per group of rows in a dataframe -- confirm with the author.

    Parameters
    ----------
    method : str, default='mean'
        Name of the statistic to use. Stored only; not used yet.

    q : float, default=0.5
        Quantile to compute. Stored only; not used yet.

    interpolation : str, default='linear'
        Quantile interpolation option. Stored only; not used yet.

    Examples
    --------

    """

    def __init__(self, *, method: str = 'mean', q: float = 0.5,
                 interpolation: str = 'linear') -> None:
        self.method = method
        self.q = q
        self.interpolation = interpolation

    def __repr__(self) -> str:
        # q and interpolation are only shown when method is 'quantile'
        if self.method != 'quantile':
            target = self.method
        else:
            target = f'{self.method}, q={self.q}, interpolation={self.interpolation}'

        return f'GroupRegression(method={target})'

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'GroupRegression':
        """ Placeholder fit: stores a marker on ``self.X`` and learns nothing.

        Returns
        -------
        self : 'GroupRegression' object
        """

        self.X = 'dataframe'  # placeholder value, not the training data

        # NOTE:
        # Take the dataframe as is and take the columns labels as a argument when fit is called
        # (e.g. self.features_ = X.columns), then use these arguments or features to check
        # on predict if the input test has the features necessary
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """ Not implemented yet.

        Raises
        ------
        NotImplementedError
            Always, rather than silently returning None to the caller.
        """
        raise NotImplementedError('GroupRegression.predict is not implemented yet.')

    def get_params(self, deep=True):
        """ Return the constructor parameters as a dict.

        `deep` is accepted for scikit-learn API compatibility and ignored.
        """
        return {'method': self.method, 'q': self.q, 'interpolation': self.interpolation}

    def set_params(self, **parameters):
        """ Set constructor parameters and return the estimator itself.

        When the resulting method is not 'quantile', `q` and `interpolation`
        are reset to their defaults, even if passed in the same call.

        Raises
        ------
        ValueError
            If a name is not one of the parameters listed by `get_params`;
            nothing is modified in that case.
        """
        valid_names = self.get_params(deep=False)
        # validate everything first so a bad name leaves the estimator untouched
        for parameter in parameters:
            if parameter not in valid_names:
                raise ValueError(
                    f'Invalid parameter {parameter!r} for estimator {self}. '
                    f'Valid parameters are: {sorted(valid_names)}.'
                )

        for parameter, value in parameters.items():
            setattr(self, parameter, value)

        # reset these in case of change of method from quantile
        if self.method != 'quantile':
            self.q, self.interpolation = 0.5, 'linear'

        return self