In [29]:
from sklearn.base import BaseEstimator, RegressorMixin
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.sarimax import SARIMAXResults

from typing import Optional, Union, Tuple, List, Dict


import pandas as pd
import numpy as np
import warnings
import inspect

https://www.aritro.in/post/exponential-smoothing-using-scikit-learn-wrapper-statsmodels/

https://www.scikit-yb.org/en/latest/_modules/yellowbrick/contrib/statsmodels/base.html#StatsModelsWrapper

In [7]:
from typing import Optional, Union, Tuple, List, Dict



class Sarimax(BaseEstimator, RegressorMixin):
    """
    A universal sklearn-style wrapper for statsmodels SARIMAX.

    Parameters
    ----------


    Attributes
    ----------

    """

    def __init__(
        self,
        order: tuple=(1, 0, 0),
        seasonal_order: tuple=(0, 0, 0, 0),
        trend: str=None,
        measurement_error: bool=False,
        time_varying_regression: bool=False,
        mle_regression: bool=True,
        simple_differencing: bool=False,
        enforce_stationarity: bool=True,
        enforce_invertibility: bool=True,
        hamilton_representation: bool=False,
        concentrate_scale: bool=False,
        trend_offset: int=1,
        use_exact_diffuse: bool=False,
        dates = None,
        freq = None,
        missing = 'none',
        validate_specification: bool=True,
        method: str='lbfgs',
        maxiter: int=50,
        start_params = None,
        disp: bool= False,
        fit_kwargs: Optional[dict]={'disp':False},
        predict_kwargs: Optional[dict]={}
    ) -> None:

        self.order                   = order
        self.seasonal_order          = seasonal_order
        self.trend                   = trend
        self.measurement_error       = measurement_error
        self.time_varying_regression = time_varying_regression
        self.mle_regression          = mle_regression
        self.simple_differencing     = simple_differencing
        self.enforce_stationarity    = enforce_stationarity
        self.enforce_invertibility   = enforce_invertibility
        self.hamilton_representation = hamilton_representation
        self.concentrate_scale       = concentrate_scale
        self.trend_offset            = trend_offset
        self.use_exact_diffuse       = use_exact_diffuse
        self.dates                   = dates
        self.freq                    = freq
        self.missing                 = missing
        self.validate_specification  = validate_specification
        self.method                  = method
        self.maxiter                 = maxiter
        self.start_params            = start_params
        self.disp                    = disp
        self.fit_kwargs              = fit_kwargs
        self.predict_kwargs          = predict_kwargs
        
        self.sarimax        = None
        self.sarimax_res    = None
        self.training_index = None
        self._dummy_create_fit_sarimax()

        # Check remove from fit_kwargs the parameters that are not in the fit method
        # of the statsmodels.SARIMAX
        fit_kwargs_keys = inspect.signature(self.sarimax.fit).parameters.keys()
        self.fit_kwargs = {k:v for k,v in self.fit_kwargs.items() if k in fit_kwargs_keys}
        # Check remove from predict_kwargs the parameters that are not in the predict method
        # of the statsmodels.SARIMAX.RESULTS
        predict_kwargs_keys = inspect.signature(self.sarimax_res.get_forecast).parameters.keys()
        self.predict_kwargs = {k:v for k,v in self.predict_kwargs.items() if k in predict_kwargs_keys}


    def _create_sarimax(
        self,
        y: pd.Series,
        exog: Optional[Union[pd.Series, pd.DataFrame]] = None
        ) -> None:
        """
        A helper function to create a new statsmodel.SARIMAX.

        Parameters
        ----------
        y : pandas.Series
            The endogenous variable.
        exog : pandas.DataFrame
            The exogenous variables.
        
        Returns
        -------
        None

        """
        print(self.__dict__)

        self.sarimax = SARIMAX(endog=y, exog=exog, **self.__dict__)

        return
    

    def _dummy_create_fit_sarimax(self):
        """
        A helper function to create a dummy SARIMAX and fit it to an empty
        series.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        kwargs_default = {
            'method': self.method,
            'maxiter': self.maxiter,
            'start_params': self.start_params,
            'disp': self.disp,
        }
        
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self._create_sarimax(y=pd.Series([], dtype=float), exog=None)
            self.sarimax_res = self.sarimax.fit(**kwargs_default)
            self.training_index = pd.RangeIndex(start=0, stop=0, step=1)

        return


    def fit(
        self,
        y: pd.Series,
        exog: Optional[Union[pd.Series, pd.DataFrame]] = None
    ) -> None:
        """
        Fit the model to the data.

        Parameters
        ----------
        y : pandas Series
            Training time series.
        exog : pandas Series, pandas DataFrame, default `None`
            Exogenous variable/s included as predictor/s. Must have the same
            number of observations as `y` and their indexes must be aligned so
            that y[i] is regressed on exog[i].

        Returns
        -------
        None

        """
        
        kwargs_default = {
            'method': self.method,
            'maxiter': self.maxiter,
            'start_params': self.start_params,
            'disp': self.disp,
        }

        # User provided fit_kwargs in the fit have preference over the default ones
        if self.fit_kwargs:
            kwargs_default.update(fit_kwargs)

        self._create_sarimax(y=y, exog=exog)
        self.sarimax_res = self.sarimax.fit(**self.fit_kwargs)
        self.training_index = y.index

        return 


    def predict(
        self,
        steps: int,
        last_window: Optional[pd.Series]=None,
        exog: Optional[Union[pd.Series, pd.DataFrame]]=None
    ):
        """
        Predict n steps ahead. It is an recursive process in which, each prediction,
        is used as a predictor for the next step.

        Parameters
        ----------
        steps : int
            Number of future steps predicted.
        last_window : pandas Series, default `None`
            Series values used to create the predictors (lags) needed in the 
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window` are
            used to calculate the initial predictors, and the predictions start
            right after training data.
        exog : pandas Series, pandas DataFrame, default `None`
            Exogenous variable/s included as predictor/s.

        Returns
        -------
        predictions : pandas Series
            Predicted values.
        
        """

        predictions = self.sarimax_res.forecast(steps=steps, exog=exog)

        return predictions
    

    def predict_interval(self, steps, exog=None, alpha=0.05, **kwargs):
        """
        """
        predictions = self.sarimax_res.get_forecast(
                        steps           = steps,
                        exog            = exog,
                        return_conf_int = False,
                        alpha           = 0.05,
                        **kwargs
                      )
        
        predictions = pd.concat((
                        predictions.predicted_mean.rename("pred"),
                        predictions.conf_int(alpha=alpha)),
                        axis = 1
                     )
        predictions.columns = ['pred', 'lower_bound', 'upper_bound']

        return predictions
    
    def extend():
        """
        """
        pass
    

    def set_params(self, params):
        """
        
        """
        params = {k:v for k,v in params.items() if k in self.__dict__}
        for key, value in params.items():
            setattr(self, key, value)

        self._create_sarimax(
            y = pd.Series(data=self.sarimax.endog.ravel(), index=self.training_index),
            exog = self.sarimax.exog
        )
            

    def __repr__(self):
        p, d, q = self.order
        P, D, Q, m = self.seasonal_order

        return f"Sarimax({p},{d},{q})({P},{D},{Q})[{m}]"


In [14]:
args = inspect.getargvalues(Sarimax())

{'order': (1, 0, 0), 'seasonal_order': (0, 0, 0, 0), 'trend': None, 'measurement_error': False, 'time_varying_regression': False, 'mle_regression': True, 'simple_differencing': False, 'enforce_stationarity': True, 'enforce_invertibility': True, 'hamilton_representation': False, 'concentrate_scale': False, 'trend_offset': 1, 'use_exact_diffuse': False, 'dates': None, 'freq': None, 'missing': 'none', 'validate_specification': True, 'method': 'lbfgs', 'maxiter': 50, 'start_params': None, 'disp': False, 'fit_kwargs': {'disp': False}, 'predict_kwargs': {}, 'sarimax': None, 'sarimax_res': None, 'training_index': None}


AttributeError: 'Sarimax' object has no attribute 'f_code'

In [3]:
sarimax = Sarimax(order=(5, 1, 1))

{'order': (5, 1, 1), 'seasonal_order': (0, 0, 0, 0), 'trend': None, 'measurement_error': False, 'time_varying_regression': False, 'mle_regression': True, 'simple_differencing': False, 'enforce_stationarity': True, 'enforce_invertibility': True, 'hamilton_representation': False, 'concentrate_scale': False, 'trend_offset': 1, 'use_exact_diffuse': False, 'dates': None, 'freq': None, 'missing': 'none', 'validate_specification': True, 'method': 'lbfgs', 'maxiter': 50, 'start_params': None, 'disp': False, 'fit_kwargs': {'disp': False}, 'predict_kwargs': {}, 'sarimax': None, 'sarimax_res': None, 'training_index': None}


LinAlgError: Schur decomposition solver error.

In [56]:
sarimax = Sarimax(order=(1, 1, 1))
sarimax.fit(y=pd.Series(np.random.normal(size=100)))
sarimax



In [57]:
sarimax.predict(steps=4)

100    0.089448
101    0.094491
102    0.094429
103    0.094430
Name: predicted_mean, dtype: float64

In [58]:
sarimax.predict_interval(steps=4)



Unnamed: 0,pred,lower_bound,upper_bound
100,0.089448,-1.962973,2.14187
101,0.094491,-1.957836,2.146818
102,0.094429,-1.957901,2.146759
103,0.09443,-1.9579,2.14676


In [54]:
sarimax.set_params({'order': (1, 0, 110)})
sarimax



In [49]:
sarimax = Sarimax()
sarimax.set_params({'order': (1, 0, 99)})
sarimax



## v2

In [44]:
from typing import Optional, Union, Tuple, List, Dict



class Sarimax2(BaseEstimator, RegressorMixin):
    """
    A universal sklearn-style wrapper for statsmodels SARIMAX.

    Parameters
    ----------


    Attributes
    ----------

    """

    def __init__(
        self,
        order: tuple=(1, 0, 0),
        seasonal_order: tuple=(0, 0, 0, 0),
        trend: str=None,
        measurement_error: bool=False,
        time_varying_regression: bool=False,
        mle_regression: bool=True,
        simple_differencing: bool=False,
        enforce_stationarity: bool=True,
        enforce_invertibility: bool=True,
        hamilton_representation: bool=False,
        concentrate_scale: bool=False,
        trend_offset: int=1,
        use_exact_diffuse: bool=False,
        dates = None,
        freq = None,
        missing = 'none',
        validate_specification: bool=True,
        method: str='lbfgs',
        maxiter: int=50,
        start_params: np.ndarray= None,
        disp: bool=False,
        sm_init_kwargs: dict={},
        sm_fit_kwargs: dict={},
        sm_predict_kwargs: dict={}
    ) -> None:

        self.order                   = order
        self.seasonal_order          = seasonal_order
        self.trend                   = trend
        self.measurement_error       = measurement_error
        self.time_varying_regression = time_varying_regression
        self.mle_regression          = mle_regression
        self.simple_differencing     = simple_differencing
        self.enforce_stationarity    = enforce_stationarity
        self.enforce_invertibility   = enforce_invertibility
        self.hamilton_representation = hamilton_representation
        self.concentrate_scale       = concentrate_scale
        self.trend_offset            = trend_offset
        self.use_exact_diffuse       = use_exact_diffuse
        self.dates                   = dates
        self.freq                    = freq
        self.missing                 = missing
        self.validate_specification  = validate_specification
        self.method                  = method
        self.maxiter                 = maxiter
        self.start_params            = start_params
        self.disp                    = disp

        # Create the dictionaries with the additional statsmodels parameters to be  
        # used during the init, fit and predict methods. Note that the statsmodels 
        # SARIMAX.fit parameters `method`, `max_iter`, `start_params` and `disp` 
        # have been moved to the initialization of this model and will have 
        # priority over those provided by the user using via `sm_fit_kwargs`.
        self.sm_init_kwargs    = sm_init_kwargs
        self.sm_fit_kwargs     = sm_fit_kwargs
        self.sm_predict_kwargs = sm_predict_kwargs

        # Params that can be set with the `set_params` method
        _, _, _, _sarimax_params = inspect.getargvalues(inspect.currentframe())
        _sarimax_params.pop("self")
        self._sarimax_params = _sarimax_params

        self._consolidate_kwargs()

        # Create Results Attributes 
        self.set_output     = None
        self.sarimax        = None
        self.sarimax_res    = None
        self.training_index = None


    def __repr__(self):
        p, d, q = self.order
        P, D, Q, m = self.seasonal_order

        return f"Sarimax({p},{d},{q})({P},{D},{Q})[{m}]"
    

    def _consolidate_kwargs(
        self
    ) -> None:
        """
        """
        
        # statsmodels.tsa.statespace.SARIMAX parameters
        _init_kwargs = self.sm_init_kwargs.copy()
        _init_kwargs.update({
           'order': self.order,
           'seasonal_order': self.seasonal_order,
           'trend': self.trend,
           'measurement_error': self.measurement_error,
           'time_varying_regression': self.time_varying_regression,
           'mle_regression': self.mle_regression,
           'simple_differencing': self.simple_differencing,
           'enforce_stationarity': self.enforce_stationarity,
           'enforce_invertibility': self.enforce_invertibility,
           'hamilton_representation': self.hamilton_representation,
           'concentrate_scale': self.concentrate_scale,
           'trend_offset': self.trend_offset,
           'use_exact_diffuse': self.use_exact_diffuse,
           'dates': self.dates,
           'freq': self.freq,
           'missing': self.missing,
           'validate_specification': self.validate_specification
        })
        self._init_kwargs = _init_kwargs

        # statsmodels.tsa.statespace.SARIMAX.fit parameters
        _fit_kwargs = self.sm_fit_kwargs.copy()
        _fit_kwargs.update({
           'method': self.method,
           'maxiter': self.maxiter,
           'start_params': self.start_params,
           'disp': self.disp,
        })        
        self._fit_kwargs = _fit_kwargs

        # statsmodels.tsa.statespace.SARIMAXResults.get_forecast parameters
        self._predict_kwargs = self.sm_predict_kwargs.copy()

        return
    
        
    def _create_sarimax(
        self,
        endog: Union[np.ndarray, pd.Series, pd.DataFrame],
        exog: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]]=None
    ) -> None:
        """
        A helper function to create a new statsmodel.SARIMAX.

        Parameters
        ----------
        endog : pandas.Series
            The endogenous variable.
        exog : pandas.DataFrame
            The exogenous variables.
        
        Returns
        -------
        None

        """

        self.sarimax = SARIMAX(endog=endog, exog=exog, **self._init_kwargs)

        return


    def fit(
        self,
        y: Union[np.ndarray, pd.Series, pd.DataFrame],
        exog: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]]=None
    ) -> None:
        """
        Fit the model to the data.

        Parameters
        ----------
        y : pandas Series
            Training time series.
        exog : pandas Series, pandas DataFrame, default `None`
            Exogenous variable/s included as predictor/s. Must have the same
            number of observations as `y` and their indexes must be aligned so
            that y[i] is regressed on exog[i].

        Returns
        -------
        None

        """

        self.set_output = 'numpy' if isinstance(y, np.ndarray) else 'pandas'
        
        self._create_sarimax(endog=y, exog=exog)
        self.sarimax_res = self.sarimax.fit(**self._fit_kwargs)
        # self.training_index = y.index

        return
    

    def predict(
        self,
        steps: int,
        exog: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]]=None, 
        return_conf_int: bool=False,
        alpha: float=0.05
    ) -> Union[np.ndarray, pd.DataFrame]:
        """
        """
        predictions = self.sarimax_res.get_forecast(
                          steps = steps,
                          exog  = exog,
                          **self._predict_kwargs
                      )
        
        if not return_conf_int:
            predictions = predictions.predicted_mean
            if self.set_output == 'pandas':
                predictions = predictions.rename("pred").to_frame()
        else:
            if self.set_output == 'numpy':
                predictions = np.column_stack(
                                  [predictions.predicted_mean,
                                   predictions.conf_int(alpha=alpha)]
                              )
            else:
                predictions = pd.concat((
                                  predictions.predicted_mean.rename("pred"),
                                  predictions.conf_int(alpha=alpha)),
                                  axis = 1
                              )
                predictions.columns = ['pred', 'lower_bound', 'upper_bound']

        return predictions
    
    def append():
        """
        """
        pass
    

    def set_params(self, params):
        """
        """
        params = {k:v for k,v in params.items() if k in self._sarimax_params}
        for key, value in params.items():
            setattr(self, key, value)

        self._consolidate_kwargs()

        self.sarimax_res = None

        # Create the dictionaries with the additional statsmodels parameters to be  
        # used during the init, fit and predict methods. Note that the statsmodels 
        # SARIMAX.fit parameters `method`, `max_iter`, `start_params` and `disp` 
        # have been moved to the initialization of this model and will have 
        # priority over those provided by the user using via `sm_fit_kwargs`.
        
        # statsmodels.tsa.statespace.SARIMAX parameters
       


In [60]:
sarimax = Sarimax2(sm_fit_kwargs={'maxiter': 100, 'hhh':1})

In [61]:
sarimax._fit_kwargs

{'maxiter': 50,
 'hhh': 1,
 'method': 'lbfgs',
 'start_params': None,
 'disp': False}

In [62]:
sarimax.sm_fit_kwargs

{'maxiter': 100, 'hhh': 1}

In [63]:
sarimax.maxiter

50

In [64]:
sarimax.set_params({'sm_fit_kwargs':{'maxiter': 200, 'hhh':1}})

In [65]:
sarimax._fit_kwargs

{'maxiter': 50,
 'hhh': 1,
 'method': 'lbfgs',
 'start_params': None,
 'disp': False}

In [66]:
sarimax.sm_fit_kwargs

{'maxiter': 200, 'hhh': 1}

In [67]:
sarimax.set_params({'maxiter': 300})

In [68]:
sarimax.maxiter

300

In [69]:
sarimax._fit_kwargs

{'maxiter': 300,
 'hhh': 1,
 'method': 'lbfgs',
 'start_params': None,
 'disp': False}

In [70]:
sarimax.sm_fit_kwargs

{'maxiter': 200, 'hhh': 1}

In [60]:
np.random.seed(123)
endog = pd.DataFrame(pd.Series(np.random.normal(size=100)), columns=['serie_1'])
#endog = pd.Series(np.random.normal(size=100))
#endog = np.random.normal(size=100)

exog = np.random.normal(size=100)
#exog = pd.Series(np.random.normal(size=100))

sarimax = Sarimax2(order=(3, 3, 3))
sarimax.fit(y=endog, exog=exog)

predictions = sarimax.predict(5, exog=exog[:5], return_conf_int=True)
print(type(predictions))
predictions

  warn('Non-invertible starting MA parameters found.'


<class 'pandas.core.frame.DataFrame'>




Unnamed: 0,pred,lower_bound,upper_bound
100,-0.648113,-3.128797,1.83257
101,-0.20908,-2.814753,2.396594
102,-0.418755,-3.190136,2.352626
103,-0.460242,-3.695584,2.775099
104,-0.391963,-3.834511,3.050586


In [59]:
exog[:5]

0    0.703310
1   -0.598105
2    2.200702
3    0.688297
4   -0.006307
dtype: float64

In [38]:
arr1 = np.array([0.76578498, 0.79847458, 0.73380039, 0.86588736, 0.96231786])
arr2 = np.array([[-1.20204223,  2.7336122 ],
 [-1.20640858 , 2.80335774],
 [-1.28768676 , 2.75528753],
 [-1.16192687,  2.89370158],
 [-1.18203997,  3.10667568]])

In [40]:
np.column_stack([arr1,arr2])

array([[ 0.76578498, -1.20204223,  2.7336122 ],
       [ 0.79847458, -1.20640858,  2.80335774],
       [ 0.73380039, -1.28768676,  2.75528753],
       [ 0.86588736, -1.16192687,  2.89370158],
       [ 0.96231786, -1.18203997,  3.10667568]])

In [27]:
sarimax.predict_kwargs

{}

## Statsmodels

In [13]:
np.random.seed(123)
endog = pd.DataFrame(pd.Series(np.random.normal(size=100)), columns=['serie_1'])
endog = pd.DataFrame(pd.Series(np.random.normal(size=100)), columns=['serie_1'])

sarimax = SARIMAX(order=(3, 3, 3), endog=endog)
sarimax_res = sarimax.fit()

type(sarimax_res.get_forecast(5).predicted_mean)

  warn('Non-invertible starting MA parameters found.'


pandas.core.series.Series

In [12]:
endog

Unnamed: 0,serie_1
0,-1.085631
1,0.997345
2,0.282978
3,-1.506295
4,-0.578600
...,...
95,1.031114
96,-1.084568
97,-1.363472
98,0.379401


In [87]:
sarimax.missing

AttributeError: 'SARIMAX' object has no attribute 'missing'

In [78]:
np.random.seed(123)
sarimax = Sarimax2(order=(3, 3, 3))
sarimax.fit(y=np.random.normal(size=100))

sarimax.predict(5)

  warn('Non-invertible starting MA parameters found.'


array([-0.66263334, -0.17596637, -0.43844843, -0.52160303, -0.39956411])