# Join get_coef and get_feature_importance into get_feature_importance

In [14]:
from pathlib import Path
Path.cwd()

PosixPath('/home/javi/Documents/GitHub/skforecast/dev/develop')

In [15]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(1, '/home/javi/Documents/GitHub/skforecast')
%config Completer.use_jedi = False

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# !pip uninstall skforecast -y

In [17]:
# Unit test __init__
# ==============================================================================
import pytest
from pytest import approx
import numpy as np
import pandas as pd
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
import warnings

# ForecasterAutoreg

## Original

In [None]:
def get_coef(self) -> pd.DataFrame:
    '''
    Return estimated coefficients for the regressor stored in the forecaster.
    Only valid when regressor stores internally the feature coefficients in
    the attribute `coef_`.

    Parameters
    ----------
    self

    Returns
    -------
    coef : pandas DataFrame, None
        Value of the coefficients associated with each predictor (columns
        `feature` and `coef`). `None`, together with a warning, when the
        coefficients cannot be retrieved.

    '''

    # For a scikit-learn Pipeline the last element of the pipeline is inspected.
    if isinstance(self.regressor, sklearn.pipeline.Pipeline):
        estimator = self.regressor[-1]
    else:
        estimator = self.regressor

    try:
        coef = pd.DataFrame({
                   'feature': self.X_train_col_names,
                   'coef': estimator.coef_
               })
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are propagated instead of being turned into a warning.
        warnings.warn(
            f"Impossible to access feature coefficients for regressor of type {type(estimator)}. "
            f"This method is only valid when the regressor stores internally "
            f" the coefficients in the attribute `coef_`."
        )

        coef = None

    return coef

    
def get_feature_importance(self) -> pd.DataFrame:
    '''
    Return feature importance of the regressor stored in the
    forecaster. Only valid when regressor stores internally the feature
    importance in the attribute `feature_importances_`.

    Parameters
    ----------
    self

    Returns
    -------
    feature_importance : pandas DataFrame, None
        Feature importance associated with each predictor (columns `feature`
        and `importance`). `None`, together with a warning, when the
        importance cannot be retrieved.
    '''

    # For a scikit-learn Pipeline the last element of the pipeline is inspected.
    if isinstance(self.regressor, sklearn.pipeline.Pipeline):
        estimator = self.regressor[-1]
    else:
        estimator = self.regressor

    try:
        feature_importance = pd.DataFrame({
                                'feature': self.X_train_col_names,
                                'importance': estimator.feature_importances_
                            })
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are propagated instead of being turned into a warning.
        warnings.warn(
            f"Impossible to access feature importance for regressor of type {type(estimator)}. "
            f"This method is only valid when the regressor stores internally "
            f" the feature importance in the attribute `feature_importances_`."
        )

        feature_importance = None

    return feature_importance

### Original tests

In [None]:
def test_output_get_feature_importance_when_regressor_is_RandomForest():
    '''
    Check feature names and importances returned by get_feature_importance
    when the regressor is a RandomForestRegressor with lags=3 and the
    forecaster is trained with y=pd.Series(np.arange(10)).
    '''
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123)
    forecaster = ForecasterAutoreg(regressor, lags=3)
    forecaster.fit(y=pd.Series(np.arange(10)))
    results = forecaster.get_feature_importance()

    names = ['lag_1', 'lag_2', 'lag_3']
    importance = np.array([0.94766355, 0., 0.05233645])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    assert (results['feature'] == expected['feature']).all()
    assert results['importance'].values == approx(expected['importance'].values)
    
    
def test_output_get_feature_importance_when_regressor_is_linear_model():
    '''
    Check that get_feature_importance returns None when the regressor is
    Lasso with lags=3 and the forecaster is trained with
    y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoreg(Lasso(), lags=3)
    forecaster.fit(y=y)
    assert forecaster.get_feature_importance() is None
    
def test_output_get_coef_when_regressor_is_LinearRegression():
    '''
    Check feature names and coefficients returned by get_coef when the
    regressor is LinearRegression with lags=3 and the forecaster is trained
    with y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
    forecaster.fit(y=y)
    results = forecaster.get_coef()

    names = ['lag_1', 'lag_2', 'lag_3']
    coefficients = np.array([0.33333333, 0.33333333, 0.33333333])
    expected = pd.DataFrame({'feature': names, 'coef': coefficients})

    assert (results['feature'] == expected['feature']).all()
    assert results['coef'].values == approx(expected['coef'].values)
    
def test_output_get_coef_when_regressor_is_RandomForest():
    '''
    Check that get_coef returns None when the regressor is a
    RandomForestRegressor with lags=3 and the forecaster is trained with
    y=pd.Series(np.arange(5)).
    '''
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2)
    forecaster = ForecasterAutoreg(regressor, lags=3)
    forecaster.fit(y=pd.Series(np.arange(5)))
    assert forecaster.get_coef() is None

In [None]:
# Run the original ForecasterAutoreg tests defined above; a failing assertion
# raises in this cell.
test_output_get_feature_importance_when_regressor_is_RandomForest()
test_output_get_feature_importance_when_regressor_is_linear_model()
test_output_get_coef_when_regressor_is_LinearRegression()
test_output_get_coef_when_regressor_is_RandomForest()

## New

In [4]:
def get_feature_importance(self) -> pd.DataFrame:
    '''
    Return feature importance of the regressor stored in the
    forecaster. Only valid when regressor stores internally the feature
    importance in the attribute `feature_importances_` or `coef_`.

    Parameters
    ----------
    self

    Returns
    -------
    feature_importance : pandas DataFrame, None
        Feature importance associated with each predictor (columns `feature`
        and `importance`). `None`, together with a warning, when neither
        attribute can be used.
    '''

    # For a scikit-learn Pipeline the last element of the pipeline is inspected.
    if isinstance(self.regressor, sklearn.pipeline.Pipeline):
        estimator = self.regressor[-1]
    else:
        estimator = self.regressor

    feature_importance = None
    # `feature_importances_` takes precedence; `coef_` is only tried when the
    # first attribute cannot be turned into a DataFrame.
    for attribute in ('feature_importances_', 'coef_'):
        try:
            feature_importance = pd.DataFrame({
                                    'feature': self.X_train_col_names,
                                    'importance': getattr(estimator, attribute)
                                })
        except Exception:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt
            # and SystemExit are propagated instead of being silenced.
            continue
        break

    if feature_importance is None:
        warnings.warn(
            f"Impossible to access feature importance for regressor of type {type(estimator)}. "
            f"This method is only valid when the regressor stores internally "
            f"the feature importance in the attribute `feature_importances_` "
            f"or `coef_`."
        )

    return feature_importance

In [76]:
# Fit a ForecasterAutoreg that wraps a single-tree RandomForestRegressor and
# display the `feature_importances_` stored in the fitted regressor.
forecaster = ForecasterAutoreg(RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123), lags=3)
forecaster.fit(y=pd.Series(np.arange(10)))

forecaster.regressor.feature_importances_

array([0.94766355, 0.        , 0.05233645])

In [77]:
# Find out which exception is raised when `feature_importances_` is requested
# from a fitted LinearRegression: prints 'a' for AttributeError and 'b' for
# any other exception.
forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
forecaster.fit(y=pd.Series(np.arange(10)))

try:
    forecaster.regressor.feature_importances_
except AttributeError:
    print('a')
except:
    print('b')

a


In [5]:
# Call the new module-level get_feature_importance on a forecaster whose
# regressor is a LinearRegression and display the returned DataFrame.
forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
forecaster.fit(y=pd.Series(np.arange(10)))
get_feature_importance(forecaster)

Unnamed: 0,feature,importance
0,lag_1,0.333333
1,lag_2,0.333333
2,lag_3,0.333333


In [79]:
# Call the new module-level get_feature_importance on a forecaster whose
# regressor is a RandomForestRegressor and display the returned DataFrame.
# The call previously targeted `get_feature_importance_new`, a name that is
# not defined in this notebook.
forecaster = ForecasterAutoreg(RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123), lags=3)
forecaster.fit(y=pd.Series(np.arange(10)))
get_feature_importance(forecaster)

Unnamed: 0,feature,importance
0,lag_1,0.947664
1,lag_2,0.0
2,lag_3,0.052336


In [80]:
# Call the new module-level get_feature_importance on a forecaster whose
# regressor is an MLPRegressor. The call previously targeted
# `get_feature_importance_new`, a name that is not defined in this notebook.
forecaster = ForecasterAutoreg(MLPRegressor(solver = 'lbfgs', max_iter= 100, random_state=123), lags=3)
forecaster.fit(y=pd.Series(np.arange(10)))
get_feature_importance(forecaster)



### New tests

In [53]:
# Call the new module-level get_feature_importance on a LinearRegression
# forecaster trained with an exogenous Series named 'exog', and display the
# returned DataFrame.
forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
forecaster.fit(y=pd.Series(np.arange(5)), exog=pd.Series(np.arange(10, 15), name='exog'))
get_feature_importance(forecaster)



Unnamed: 0,feature,importance
0,lag_1,0.25
1,lag_2,0.25
2,lag_3,0.25
3,exog,0.25


In [29]:
# Display the index of a Series created without an explicit index.
pd.Series(np.arange(10), name='exog').index

RangeIndex(start=0, stop=10, step=1)

In [42]:
# Call the new module-level get_feature_importance on a forecaster whose
# regressor is a single-tree RandomForestRegressor and display the result.
forecaster = ForecasterAutoreg(RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123), lags=3)
forecaster.fit(y=pd.Series(np.arange(10)))
get_feature_importance(forecaster)

Unnamed: 0,feature,importance
0,lag_1,0.947664
1,lag_2,0.0
2,lag_3,0.052336


In [9]:
def test_output_get_feature_importance_when_regressor_is_RandomForest():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is a RandomForestRegressor with lags=3 and the forecaster is trained with
    y=pd.Series(np.arange(10)).
    '''
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123)
    forecaster = ForecasterAutoreg(regressor, lags=3)
    forecaster.fit(y=pd.Series(np.arange(10)))
    results = forecaster.get_feature_importance()

    names = ['lag_1', 'lag_2', 'lag_3']
    importance = np.array([0.94766355, 0., 0.05233645])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)


def test_output_get_feature_importance_when_regressor_is_RandomForest_with_exog():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is a RandomForestRegressor with lags=3 and the forecaster is trained with
    y=pd.Series(np.arange(10)) plus the exogenous variable
    exog=pd.Series(np.arange(10, 20), name='exog').
    '''
    y = pd.Series(np.arange(10))
    exog = pd.Series(np.arange(10, 20), name='exog')
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123)
    forecaster = ForecasterAutoreg(regressor, lags=3)
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance()

    names = ['lag_1', 'lag_2', 'lag_3', 'exog']
    importance = np.array([0.94766355, 0.05233645, 0., 0.])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)


def test_output_get_feature_importance_when_regressor_is_LinearRegression():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is LinearRegression with lags=3 and the forecaster is trained with
    y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
    forecaster.fit(y=y)
    results = forecaster.get_feature_importance()

    names = ['lag_1', 'lag_2', 'lag_3']
    importance = np.array([0.33333333, 0.33333333, 0.33333333])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)


def test_output_get_feature_importance_when_regressor_is_LinearRegression_with_exog():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is LinearRegression with lags=3 and the forecaster is trained with
    y=pd.Series(np.arange(5)) plus the exogenous variable
    exog=pd.Series(np.arange(10, 15), name='exog').
    '''
    y = pd.Series(np.arange(5))
    exog = pd.Series(np.arange(10, 15), name='exog')
    forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance()

    names = ['lag_1', 'lag_2', 'lag_3', 'exog']
    importance = np.array([0.25, 0.25, 0.25, 0.25])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)


def test_output_get_feature_importance_when_regressor_no_attributes():
    '''
    Test output of get_feature_importance when regressor is MLPRegressor with lags=3
    and it is trained with y=pd.Series(np.arange(10)). The expected outcome is
    that neither `feature_importances_` nor `coef_` can be used, so the
    result is None and a warning is raised.
    '''
    forecaster = ForecasterAutoreg(MLPRegressor(solver='lbfgs', max_iter=50, random_state=123), lags=3)
    forecaster.fit(y=pd.Series(np.arange(10)))
    # Only the get_feature_importance call is wrapped, so a warning emitted
    # while fitting cannot satisfy the check by accident.
    with pytest.warns(UserWarning):
        results = forecaster.get_feature_importance()
    assert results is None

In [10]:
# Run the new ForecasterAutoreg tests defined above; a failing assertion
# raises in this cell.
test_output_get_feature_importance_when_regressor_is_RandomForest()
test_output_get_feature_importance_when_regressor_is_RandomForest_with_exog()
test_output_get_feature_importance_when_regressor_is_LinearRegression()
test_output_get_feature_importance_when_regressor_is_LinearRegression_with_exog()
test_output_get_feature_importance_when_regressor_no_attributes()



In [8]:
# Same setup as test_output_get_feature_importance_when_regressor_is_LinearRegression,
# but the result of the forecaster method is displayed instead of being
# compared against `expected`.
forecaster = ForecasterAutoreg(LinearRegression(), lags=3)
forecaster.fit(y=pd.Series(np.arange(5)))
expected = pd.DataFrame({
                'feature': ['lag_1', 'lag_2', 'lag_3'],
                'importance': np.array([0.33333333, 0.33333333, 0.33333333])
            })
results = forecaster.get_feature_importance()
results

Unnamed: 0,feature,importance
0,lag_1,0.333333
1,lag_2,0.333333
2,lag_3,0.333333


# ForecasterAutoregCustom

### Original tests

In [19]:
def create_predictors(y):
    '''
    Return up to the last 5 elements of `y`, most recent first (the first
    5 lags of a time series).
    '''
    # Start at the last element and walk backwards over at most 5 positions.
    last_five_reversed = slice(-1, -6, -1)
    return y[last_five_reversed]
    

def test_output_get_coef_when_regressor_is_LinearRegression():
    '''
    Check feature names and coefficients returned by get_coef when the
    regressor is LinearRegression, predictors are built with
    create_predictors (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(7)).
    '''
    forecaster = ForecasterAutoregCustom(
                     regressor      = LinearRegression(),
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=pd.Series(np.arange(7)))
    results = forecaster.get_coef()

    names = [
        'custom_predictor_0', 'custom_predictor_1', 'custom_predictor_2',
        'custom_predictor_3', 'custom_predictor_4'
    ]
    coefficients = np.array([0.2, 0.2, 0.2, 0.2, 0.2])
    expected = pd.DataFrame({'feature': names, 'coef': coefficients})

    assert (results['feature'] == expected['feature']).all()
    assert results['coef'].values == approx(expected['coef'].values)
    

def test_get_coef_when_regressor_is_RandomForest():
    '''
    Check that get_coef returns None when the regressor is a
    RandomForestRegressor, predictors are built with create_predictors
    (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(6)).
    '''
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2)
    forecaster = ForecasterAutoregCustom(
                     regressor      = regressor,
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=pd.Series(np.arange(6)))
    assert forecaster.get_coef() is None
    
def test_output_get_feature_importance_when_regressor_is_RandomForest():
    '''
    Test output of get_feature_importance when regressor is
    RandomForestRegressor, predictors are built with create_predictors
    (window_size=5) and it is trained with y=pd.Series(np.arange(10)).
    '''
    forecaster = ForecasterAutoregCustom(
                        regressor      = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123),
                        fun_predictors = create_predictors,
                        window_size    = 5
                 )
    forecaster.fit(y=pd.Series(np.arange(10)))
    # A leftover `expected = np.array(...)` assignment that was immediately
    # overwritten by this DataFrame has been removed.
    expected = pd.DataFrame({
                    'feature': ['custom_predictor_0', 'custom_predictor_1',
                                'custom_predictor_2', 'custom_predictor_3',
                                'custom_predictor_4'],
                    'importance': np.array([0.82142857, 0., 0.17857143, 0., 0.])
                })
    results = forecaster.get_feature_importance()
    assert (results['feature'] == expected['feature']).all()
    assert results['importance'].values == approx(expected['importance'].values)
    

def test_output_get_feature_importance_when_regressor_is_linear_model():
    '''
    Check that get_feature_importance returns None when the regressor is
    LinearRegression, predictors are built with create_predictors
    (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(6)).
    '''
    forecaster = ForecasterAutoregCustom(
                     regressor      = LinearRegression(),
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=pd.Series(np.arange(6)))
    assert forecaster.get_feature_importance() is None

In [20]:
# Run the original ForecasterAutoregCustom tests defined above; a failing
# assertion raises in this cell.
test_output_get_coef_when_regressor_is_LinearRegression()
test_get_coef_when_regressor_is_RandomForest()
test_output_get_feature_importance_when_regressor_is_RandomForest()
test_output_get_feature_importance_when_regressor_is_linear_model()



### New tests

In [30]:
def create_predictors(y):
    '''
    Build the first 5 lags of a time series: up to the last 5 elements of
    `y`, in reverse order.
    '''
    return y[-1:-6:-1]


def test_output_get_feature_importance_when_regressor_is_RandomForest():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is a RandomForestRegressor, predictors are built with create_predictors
    (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(10)).
    '''
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123)
    forecaster = ForecasterAutoregCustom(
                     regressor      = regressor,
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=pd.Series(np.arange(10)))
    results = forecaster.get_feature_importance()

    names = [
        'custom_predictor_0', 'custom_predictor_1', 'custom_predictor_2',
        'custom_predictor_3', 'custom_predictor_4'
    ]
    importance = np.array([0.82142857, 0., 0.17857143, 0., 0.])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)
    
    
def test_output_get_feature_importance_when_regressor_is_RandomForest_with_exog():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is a RandomForestRegressor, predictors are built with create_predictors
    (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(10)) plus the exogenous variable
    exog=pd.Series(np.arange(10, 20), name='exog').
    '''
    y = pd.Series(np.arange(10))
    exog = pd.Series(np.arange(10, 20), name='exog')
    regressor = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123)
    forecaster = ForecasterAutoregCustom(
                     regressor      = regressor,
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance()

    names = [
        'custom_predictor_0', 'custom_predictor_1', 'custom_predictor_2',
        'custom_predictor_3', 'custom_predictor_4', 'exog'
    ]
    importance = np.array([0.76190476, 0., 0.05952381, 0.17857143, 0., 0.])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)


def test_output_get_feature_importance_when_regressor_is_LinearRegression():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is LinearRegression, predictors are built with create_predictors
    (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(7)).
    '''
    forecaster = ForecasterAutoregCustom(
                     regressor      = LinearRegression(),
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=pd.Series(np.arange(7)))
    results = forecaster.get_feature_importance()

    names = [
        'custom_predictor_0', 'custom_predictor_1', 'custom_predictor_2',
        'custom_predictor_3', 'custom_predictor_4'
    ]
    importance = np.array([0.2, 0.2, 0.2, 0.2, 0.2])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)


def test_output_get_feature_importance_when_regressor_is_LinearRegression_with_exog():
    '''
    Check the DataFrame returned by get_feature_importance when the regressor
    is LinearRegression, predictors are built with create_predictors
    (window_size=5) and the forecaster is trained with
    y=pd.Series(np.arange(7)) plus the exogenous variable
    exog=pd.Series(np.arange(10, 17), name='exog').
    '''
    y = pd.Series(np.arange(7))
    exog = pd.Series(np.arange(10, 17), name='exog')
    forecaster = ForecasterAutoregCustom(
                     regressor      = LinearRegression(),
                     fun_predictors = create_predictors,
                     window_size    = 5
                 )
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance()

    names = [
        'custom_predictor_0', 'custom_predictor_1', 'custom_predictor_2',
        'custom_predictor_3', 'custom_predictor_4', 'exog'
    ]
    importance = np.array([0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(expected, results)
    
    
def test_output_get_feature_importance_when_regressor_no_attributes():
    '''
    Test output of get_feature_importance when regressor is MLPRegressor,
    predictors are built with create_predictors (window_size=5) and it is
    trained with y=pd.Series(np.arange(10)). The expected outcome is that
    neither `feature_importances_` nor `coef_` can be used, so the result is
    None and a warning is raised.
    '''
    forecaster = ForecasterAutoregCustom(
                    regressor      = MLPRegressor(solver='lbfgs', max_iter=50, random_state=123),
                    fun_predictors = create_predictors,
                    window_size    = 5
             )
    forecaster.fit(y=pd.Series(np.arange(10)))
    # Only the get_feature_importance call is wrapped, so a warning emitted
    # while fitting cannot satisfy the check by accident.
    with pytest.warns(UserWarning):
        results = forecaster.get_feature_importance()
    assert results is None


In [31]:
# Run the new ForecasterAutoregCustom tests defined above; a failing
# assertion raises in this cell.
test_output_get_feature_importance_when_regressor_is_RandomForest()
test_output_get_feature_importance_when_regressor_is_RandomForest_with_exog()
test_output_get_feature_importance_when_regressor_is_LinearRegression()
test_output_get_feature_importance_when_regressor_is_LinearRegression_with_exog()
test_output_get_feature_importance_when_regressor_no_attributes()



In [71]:
# Display the raw `feature_importances_` of the RandomForestRegressor after
# fitting a ForecasterAutoregCustom with an exogenous Series named 'exog'.
forecaster = ForecasterAutoregCustom(
                    regressor      = RandomForestRegressor(n_estimators=1, max_depth=2, random_state=123),
                    fun_predictors = create_predictors,
                    window_size    = 5
             )
forecaster.fit(y=pd.Series(np.arange(10)), exog=pd.Series(np.arange(10, 20), name='exog'))
forecaster.regressor.feature_importances_



array([0.76190476, 0.        , 0.05952381, 0.17857143, 0.        ,
       0.        ])

In [75]:
# Display the raw `coef_` of the LinearRegression after fitting a
# ForecasterAutoregCustom with an exogenous Series named 'exog'.
forecaster = ForecasterAutoregCustom(
                    regressor      = LinearRegression(),
                    fun_predictors = create_predictors,
                    window_size    = 5
             )
forecaster.fit(y=pd.Series(np.arange(7)), exog=pd.Series(np.arange(10, 17), name='exog'))
forecaster.regressor.coef_



array([0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
       0.16666667])

In [13]:
# Draft of the deprecation notice; plain string literals are enough because
# the message has no placeholders.
deprecation_message = (
    'This method was deprecated in version 0.4.3 in favor of the get_feature_importance. '
    'This method will be removed in 0.4.4'
)
warnings.warn(deprecation_message, category=DeprecationWarning)



# ForecasterAutoregMultiOutput

## Original

In [79]:
def get_coef(self, step) -> pd.DataFrame:
    '''
    Return estimated coefficients for the regressor stored in the forecaster
    for a specific step. Since a separate model is created for each forecast
    time step, it is necessary to select the model from which retrieve the
    information.

    Only valid when regressor stores internally the feature coefficients in
    the attribute `coef_`.

    Parameters
    ----------
    step : int
        Model from which retrieve information (a separate model is created for
        each forecast time step). First step is 1.

    Returns
    -------
    coef : pandas DataFrame, None
        Value of the coefficients associated with each predictor (columns
        `feature` and `coef`). `None`, together with a warning, when the
        coefficients cannot be retrieved.

    Raises
    ------
    Exception
        If `step` is lower than 1 or greater than `self.steps`.

    '''

    if step > self.steps:
        raise Exception(
            f"Forecaster trained for {self.steps} steps. Got step={step}."
        )
    if step < 1:
        raise Exception("Minimum step is 1.")

    # Stored regressors start at index 0
    step = step - 1

    # For a scikit-learn Pipeline the last element of the pipeline is inspected.
    if isinstance(self.regressor, sklearn.pipeline.Pipeline):
        estimator = self.regressors_[step][-1]
    else:
        estimator = self.regressors_[step]

    try:
        # The first len(self.lags) columns are always selected.
        idx_columns_lags = np.arange(len(self.lags))
        idx_columns_exog = np.array([], dtype=int)
        if self.included_exog:
            # After the lag columns, take every `self.steps`-th column
            # starting at the offset of the requested step.
            idx_columns_exog = np.arange(len(self.X_train_col_names))[len(self.lags) + step::self.steps]
        idx_columns = np.hstack((idx_columns_lags, idx_columns_exog))
        feature_names = [self.X_train_col_names[i] for i in idx_columns]
        # Drop the step marker so names do not depend on the selected step.
        feature_names = [name.replace(f"_step_{step+1}", "") for name in feature_names]
        coef = pd.DataFrame({
                    'feature': feature_names,
                    'coef': estimator.coef_
               })
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are propagated instead of being turned into a warning.
        warnings.warn(
            f"Impossible to access feature coefficients for regressor of type {type(estimator)}. "
            f"This method is only valid when the regressor stores internally "
            f" the coefficients in the attribute `coef_`."
        )

        coef = None

    return coef

    
def get_feature_importance(self, step) -> pd.DataFrame:
    '''
    Return feature importance of the model stored in the forecaster for a
    specific step. Since a separate model is created for each forecast time
    step, it is necessary to select the model from which retrieve
    information.

    Only valid when the regressor stores internally the feature importance
    in the attribute `feature_importances_`.

    Parameters
    ----------
    step : int
        Model from which retrieve information (a separate model is created for
        each forecast time step). First step is 1.

    Returns
    -------
    feature_importance : pandas DataFrame, None
        Feature importance associated with each predictor (columns `feature`
        and `importance`). `None`, together with a warning, when the
        importance cannot be retrieved.

    Raises
    ------
    Exception
        If `step` is lower than 1 or greater than `self.steps`.
    '''

    if step > self.steps:
        raise Exception(
            f"Forecaster trained for {self.steps} steps. Got step={step}."
        )
    if step < 1:
        raise Exception("Minimum step is 1.")

    # Stored regressors start at index 0
    step = step - 1

    # For a scikit-learn Pipeline the last element of the pipeline is inspected.
    if isinstance(self.regressor, sklearn.pipeline.Pipeline):
        estimator = self.regressors_[step][-1]
    else:
        estimator = self.regressors_[step]

    try:
        # The first len(self.lags) columns are always selected.
        idx_columns_lags = np.arange(len(self.lags))
        idx_columns_exog = np.array([], dtype=int)
        if self.included_exog:
            # After the lag columns, take every `self.steps`-th column
            # starting at the offset of the requested step.
            idx_columns_exog = np.arange(len(self.X_train_col_names))[len(self.lags) + step::self.steps]
        idx_columns = np.hstack((idx_columns_lags, idx_columns_exog))
        feature_names = [self.X_train_col_names[i] for i in idx_columns]
        # Drop the step marker so names do not depend on the selected step.
        feature_names = [name.replace(f"_step_{step+1}", "") for name in feature_names]
        feature_importance = pd.DataFrame({
                                'feature': feature_names,
                                'importance': estimator.feature_importances_
                            })
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are propagated instead of being turned into a warning.
        warnings.warn(
            f"Impossible to access feature importance for regressor of type {type(estimator)}. "
            f"This method is only valid when the regressor stores internally "
            f" the feature importance in the attribute `feature_importances_`."
        )

        feature_importance = None

    return feature_importance

### Original tests

In [81]:
def test_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1():
    '''
    Check the DataFrame returned by get_feature_importance for step 1 when
    the regressor is a RandomForestRegressor with lags=3, steps=1 and the
    forecaster is trained with y=pd.Series(np.arange(5)).
    '''
    regressor = RandomForestRegressor(random_state=123)
    forecaster = ForecasterAutoregMultiOutput(regressor, lags=3, steps=1)
    forecaster.fit(y=pd.Series(np.arange(5)))
    results = forecaster.get_feature_importance(step=1)

    names = ['lag_1', 'lag_2', 'lag_3']
    importance = np.array([0.3902439024390244, 0.3170731707317073, 0.2926829268292683])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(results, expected)
  
    
def test_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1_exog_included():
    '''
    Check the DataFrame returned by get_feature_importance for step 1 when
    the regressor is a RandomForestRegressor with lags=3, steps=1 and the
    forecaster is trained with y=pd.Series(np.arange(5)) and
    exog=pd.Series(np.arange(5), name='exog').
    '''
    y = pd.Series(np.arange(5))
    exog = pd.Series(np.arange(5), name='exog')
    regressor = RandomForestRegressor(random_state=123)
    forecaster = ForecasterAutoregMultiOutput(regressor, lags=3, steps=1)
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance(step=1)

    names = ['lag_1', 'lag_2', 'lag_3', 'exog']
    importance = np.array([
        0.1951219512195122, 0.0975609756097561,
        0.36585365853658536, 0.34146341463414637
    ])
    expected = pd.DataFrame({'feature': names, 'importance': importance})

    pd.testing.assert_frame_equal(results, expected)
    
    
def test_get_coef_when_regressor_is_LinearRegression_lags_3_step_1():
    '''
    Check the DataFrame returned by get_coef for step 1 when the regressor is
    LinearRegression with lags=3, steps=1 and the forecaster is trained with
    y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoregMultiOutput(LinearRegression(), lags=3, steps=1)
    forecaster.fit(y=y)
    results = forecaster.get_coef(step=1)

    names = ['lag_1', 'lag_2', 'lag_3']
    coefficients = np.array([0.33333333, 0.33333333, 0.33333333])
    expected = pd.DataFrame({'feature': names, 'coef': coefficients})

    pd.testing.assert_frame_equal(results, expected)

    
def test_output_get_coef_when_regressor_is_RandomForest():
    '''
    Check that get_coef returns None for step 1 when the regressor is a
    RandomForestRegressor with lags=3, steps=1 and the forecaster is trained
    with y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoregMultiOutput(RandomForestRegressor(), lags=3, steps=1)
    forecaster.fit(y=y)
    assert forecaster.get_coef(step=1) is None


def test_get_coef_when_regressor_is_LinearRegression_lags_3_step_1_exog_included():
    '''
    Check the DataFrame returned by get_coef for step 1 when the regressor is
    LinearRegression with lags=3, steps=1 and the forecaster is trained with
    y=pd.Series(np.arange(5)) and exog=pd.Series(np.arange(5), name='exog').
    '''
    y = pd.Series(np.arange(5))
    exog = pd.Series(np.arange(5), name='exog')
    forecaster = ForecasterAutoregMultiOutput(LinearRegression(), lags=3, steps=1)
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_coef(step=1)

    names = ['lag_1', 'lag_2', 'lag_3', 'exog']
    coefficients = np.array([0.25, 0.25, 0.25, 0.25])
    expected = pd.DataFrame({'feature': names, 'coef': coefficients})

    pd.testing.assert_frame_equal(results, expected)

In [82]:
# Run the original ForecasterAutoregMultiOutput tests defined above; a
# failing assertion raises in this cell.
test_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1()
test_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1_exog_included()
test_get_coef_when_regressor_is_LinearRegression_lags_3_step_1()
test_output_get_coef_when_regressor_is_RandomForest()
test_get_coef_when_regressor_is_LinearRegression_lags_3_step_1_exog_included()



## New

In [15]:
def get_feature_importance(self, step: int) -> pd.DataFrame:
    '''
    Return the feature importance of the model stored in the forecaster for
    a specific step. Since a separate model is created for each forecast time
    step, it is necessary to select the model from which retrieve information.

    The importance is read from the attribute `feature_importances_` of the
    selected regressor and, if that attribute is not available, from `coef_`.
    When the forecaster's regressor is a scikit-learn `Pipeline`, the last
    step of the pipeline is the estimator that is inspected.

    Parameters
    ----------
    step : int
        Model from which retrieve information (a separate model is created for
        each forecast time step). First step is 1.

    Returns
    -------
    feature_importance : pandas DataFrame or None
        Data frame with the columns `feature` and `importance`, one row per
        predictor of the selected model. `None` (after emitting a warning)
        when the estimator exposes neither `feature_importances_` nor `coef_`.

    Raises
    ------
    Exception
        If `step` is lower than 1 or greater than the number of steps the
        forecaster was created with.

    Notes
    -----
    Only a missing attribute leads to the warning/`None` outcome. Any other
    error (for instance a mismatch between the number of feature names and
    the number of importance values) is propagated instead of being silently
    reported as "attribute not available".
    '''

    if step > self.steps:
        raise Exception(
            f"Forecaster trained for {self.steps} steps. Got step={step}."
        )
    if step < 1:
        raise Exception("Minimum step is 1.")

    # Stored regressors start at index 0
    step_idx = step - 1

    estimator = self.regressors_[step_idx]
    if isinstance(self.regressor, sklearn.pipeline.Pipeline):
        # Importance lives in the final estimator, not in the transformers.
        estimator = estimator[-1]

    # `feature_importances_` has priority; `coef_` is the fallback. `getattr`
    # with a default only absorbs AttributeError, so unrelated failures are
    # not hidden.
    importance = None
    for attribute in ('feature_importances_', 'coef_'):
        importance = getattr(estimator, attribute, None)
        if importance is not None:
            break

    if importance is None:
        warnings.warn(
            f"Impossible to access feature importance for regressor of type {type(estimator)}. "
            f"This method is only valid when the regressor stores internally "
            f"the feature importance in the attribute `feature_importances_` "
            f"or `coef_`."
        )
        return None

    # Lag columns come first and are used by the model of every step.
    n_lags = len(self.lags)
    idx_columns_lags = np.arange(n_lags)
    idx_columns_exog = np.array([], dtype=int)
    if self.included_exog:
        # Exog columns appear once per step (e.g. exog_step_1, exog_step_2,
        # ...): start at this step's first exog column and stride by the
        # number of steps to keep only the columns of the selected step.
        idx_columns_exog = np.arange(len(self.X_train_col_names))[n_lags + step_idx::self.steps]
    idx_columns = np.hstack((idx_columns_lags, idx_columns_exog))

    # Drop the step marker so names match the original predictors.
    feature_names = [
        self.X_train_col_names[i].replace(f"_step_{step}", "")
        for i in idx_columns
    ]

    feature_importance = pd.DataFrame({
                            'feature': feature_names,
                            'importance': importance
                        })

    return feature_importance

In [20]:
# Three-step forecaster with a seeded random forest, fitted on a 15-point
# series plus an exogenous series named 'exog'; used by the trace cell below.
forecaster = ForecasterAutoregMultiOutput(
    RandomForestRegressor(random_state=123),
    lags=3,
    steps=3,
)
forecaster.fit(
    y=pd.Series(np.arange(15)),
    exog=pd.Series(np.arange(15), name='exog'),
)

In [29]:
# Manual, printed walk-through of the column-selection logic used to build
# the feature importance table of one per-step model.
# NOTE: `step` is used here as the zero-based position in
# `forecaster.regressors_`; the matching column suffix is `_step_{step+1}`.
step=2
# Positions of the lag columns: the first len(lags) columns.
idx_columns_lags = np.arange(len(forecaster.lags))
print(idx_columns_lags)
# Start with no exog columns; filled only if the forecaster was fitted with exog.
idx_columns_exog = np.array([], dtype=int)
print(idx_columns_exog)
if forecaster.included_exog:
    # Start at this step's exog column and stride by the number of steps.
    idx_columns_exog = np.arange(len(forecaster.X_train_col_names))[len(forecaster.lags) + step::forecaster.steps]
idx_columns = np.hstack((idx_columns_lags, idx_columns_exog))
print(idx_columns)
# Translate the selected positions into training column names.
feature_names = [forecaster.X_train_col_names[i] for i in idx_columns]
print(forecaster.X_train_col_names)
print(feature_names)
# Remove the step marker so the exog name reads like the original column.
feature_names = [name.replace(f"_step_{step+1}", "") for name in feature_names]
print(feature_names)
# Pair each name with the importance stored by the regressor at this position.
feature_importance = pd.DataFrame({
                        'feature': feature_names,
                        'importance' : forecaster.regressors_[step].feature_importances_
                    })
display(feature_importance)

[0 1 2]
[]
[0 1 2 5]
['lag_1', 'lag_2', 'lag_3', 'exog_step_1', 'exog_step_2', 'exog_step_3']
['lag_1', 'lag_2', 'lag_3', 'exog_step_3']
['lag_1', 'lag_2', 'lag_3', 'exog']


Unnamed: 0,feature,importance
0,lag_1,0.218703
1,lag_2,0.19592
2,lag_3,0.31269
3,exog,0.272686


### Test new

In [35]:
def test_output_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1():
    '''
    Check the table returned by get_feature_importance(step=1) for a
    RandomForestRegressor(random_state=123) forecaster (lags=3, steps=1)
    fitted with y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoregMultiOutput(
        RandomForestRegressor(random_state=123),
        lags=3,
        steps=1,
    )
    forecaster.fit(y=y)
    results = forecaster.get_feature_importance(step=1)

    # Reference importances for the seeded forest, one per lag.
    reference_importance = np.array(
        [0.3902439024390244, 0.3170731707317073, 0.2926829268292683]
    )
    expected = pd.DataFrame({
        'feature': ['lag_1', 'lag_2', 'lag_3'],
        'importance': reference_importance,
    })

    pd.testing.assert_frame_equal(results, expected)
  
    
def test_output_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1_exog_included():
    '''
    Check the table returned by get_feature_importance(step=1) for a
    RandomForestRegressor(random_state=123) forecaster (lags=3, steps=1)
    fitted with y=pd.Series(np.arange(5)) and
    exog=pd.Series(np.arange(5), name='exog').
    '''
    y = pd.Series(np.arange(5))
    exog = pd.Series(np.arange(5), name='exog')
    forecaster = ForecasterAutoregMultiOutput(
        RandomForestRegressor(random_state=123),
        lags=3,
        steps=1,
    )
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance(step=1)

    # Reference importances for the seeded forest: three lags, then exog.
    reference_importance = np.array([
        0.1951219512195122,
        0.0975609756097561,
        0.36585365853658536,
        0.34146341463414637,
    ])
    expected = pd.DataFrame({
        'feature': ['lag_1', 'lag_2', 'lag_3', 'exog'],
        'importance': reference_importance,
    })

    pd.testing.assert_frame_equal(results, expected)
    
    
def test_output_get_feature_importance_when_regressor_is_LinearRegression_lags_3_step_1():
    '''
    Check the table returned by get_feature_importance(step=1) for a
    LinearRegression forecaster (lags=3, steps=1) fitted with
    y=pd.Series(np.arange(5)).
    '''
    y = pd.Series(np.arange(5))
    forecaster = ForecasterAutoregMultiOutput(LinearRegression(), lags=3, steps=1)
    forecaster.fit(y=y)
    results = forecaster.get_feature_importance(step=1)

    # Same value is expected for each of the three lags.
    expected_columns = {
        'feature': ['lag_1', 'lag_2', 'lag_3'],
        'importance': np.array([0.33333333, 0.33333333, 0.33333333]),
    }
    expected = pd.DataFrame(expected_columns)

    pd.testing.assert_frame_equal(results, expected)


def test_output_get_feature_importance_when_regressor_is_LinearRegression_lags_3_step_1_exog_included():
    '''
    Check the table returned by get_feature_importance(step=1) for a
    LinearRegression forecaster (lags=3, steps=1) fitted with
    y=pd.Series(np.arange(5)) and exog=pd.Series(np.arange(5), name='exog').
    '''
    y = pd.Series(np.arange(5))
    exog = pd.Series(np.arange(5), name='exog')
    forecaster = ForecasterAutoregMultiOutput(LinearRegression(), lags=3, steps=1)
    forecaster.fit(y=y, exog=exog)
    results = forecaster.get_feature_importance(step=1)

    # Three lags followed by the exogenous column, all with the same value.
    expected_columns = {
        'feature': ['lag_1', 'lag_2', 'lag_3', 'exog'],
        'importance': np.array([0.25, 0.25, 0.25, 0.25]),
    }
    expected = pd.DataFrame(expected_columns)

    pd.testing.assert_frame_equal(results, expected)
    

def test_output_get_feature_importance_when_regressor_no_attributes():
    '''
    Check that get_feature_importance(step=1) returns None when the regressor
    is an MLPRegressor (lags=5, steps=1) fitted with
    y=pd.Series(np.arange(10)). The regressor is expected to expose neither
    `feature_importances_` nor `coef_`. Only the returned value is checked
    here, not the warning.
    '''
    mlp = MLPRegressor(solver = 'lbfgs', max_iter= 50, random_state=123)
    forecaster = ForecasterAutoregMultiOutput(regressor=mlp, lags=5, steps=1)
    forecaster.fit(y=pd.Series(np.arange(10)))

    assert forecaster.get_feature_importance(step=1) is None

In [36]:
# Run every test of the unified get_feature_importance, in definition order.
for run_test in (
    test_output_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1,
    test_output_get_feature_importance_when_regressor_is_RandomForestRegressor_lags_3_step_1_exog_included,
    test_output_get_feature_importance_when_regressor_is_LinearRegression_lags_3_step_1,
    test_output_get_feature_importance_when_regressor_is_LinearRegression_lags_3_step_1_exog_included,
    test_output_get_feature_importance_when_regressor_no_attributes,
):
    run_test()



# New Tests >= 0.4.3

In [11]:
# ForecasterAutoreg wrapping a StandardScaler + LinearRegression pipeline:
# fit on a 5-point series and display the resulting importance table.
forecaster = ForecasterAutoreg(
    regressor=make_pipeline(StandardScaler(), LinearRegression()),
    lags=3,
)
forecaster.fit(y=pd.Series(np.arange(5)))
forecaster.get_feature_importance()

Unnamed: 0,feature,importance
0,lag_1,0.166667
1,lag_2,0.166667
2,lag_3,0.166667


In [18]:
def create_predictors(y):
    '''
    Build the first 5 lags of a time series: the last five values of `y`,
    most recent first. Shorter inputs yield as many values as are available.
    '''
    # Walk backwards from the last element, stopping before the sixth-to-last.
    newest_first_window = slice(-1, -6, -1)

    return y[newest_first_window]

# ForecasterAutoregCustom wrapping a StandardScaler + LinearRegression
# pipeline, with predictors built by create_predictors: fit on a 7-point
# series and display the resulting importance table.
forecaster = ForecasterAutoregCustom(
    regressor=make_pipeline(StandardScaler(), LinearRegression()),
    fun_predictors=create_predictors,
    window_size=5,
)
forecaster.fit(y=pd.Series(np.arange(7)))
forecaster.get_feature_importance()

Unnamed: 0,feature,importance
0,custom_predictor_0,0.1
1,custom_predictor_1,0.1
2,custom_predictor_2,0.1
3,custom_predictor_3,0.1
4,custom_predictor_4,0.1


In [25]:
# ForecasterAutoregMultiOutput (single step) wrapping a StandardScaler +
# LinearRegression pipeline: fit on a 5-point series and display the
# importance table of the model for step 1.
forecaster = ForecasterAutoregMultiOutput(
    regressor=make_pipeline(StandardScaler(), LinearRegression()),
    lags=3,
    steps=1,
)
forecaster.fit(y=pd.Series(np.arange(5)))
forecaster.get_feature_importance(step=1)

Unnamed: 0,feature,importance
0,lag_1,0.166667
1,lag_2,0.166667
2,lag_3,0.166667


In [30]:
def test_output_get_feature_importance_when_pipeline_FA():
    '''
    ForecasterAutoreg
    Check the table returned by get_feature_importance when the regressor is
    a pipeline (StandardScaler() + LinearRegression()) with lags=3, fitted
    with y=pd.Series(np.arange(5)).
    '''
    pipeline = make_pipeline(StandardScaler(), LinearRegression())
    forecaster = ForecasterAutoreg(regressor=pipeline, lags=3)
    forecaster.fit(y=pd.Series(np.arange(5)))

    expected_columns = {
        'feature': ['lag_1', 'lag_2', 'lag_3'],
        'importance': np.array([0.166667, 0.166667, 0.166667]),
    }
    expected = pd.DataFrame(expected_columns)
    results = forecaster.get_feature_importance()

    pd.testing.assert_frame_equal(expected, results)
    
    
def test_output_get_feature_importance_when_pipeline_FAC():
    '''
    ForecasterAutoregCustom
    Check the table returned by get_feature_importance when the regressor is
    a pipeline (StandardScaler() + LinearRegression()), predictors are built
    by create_predictors with window_size=5, and the forecaster is fitted
    with y=pd.Series(np.arange(7)).
    '''
    pipeline = make_pipeline(StandardScaler(), LinearRegression())
    forecaster = ForecasterAutoregCustom(
        regressor=pipeline,
        fun_predictors=create_predictors,
        window_size=5,
    )
    forecaster.fit(y=pd.Series(np.arange(7)))

    # One row per custom predictor, named custom_predictor_0 .. _4.
    expected_columns = {
        'feature': [
            'custom_predictor_0',
            'custom_predictor_1',
            'custom_predictor_2',
            'custom_predictor_3',
            'custom_predictor_4',
        ],
        'importance': np.array([0.1, 0.1, 0.1, 0.1, 0.1]),
    }
    expected = pd.DataFrame(expected_columns)
    results = forecaster.get_feature_importance()

    pd.testing.assert_frame_equal(expected, results)
    
    
def test_output_get_feature_importance_when_pipeline_FAM():
    '''
    ForecasterAutoregMultiOutput
    Check the table returned by get_feature_importance(step=1) when the
    regressor is a pipeline (StandardScaler() + LinearRegression()) with
    lags=3 and steps=1, fitted with y=pd.Series(np.arange(5)).
    '''
    pipeline = make_pipeline(StandardScaler(), LinearRegression())
    forecaster = ForecasterAutoregMultiOutput(regressor=pipeline, lags=3, steps=1)
    forecaster.fit(y=pd.Series(np.arange(5)))

    expected_columns = {
        'feature': ['lag_1', 'lag_2', 'lag_3'],
        'importance': np.array([0.166667, 0.166667, 0.166667]),
    }
    expected = pd.DataFrame(expected_columns)
    results = forecaster.get_feature_importance(step=1)

    pd.testing.assert_frame_equal(expected, results)

In [31]:
# Run the pipeline tests for the three forecaster classes.
for run_test in (
    test_output_get_feature_importance_when_pipeline_FA,
    test_output_get_feature_importance_when_pipeline_FAC,
    test_output_get_feature_importance_when_pipeline_FAM,
):
    run_test()