In [8]:
import warnings
from sktime.transformations.base import BaseTransformer


class ColumnsGuard(BaseTransformer):
    _tags = {
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:transform-labels": "None",
        "scitype:instancewise": True,  
        "capability:inverse_transform": True,  
        "univariate-only": False,  
        "X_inner_mtype": "pd.DataFrame",  
        "y_inner_mtype": "None",  
        "requires_y": False,  
        "enforce_index_type": None,  
        "fit_is_empty": False, 
        "X-y-must-have-same-index": True, 
        "transform-returns-same-time-index": True,
        "skip-inverse-transform": False,  
        "capability:unequal_length": True,
        "capability:unequal_length:removes": True,
        "handles-missing-data": False,  
        "capability:missing_values:removes": False,
        
    }

    def __init__(self):
        super(ColumnsGuard, self).__init__()


    def _fit(self, X, y=None):
        self._columns_name = X.columns.tolist()
        return self

    def _transform(self, X, y=None):
        if self._columns_name == X.columns.tolist():
            X_transformed = X.copy(deep=True)
        else:
            missing_col = set(self._columns_name) - set(X.columns.tolist())
            extra_col = set(X.columns.tolist()) - set(self._columns_name)  
            if missing_col:
                raise ValueError('Missing columns: {missing_col}') 
            elif extra_col:
                warnings.warn('Got extra columns: {extra_col}, ignoring')
                X_transformed = X[self.self._columns_name].copy(deep=True)
        return X_transformed  

    def _inverse_transform(self, X, y=None):
        X_inv_transformed = X.copy(deep=True)
        return X_inv_transformed     

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        params = {}
        return params
    
    
from sktime.utils.estimator_checks import check_estimator

results = check_estimator(ColumnsGuard, raise_exceptions=True)    

All tests PASSED!


In [7]:
from sktime.datasets import load_macroeconomic
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.compose import ForecastingPipeline

from sktime.forecasting.model_selection import temporal_train_test_split

from sklearn.ensemble import RandomForestRegressor

forecaster_pipe = ForecastingPipeline(
    steps=[
        ('column_Gaurd', ColumnsGuard()),
        ( "forecaster",  make_reduction(
                            estimator=RandomForestRegressor(random_state=42),
                            scitype="tabular-regressor",
                            window_length=10,
                            strategy="recursive",
                            pooling="global",
                        ),
        )
    ]           
)


data = load_macroeconomic()
train_xy, test_xy = temporal_train_test_split(data, test_size=5)

train_y = train_xy.pop('unemp')
#train_xy.head()

test_y = test_xy.pop('unemp')

forecaster_pipe.fit(train_y,train_xy, fh=[1,2,3,4,5])

y_pred = forecaster_pipe.predict(fh=[1,2,3,4,5], X=test_xy)

y_pred

2008Q3    5.507
2008Q4    5.739
2009Q1    5.909
2009Q2    5.939
2009Q3    5.935
Freq: Q-DEC, Name: unemp, dtype: float64

In [None]:
import joblib

joblib.dump(forecaster_pipe, "../../results/RandomForestRegressor.pkl")

In [1]:
import joblib

trained_model = joblib.load("../../results/RandomForestRegressor.pkl")


AttributeError: Can't get attribute 'ColumnsGuard' on <module '__main__'>

In [2]:
import pickle

with  open("../../results/RandomForestRegressor.pkl", 'rb') as pkl_file
    trained_model = pickle.load(pkl_file)

SyntaxError: invalid syntax (1121957002.py, line 3)

In [None]:
!pip install catboost
