In [1]:
import warnings
from sktime.transformations.base import BaseTransformer


class ColumnsGuard(BaseTransformer):
    _tags = {
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:transform-labels": "None",
        "scitype:instancewise": True,  
        "capability:inverse_transform": True,  
        "univariate-only": False,  
        "X_inner_mtype": "pd.DataFrame",  
        "y_inner_mtype": "None",  
        "requires_y": False,  
        "enforce_index_type": None,  
        "fit_is_empty": False, 
        "X-y-must-have-same-index": True, 
        "transform-returns-same-time-index": True,
        "skip-inverse-transform": False,  
        "capability:unequal_length": True,
        "capability:unequal_length:removes": True,
        "handles-missing-data": False,  
        "capability:missing_values:removes": False,
        
    }

    def __init__(self):
        super(ColumnsGuard, self).__init__()


    def _fit(self, X, y=None):
        self._columns_name = X.columns.tolist()
        return self

    def _transform(self, X, y=None):
        if self._columns_name == X.columns.tolist():
            X_transformed = X.copy(deep=True)
        else:
            missing_col = set(self._columns_name) - set(X.columns.tolist())
            extra_col = set(X.columns.tolist()) - set(self._columns_name)  
            if missing_col:
                raise ValueError('Missing columns: {missing_col}') 
            elif extra_col:
                warnings.warn('Got extra columns: {extra_col}, ignoring')
                X_transformed = X[self.self._columns_name].copy(deep=True)
        return X_transformed  

    def _inverse_transform(self, X, y=None):
        X_inv_transformed = X.copy(deep=True)
        return X_inv_transformed     

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        params = {}
        return params
    
    
from sktime.utils.estimator_checks import check_estimator

results = check_estimator(ColumnsGuard, raise_exceptions=True)    

All tests PASSED!


In [1]:
from sktime.datasets import load_macroeconomic
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.model_selection import temporal_train_test_split
from sklearn.ensemble import RandomForestRegressor

forecaster = make_reduction(estimator=RandomForestRegressor(random_state=42),
                            scitype="tabular-regressor",
                            window_length=10,
                            strategy="recursive",
                            pooling="global")

data = load_macroeconomic()
train_xy, test_xy = temporal_train_test_split(data, test_size=5)

train_y = train_xy.pop('unemp')
#train_xy.head()

test_y = test_xy.pop('unemp')

train_xy.head()

Unnamed: 0_level_0,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,pop,infl,realint
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1959Q1,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,177.146,0.0,0.0
1959Q2,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,177.83,2.34,0.74
1959Q3,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,178.657,2.74,1.09
1959Q4,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,179.386,0.27,4.06
1960Q1,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,180.007,2.31,1.19


In [None]:
forecaster_pipe.fit(train_y,train_xy, fh=[1,2,3,4,5])

print(forecaster_pipe.cutoff)

y_pred = forecaster_pipe.predict(fh=[1,2,3,4,5], X=test_xy)

y_pred

In [10]:
y_pred = forecaster_pipe.predict(fh=[1,2,3,4,5], X=test_xy)

y_pred

2008Q3    5.507
2008Q4    5.739
2009Q1    5.909
2009Q2    5.939
2009Q3    5.935
Freq: Q-DEC, Name: unemp, dtype: float64

In [5]:
train_xy.tail()

Unnamed: 0_level_0,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,pop,infl,realint
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2007Q2,13203.977,9291.5,2162.214,898.713,9842.7,207.338,1370.0,4.72,301.714,2.75,1.97
2007Q3,13321.109,9335.6,2166.491,918.983,9883.9,209.133,1379.2,4.0,302.509,3.45,0.55
2007Q4,13391.249,9363.6,2123.426,925.11,9886.2,212.495,1377.4,3.01,303.204,6.38,-3.37
2008Q1,13366.865,9349.6,2082.886,943.372,9826.8,213.997,1384.0,1.56,303.803,2.82,-1.26
2008Q2,13415.266,9351.0,2026.518,961.28,10059.0,218.61,1409.3,1.74,304.483,8.53,-6.79


In [4]:
forecaster_pipe.fit_predict(train_y,train_xy, fh=[1,2,3,4,5])

2008Q3    5.533
2008Q4    5.872
2009Q1    6.082
2009Q2    6.202
2009Q3    6.139
Freq: Q-DEC, Name: unemp, dtype: float64

In [6]:
forecaster_pipe.cutoff

PeriodIndex(['2008Q2'], dtype='period[Q-DEC]', name='Period')

In [None]:
import joblib

joblib.dump(forecaster_pipe, "../../results/RandomForestRegressor.pkl")

In [1]:
import joblib

trained_model = joblib.load("../../results/RandomForestRegressor.pkl")


AttributeError: Can't get attribute 'ColumnsGuard' on <module '__main__'>

In [2]:
import pickle

with  open("../../results/RandomForestRegressor.pkl", 'rb') as pkl_file
    trained_model = pickle.load(pkl_file)

SyntaxError: invalid syntax (1121957002.py, line 3)

In [None]:
!pip install catboost
