In [42]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'c:\\Users\\Joaquín Amat\\Documents\\GitHub\\skforecast'

In [43]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skforecast.ForecasterAutoregDiff import ForecasterAutoregDiff
from sklearn.linear_model import Ridge

In [44]:
# Synthetic data: a daily series of 100 Gaussian draws starting on 2000-01-01,
# plus a two-column frame of Gaussian exogenous variables on the same index.
# Both come from the same seeded global stream, so `y` must be drawn before `exog`.
n = 100
np.random.seed(1)
y = pd.Series(
    data=np.random.normal(size=n),
    index=pd.date_range(start="2000-01-01", periods=n, freq="D"),
)
exog = pd.DataFrame(
    data=np.random.normal(size=(n, 2)),
    index=y.index,
    columns=["exog_1", "exog_2"],
)
y

2000-01-01    1.624345
2000-01-02   -0.611756
2000-01-03   -0.528172
2000-01-04   -1.072969
2000-01-05    0.865408
                ...   
2000-04-05    0.077340
2000-04-06   -0.343854
2000-04-07    0.043597
2000-04-08   -0.620001
2000-04-09    0.698032
Freq: D, Length: 100, dtype: float64

In [45]:
# Training matrices with differentiation disabled (baseline for the differenced case).
# An actual estimator is passed instead of the string "ridge": a string has no
# fit/predict methods, and every other cell in this notebook passes Ridge().
forecaster = ForecasterAutoregDiff(
    regressor = Ridge(),
    lags = 3,
    differentiation = None,
)

X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)
display(X_train.head(3))
display(y_train.head(3))

Unnamed: 0,lag_1,lag_2,lag_3,exog_1,exog_2
2000-01-04,-0.528172,-0.611756,1.624345,0.740556,-0.953701
2000-01-05,-1.072969,-0.528172,-0.611756,-0.266219,0.032615
2000-01-06,0.865408,-1.072969,-0.528172,-1.373117,0.315159


2000-01-04   -1.072969
2000-01-05    0.865408
2000-01-06   -2.301539
Freq: D, Name: y, dtype: float64

In [46]:
# Training matrices built with differentiation=10 (presumably the differencing
# order -- confirm against ForecasterAutoregDiff); first ten rows of each are shown.
forecaster = ForecasterAutoregDiff(regressor=Ridge(), lags=3, differentiation=10)
X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)

display(X_train.head(10), y_train.head(10))

Unnamed: 0,lag_1,lag_2,lag_3,exog_1,exog_2
2000-01-14,1066.560613,-1296.519261,1350.521639,1.627651,0.338012
2000-01-15,-880.611945,1066.560613,-1296.519261,-1.199268,0.863345
2000-01-16,868.306788,-880.611945,1066.560613,-0.18092,-0.603921
2000-01-17,-936.767154,868.306788,-880.611945,-1.230058,0.550537
2000-01-18,920.06123,-936.767154,868.306788,0.792807,-0.623531
2000-01-19,-807.359062,920.06123,-936.767154,0.520576,-1.144341
2000-01-20,716.038633,-807.359062,920.06123,0.801861,0.046567
2000-01-21,-684.876763,716.038633,-807.359062,-0.18657,-0.101746
2000-01-22,633.00938,-684.876763,716.038633,0.868886,0.750412
2000-01-23,-490.734323,633.00938,-684.876763,0.529465,0.137701


2000-01-14   -880.611945
2000-01-15    868.306788
2000-01-16   -936.767154
2000-01-17    920.061230
2000-01-18   -807.359062
2000-01-19    716.038633
2000-01-20   -684.876763
2000-01-21    633.009380
2000-01-22   -490.734323
2000-01-23    250.388653
Freq: D, Name: y, dtype: float64

In [47]:
# Fit on `y` with differentiation=1 and forecast the next five steps.
forecaster = ForecasterAutoregDiff(regressor=Ridge(), lags=3, differentiation=1)
forecaster.fit(y=y)
forecaster.predict(steps=5)

2000-04-10   -1.103449
2000-04-11   -0.863422
2000-04-12   -1.207648
2000-04-13   -0.767063
2000-04-14   -1.004409
Freq: D, Name: pred, dtype: float64

In [48]:
# Same fit/forecast as a reference run with differentiation switched off.
forecaster = ForecasterAutoregDiff(regressor=Ridge(), lags=3, differentiation=None)
forecaster.fit(y=y)
forecaster.predict(steps=5)

2000-04-10    0.034849
2000-04-11    0.227546
2000-04-12   -0.099882
2000-04-13    0.070084
2000-04-14    0.015421
Freq: D, Name: pred, dtype: float64

In [1]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class TimeSeriesDifferentiator(BaseEstimator, TransformerMixin):
    """
    Transforms a time series into a differentiated time series of order n.
    It also reverts the differentiation.

    Parameters
    ----------
    order : int, default 1
        Order of differentiation, i.e. how many times first differences are
        taken. With `order <= 0` both `transform` and `inverse_transform`
        return the input unchanged (as a numpy array).

    Attributes
    ----------
    initial_values : list
        First value of the series before each differentiation step, in the
        order the steps were applied. It is overwritten on every call to
        `transform` and is used to revert the differentiation.
    order : int
        Order of differentiation.

    """

    def __init__(self, order=1):
        self.order = order
        self.initial_values = []

    def fit(self, X, y=None):
        """
        Fits the transformer. Nothing is learned here; the values needed to
        revert the differentiation are stored by `transform`.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Ignored.
        y : None
            Ignored.

        Returns
        -------
        self : TimeSeriesDifferentiator
            The transformer itself, so that calls can be chained and
            `fit_transform` (inherited from `TransformerMixin`) works.

        """
        return self

    def transform(self, X, y=None):
        """
        Transforms a time series into a differentiated time series of order n.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Time series to be differentiated.
        y : None
            Ignored.

        Returns
        -------
        X_diff : numpy ndarray of shape (n_samples - order,)
            Differentiated time series. Each differentiation step shortens
            the series by one observation.

        Raises
        ------
        IndexError
            If the series runs out of observations before `order`
            differentiation steps have been applied.

        """
        # Work on a plain array so that `[0]` is always positional, also when
        # X is a pandas Series with a non-integer or shifted index.
        X_diff = np.asarray(X)

        # Collected in a local list and assigned at the end: repeated calls
        # replace the stored values instead of appending to stale ones.
        initial_values = []
        for _ in range(self.order):
            initial_values.append(X_diff[0])
            X_diff = np.diff(X_diff, n=1)
        self.initial_values = initial_values

        return X_diff

    def inverse_transform(self, X, y=None):
        """
        Reverts the differentiation using the initial values stored by the
        most recent call to `transform`.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Differentiated time series.
        y : None
            Ignored.

        Returns
        -------
        X_undiff : numpy ndarray of shape (n_samples + order,)
            Reverted differentiated time series.

        Raises
        ------
        ValueError
            If fewer than `order` initial values are stored, i.e. `transform`
            has not been called with the current `order`.

        """
        if len(self.initial_values) < self.order:
            raise ValueError(
                "inverse_transform needs the initial values stored by "
                "transform; call transform with the current `order` first."
            )

        X_undiff = np.asarray(X)
        # Undo the steps last-to-first: prepend the value the series started
        # with before that step, then integrate with a cumulative sum.
        for step in range(self.order - 1, -1, -1):
            # np.concatenate promotes to a common dtype, whereas np.insert
            # would cast the initial value to the dtype of X (truncating a
            # float start value when X is integer).
            X_undiff = np.concatenate(([self.initial_values[step]], X_undiff))
            X_undiff = np.cumsum(X_undiff)

        return X_undiff



# Round-trip demo on a simple ramp 0..9: difference once, then revert.
X = np.arange(10)

differentiator = TimeSeriesDifferentiator(order=1)
differentiator.fit(X)  # fit keeps no state; called to mirror the usual fit/transform flow
X_diff = differentiator.transform(X)
X_reverted = differentiator.inverse_transform(X_diff)

# Original, differentiated and reverted series, one per line
print(
    f"Original time series: {X}",
    f"Differentiated time series: {X_diff}",
    f"Reverted time series: {X_reverted}",
    sep="\n",
)

Original time series: [0 1 2 3 4 5 6 7 8 9]
Differentiated time series: [1 1 1 1 1 1 1 1 1]
Reverted time series: [array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])]
