In [3]:
from abc import ABC, abstractmethod
from typing import Optional, Tuple

import pandas as pd
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

from sklearn.linear_model import LinearRegression

In [4]:
# Synthetic daily series: v1 counts 1..nsamples, v2 and t are v1 plus a
# constant offset, so every column is an exact linear function of v1.
nsamples = 100
data = pd.DataFrame(
    {"v1": np.arange(1, nsamples + 1)},
    index=pd.date_range(start="2023-01-01", periods=nsamples, freq="D"),
)
data = data.assign(v2=data["v1"] + 0.1, t=data["v1"] + 0.01)

# Chronological hold-out: the first 80 rows train, the last 20 rows test.
train_data = data.iloc[:80]
test_data = data.iloc[-20:]

# Deep copies of the target column and the exogenous columns for each split.
train_y_d = train_data.loc[:, ["t"]].copy()
train_x_d = train_data.loc[:, ["v1", "v2"]].copy()

test_y_d = test_data.loc[:, ["t"]].copy()
test_x_d = test_data.loc[:, ["v1", "v2"]].copy()

print(test_data.shape)

print(train_data.tail(), end="\n\n")

print(test_data.head())

(20, 3)
            v1    v2      t
2023-03-17  76  76.1  76.01
2023-03-18  77  77.1  77.01
2023-03-19  78  78.1  78.01
2023-03-20  79  79.1  79.01
2023-03-21  80  80.1  80.01

            v1    v2      t
2023-03-22  81  81.1  81.01
2023-03-23  82  82.1  82.01
2023-03-24  83  83.1  83.01
2023-03-25  84  84.1  84.01
2023-03-26  85  85.1  85.01


In [99]:
class BaseForecaster(ABC):
    """Abstract interface every forecaster in this notebook implements.

    Subclasses must provide ``fit``, ``forecast`` and ``fit_forecast``; the
    class cannot be instantiated until all three are overridden.
    """

    @abstractmethod
    def fit(self, y_train: pd.DataFrame, x_hog_train: Optional[pd.DataFrame] = None):
        """Fit the model on ``y_train`` and optional exogenous ``x_hog_train``."""

    @abstractmethod
    def forecast(self, h: int, x_hog: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Produce an ``h``-step forecast, optionally using exogenous ``x_hog``."""

    @abstractmethod
    def fit_forecast(
        self,
        y_train: pd.DataFrame,
        h: int,
        x_hog_train: Optional[pd.DataFrame] = None,
        x_hog: Optional[pd.DataFrame] = None,
    ) -> pd.DataFrame:
        """Fit on the training inputs and return an ``h``-step forecast."""


class ReduceWindow(BaseForecaster):
    """Reduce a time series to a tabular regression problem using lag features.

    The ``fit`` / ``forecast`` / ``fit_forecast`` methods here are no-op
    placeholders that satisfy the abstract interface; the real work of this
    class is ``reduce_window``.
    """

    def __init__(self, window_len: int):
        """Store the number of lagged target values used as features."""
        super().__init__()
        self.__window_len = window_len

    def fit(self, y_train: pd.DataFrame, x_hog_train: Optional[pd.DataFrame] = None):
        """Placeholder implementation; does nothing and returns None."""
        pass

    def forecast(self, h: int, x_hog: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Placeholder implementation; does nothing and returns None."""
        pass

    def fit_forecast(
        self,
        y_train: pd.DataFrame,
        h: int,
        x_hog_train: Optional[pd.DataFrame] = None,
        x_hog: Optional[pd.DataFrame] = None,
    ) -> pd.DataFrame:
        """Placeholder implementation; does nothing and returns None."""
        pass

    @property
    def window_len(self):
        """Number of lagged target values used as features (read-only)."""
        return self.__window_len

    def reduce_window(
        self, y_train: pd.DataFrame, x_hog: Optional[pd.DataFrame] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Build the lagged design matrix for ``y_train``.

        Parameters
        ----------
        y_train
            Target values; shifted by 1..``window_len`` rows to create the
            lag columns and flattened to form the regression target.
        x_hog
            Optional exogenous columns. When given they are placed *before*
            the lag columns in the design matrix.

        Returns
        -------
        x_data : np.ndarray
            Design matrix with columns ``[x_hog..., lag_1, ..., lag_window_len]``
            where ``lag_i`` is the target shifted by ``i`` rows (so the most
            recent lag comes first).
        y_data : np.ndarray
            Flattened target values.
        initial_state : np.ndarray
            The last ``window_len`` target values in chronological order
            (oldest first), i.e. the reverse of the lag-column order.
        """
        x_data = pd.DataFrame()
        for i in range(1, self.window_len + 1):
            x_data[f"target_leg_{i}"] = y_train.shift(i)
        if x_hog is not None:
            x_data = pd.concat([x_hog, x_data], axis=1)
        # ``fillna(method="bfill")`` is deprecated in recent pandas; ``bfill()``
        # is the direct replacement. NOTE: the first ``window_len`` rows
        # therefore carry back-filled values rather than true lag history.
        x_data = x_data.bfill()

        x_data_ = x_data.to_numpy()
        y_data_ = y_train.to_numpy().flatten()
        initial_state = y_data_[-self.window_len :]
        return x_data_, y_data_, initial_state

In [115]:
class Forecaster(ReduceWindow):
    """Recursive multi-step forecaster: linear regression on lagged targets.

    Training data is built by ``reduce_window`` with columns
    ``[x_hog..., lag_1, ..., lag_window_len]``. Forecasting proceeds one step
    at a time, feeding each prediction back in as the newest lag.
    """

    def __init__(self, window_len: int = 10):
        """Create an unfitted forecaster using ``window_len`` lag features."""
        super().__init__(window_len)
        self._estimator = LinearRegression()
        # Last ``window_len`` training targets, oldest first; set by fit().
        self._last_fited_data = None
        # Index label of the last training row; set by fit().
        self._last_fited_index = None
        self._fitted_with_xhog = False

    def fit(self, y_train: pd.DataFrame, x_hog_train: Optional[pd.DataFrame] = None):
        """Fit the regression on lagged targets (plus optional exogenous data).

        Returns ``self`` so calls can be chained.
        """
        x_data, y_data, self._last_fited_data = self.reduce_window(y_train, x_hog_train)
        self._estimator.fit(x_data, y_data)
        self._last_fited_index = y_train.index[-1]
        self._fitted_with_xhog = x_hog_train is not None
        return self

    def __forecast(self, h: int, x_hog: Optional[pd.DataFrame] = None) -> np.ndarray:
        """Roll the fitted model forward ``h`` steps and return the predictions.

        ``x_hog`` must supply at least ``h`` rows, one per forecast step, with
        columns in the order used at fit time.
        """
        # Convert once instead of on every iteration.
        exog = None if x_hog is None else x_hog.to_numpy()
        # Chronological window of the most recent targets (oldest first).
        history = self._last_fited_data.copy()
        forecast_values = []
        for step in range(h):
            # The model was trained on columns lag_1, lag_2, ... (newest
            # first), so the chronological window must be reversed here;
            # otherwise each coefficient is applied to the wrong lag.
            lag_features = history[::-1]
            features = (
                lag_features
                if exog is None
                else np.hstack([exog[step, :], lag_features])
            )
            next_value = self._estimator.predict(features[np.newaxis, :]).item()
            forecast_values.append(next_value)
            # Slide the window: append the prediction, drop the oldest value.
            history = np.append(history, next_value)[-self.window_len :]
        return np.array(forecast_values)

    def forecast(self, h: int, x_hog: Optional[pd.DataFrame] = None) -> np.ndarray:
        """Return an array of ``h`` recursive forecasts.

        Raises ``Exception`` if the model was fitted with exogenous data but
        none is supplied here. If exogenous data is supplied to a model fitted
        without it, a message is printed and the data is ignored.
        """
        if self._fitted_with_xhog and x_hog is None:
            raise Exception("Xhog is needed , coz estimater was fitted with xhog")
        if not self._fitted_with_xhog and x_hog is not None:
            print("Xhog will be ignored as it was fitted with out xhog")
            return self.__forecast(h)
        return self.__forecast(h, x_hog)

    def fit_forecast(
        self,
        y_train: pd.DataFrame,
        h: int,
        x_hog_train: Optional[pd.DataFrame] = None,
        x_hog: Optional[pd.DataFrame] = None,
    ) -> np.ndarray:
        """Fit on the training inputs, then return an ``h``-step forecast."""
        self.fit(y_train, x_hog_train)
        return self.forecast(h, x_hog)

In [116]:
# Inspect the windowing step on its own, using a window of 4 lags together
# with the exogenous training columns.
fcast = ReduceWindow(window_len=4)

x_, y_, xl = fcast.reduce_window(y_train=train_y_d, x_hog=train_x_d)

# Shapes of the design matrix, the target vector and the trailing window.
print(*(arr.shape for arr in (x_, y_, xl)))

print(xl)

(80, 6) (80,) (4,)
[77.01 78.01 79.01 80.01]


In [117]:
# Fit on the training split with exogenous columns, then forecast the
# hold-out horizon using the test split's exogenous columns.
fh = 20
fcast = Forecaster(window_len=4)
fcast.fit(y_train=train_y_d, x_hog_train=train_x_d)
y_pred = fcast.forecast(h=fh, x_hog=test_x_d)
y_pred

array([ 81.01,  82.01,  83.01,  84.01,  85.01,  86.01,  87.01,  88.01,
        89.01,  90.01,  91.01,  92.01,  93.01,  94.01,  95.01,  96.01,
        97.01,  98.01,  99.01, 100.01])

In [118]:
# Date index for the forecast: the periods immediately after the last training
# timestamp. The horizon is taken from ``y_pred`` itself instead of a
# hard-coded 20 so this cell keeps working when the forecast length changes.
dt_range = pd.date_range(
    start=train_y_d.index[-1],
    freq=train_y_d.index.freqstr,
    inclusive="right",  # drop the start point (the last training timestamp)
    periods=len(y_pred) + 1,
)
forecast_df = pd.DataFrame({"predicted": y_pred}, index=dt_range)
forecast_df

Unnamed: 0,predicted
2023-03-22,81.01
2023-03-23,82.01
2023-03-24,83.01
2023-03-25,84.01
2023-03-26,85.01
2023-03-27,86.01
2023-03-28,87.01
2023-03-29,88.01
2023-03-30,89.01
2023-03-31,90.01


In [15]:
# Same workflow without exogenous columns: the model sees only the last
# 3 lags of the target.
fcast = Forecaster(window_len=3)

y_pred = fcast.fit(y_train=train_y_d).forecast(h=20)
y_pred

array([ 81.01,  82.01,  83.01,  84.01,  85.01,  86.01,  87.01,  88.01,
        89.01,  90.01,  91.01,  92.01,  93.01,  94.01,  95.01,  96.01,
        97.01,  98.01,  99.01, 100.01])

In [16]:
# Date index for the forecast: the periods immediately after the last training
# timestamp. The horizon is taken from ``y_pred`` itself instead of a
# hard-coded 20 so this cell keeps working when the forecast length changes.
dt_range = pd.date_range(
    start=train_y_d.index[-1],
    freq=train_y_d.index.freqstr,
    inclusive="right",  # drop the start point (the last training timestamp)
    periods=len(y_pred) + 1,
)
forecast_df = pd.DataFrame({"predicted": y_pred}, index=dt_range)
forecast_df

Unnamed: 0,predicted
2023-03-22,81.01
2023-03-23,82.01
2023-03-24,83.01
2023-03-25,84.01
2023-03-26,85.01
2023-03-27,86.01
2023-03-28,87.01
2023-03-29,88.01
2023-03-30,89.01
2023-03-31,90.01
