### Imports

In [2]:
from typing import Optional, Tuple

import pandas as pd
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Input

In [3]:
# Disable line wrapping so every row of a printed array stays on a single line.
np.set_printoptions(linewidth=float("inf"))

### Data generation

In [4]:
# Synthetic daily series: v1 is a 1..n_sample ramp, v2 and the target t are
# constant offsets of v1, so the relationship to learn is trivially linear.
n_sample = 120
data = pd.DataFrame(
    {"v1": np.arange(1, n_sample + 1)},
    index=pd.date_range(start="2023-01-01", periods=n_sample, freq="D"),
).assign(
    v2=lambda frame: frame["v1"] + 0.1,
    t=lambda frame: frame["v1"] + 0.01,
)

print(f"Data shape {data.shape}")

data.head()

Data shape (120, 3)


Unnamed: 0,v1,v2,t
2023-01-01,1,1.1,1.01
2023-01-02,2,2.1,2.01
2023-01-03,3,3.1,3.01
2023-01-04,4,4.1,4.01
2023-01-05,5,5.1,5.01


### Train/Test Split

In [5]:
# Hold out the last N_TEST rows for forecasting; everything before them is training
# data. Slicing relative to the end keeps the two sets disjoint and exhaustive for
# any data length, whereas fixed head(100)/tail(20) would overlap (leaking test rows
# into training) or silently drop rows as soon as the number of samples changes.
N_TEST = 20
train_data = data.iloc[:-N_TEST]
test_data = data.iloc[-N_TEST:]
assert len(train_data) + len(test_data) == len(data), "split must cover every row once"

# Features and target are kept as DataFrames (note the double brackets on "t").
train_x, train_y = train_data[["v1", "v2"]].copy(deep=True), train_data[["t"]].copy(
    deep=True
)
test_x, test_y = test_data[["v1", "v2"]].copy(deep=True), test_data[["t"]].copy(
    deep=True
)

print(f"train_x shape {train_x.shape}, train_y shape {train_y.shape}")
print(f"test_x shape  {test_x.shape},  test_y  shape {test_y.shape}")

train_x shape (100, 2), train_y shape (100, 1)
test_x shape  (20, 2),  test_y  shape (20, 1)


### Simple LSTM + Dense Model

In [6]:
def lstm_model(shape: Tuple[int, int], n_unit: int = 64):
    """Build and compile a stacked three-layer LSTM regressor with one linear output.

    Args:
        shape: Shape of a single input sample, without the batch dimension.
        n_unit: Number of units in each of the three LSTM layers.

    Returns:
        A compiled ``Sequential`` model named ``"LSTM_3_DENSE"`` using the Adam
        optimizer, mean-squared-error loss and mean-absolute-error as a metric.
    """
    model = Sequential(
        [
            # Declare the input explicitly rather than through the deprecated
            # `input_shape=` keyword on the first layer.
            Input(shape=shape),
            # The first two LSTMs return full sequences so the next LSTM receives
            # a 3-D input; the last one returns only its final state for Dense.
            LSTM(units=n_unit, return_sequences=True, dropout=0.2),
            LSTM(units=n_unit, return_sequences=True, dropout=0.2),
            LSTM(units=n_unit, dropout=0.2),
            Dense(units=1),
        ],
        name="LSTM_3_DENSE",
    )
    # `metrics` must be a list/tuple/dict; a bare string is rejected by Keras 3.
    model.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return model

### Forecaster Wrapper

In [7]:
class LstmForecaster:
    """Recursive multi-step forecaster built around the stacked LSTM from ``lstm_model``.

    Features and target are min-max scaled to [-1, 1] and cut into sliding windows
    of ``window_len`` consecutive rows (features and target side by side). The
    network learns to predict the target of the row that follows each window.
    Forecasting is recursive: every prediction is appended, together with that
    step's known features, to the window used for the next step.
    """

    def __init__(self, window_len: int = 10, epoch: int = 25, batch_size: int = 64):
        """Store the hyper-parameters; scaling statistics and model are set by fitting.

        Args:
            window_len: Number of consecutive rows in each input window.
            epoch: Number of training epochs.
            batch_size: Mini-batch size used for training.
        """
        self.window_len = window_len
        self.epoch = epoch
        self.batch_size = batch_size
        self.data_x_max, self.data_x_min = None, None
        self.data_y_max, self.data_y_min = None, None
        # Last `window_len` scaled training rows; seeds the recursive forecast.
        self.initial_state = None
        self.forecaster = None

    @staticmethod
    def _safe_range(upper, lower):
        """Return ``upper - lower`` with zero ranges replaced by 1.

        A constant column has max == min; dividing by that zero range would fill
        the scaled data with NaN/inf. With a range of 1 such a column scales to -1.
        """
        span = upper - lower
        if isinstance(span, pd.Series):
            return span.where(span != 0, 1)
        return span if span != 0 else 1

    def minmax_scale_x(self, data_x: pd.DataFrame) -> pd.DataFrame:
        """Scale features to [-1, 1] using the per-column min/max of the fitted data."""
        span = self._safe_range(self.data_x_max, self.data_x_min)
        return 2 * (data_x - self.data_x_min) / span - 1

    def minmax_scale_y(self, data_y: pd.Series) -> pd.Series:
        """Scale the target to [-1, 1] using the min/max of the fitted data."""
        span = self._safe_range(self.data_y_max, self.data_y_min)
        return 2 * (data_y - self.data_y_min) / span - 1

    def inv_minmax_scale_y(self, data_y: pd.Series) -> pd.Series:
        """Map scaled target values from [-1, 1] back to the original target scale."""
        return (data_y + 1) * 0.5 * (
            self.data_y_max - self.data_y_min
        ) + self.data_y_min

    def preprocess_data(
        self, data_x: pd.DataFrame, data_y: pd.Series
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Scale the data and turn it into supervised sliding-window samples.

        Side effects: overwrites the stored scaling statistics with those of the
        given data and stores the last ``window_len`` scaled rows in
        ``self.initial_state`` with shape ``(1, window_len, n_features)``.

        Args:
            data_x: Feature columns, one row per time step.
            data_y: Target values aligned with ``data_x`` (a Series or a
                single-column DataFrame).

        Returns:
            ``(x, y)`` where ``x`` has shape ``(n_rows - window_len, window_len,
            n_features)``, with the scaled target counted as a feature, and ``y``
            holds the scaled target of the row following each window.

        Raises:
            ValueError: If ``data_x`` and ``data_y`` differ in length, or there
                are not more than ``window_len`` rows (no window would have a
                following target row).
        """
        data_size = data_x.shape[0]
        if data_y.shape[0] != data_size:
            raise ValueError(
                f"data_x has {data_size} rows but data_y has {data_y.shape[0]}"
            )
        if data_size <= self.window_len:
            raise ValueError(
                f"need more than window_len={self.window_len} rows, got {data_size}"
            )

        self.data_x_max, self.data_x_min = data_x.max(), data_x.min()
        self.data_y_max, self.data_y_min = data_y.max().item(), data_y.min().item()

        x_scaled = self.minmax_scale_x(data_x)
        y_scaled = self.minmax_scale_y(data_y)

        xy_scaled_np = pd.concat([x_scaled, y_scaled], axis=1).to_numpy()

        # Windows over the time axis come back as (n_windows, n_features, window_len);
        # swap the last two axes to the (time, feature) layout the LSTM expects.
        windows = sliding_window_view(
            xy_scaled_np, window_shape=self.window_len, axis=0
        ).transpose(0, 2, 1)
        # The final window has no following row to predict, so it is dropped here
        # and kept as the forecasting seed below instead.
        x_data_ = windows[:-1]

        y_data_ = y_scaled.to_numpy()[self.window_len :]

        self.initial_state = xy_scaled_np[np.newaxis, -self.window_len :, :]

        return np.array(x_data_, dtype=np.float64), np.array(y_data_, dtype=np.float64)

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """Preprocess ``X``/``y``, build a fresh LSTM and train it.

        Batches are not shuffled (``shuffle=False``), so samples are presented
        in their original order.

        Returns:
            ``self``, to allow call chaining.
        """
        x_data, y_data = self.preprocess_data(X, y)
        self.forecaster = lstm_model(x_data.shape[1:])

        self.forecaster.fit(
            x_data, y_data, epochs=self.epoch, batch_size=self.batch_size, shuffle=False
        )
        return self

    def predict(self, forecast_horizon: int, X: pd.DataFrame) -> np.ndarray:
        """Recursively forecast the target for the next ``forecast_horizon`` steps.

        Args:
            forecast_horizon: Number of steps to forecast.
            X: Feature values for the forecast period; row ``i`` supplies the
                features of step ``i``. Must have at least ``forecast_horizon`` rows.

        Returns:
            1-D array with ``forecast_horizon`` predictions on the original target scale.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``X`` has fewer rows than ``forecast_horizon``.
        """
        if self.forecaster is None or self.initial_state is None:
            raise RuntimeError("predict() called before fit()")
        if forecast_horizon > len(X):
            raise ValueError(
                f"forecast_horizon={forecast_horizon} exceeds the {len(X)} rows of X"
            )

        y_predict = []
        x_scaled = self.minmax_scale_x(X)
        model_input = self.initial_state
        for i in range(forecast_horizon):
            predict_t = self.forecaster.predict(model_input, verbose=0)
            y_predict.append(predict_t.flatten().item())
            # New window row: this step's known features plus the predicted target.
            step_features = x_scaled.iloc[i, :].to_numpy()
            new_row = np.hstack([step_features, predict_t.flatten()])
            # Slide the window: append the new row and drop the oldest one.
            window_2d = model_input.reshape(self.window_len, -1)
            window_2d = np.vstack([window_2d, new_row])[-self.window_len :]
            model_input = window_2d[np.newaxis, :]
        y_original_scale = self.inv_minmax_scale_y(np.array(y_predict))
        return y_original_scale

### Training And Forecasting

In [8]:
# Sanity-check the windowing on a short window before training the real model.
fcaster = LstmForecaster(window_len=4, epoch=25)
x_, y_ = fcaster.preprocess_data(data_x=train_x, data_y=train_y)

print(x_.shape, y_.shape, fcaster.initial_state.shape)
print(x_, y_, fcaster.initial_state, sep="\n")

(96, 4, 3) (96, 1) (1, 4, 3)
[[[-1.         -1.         -1.        ]
  [-0.97979798 -0.97979798 -0.97979798]
  [-0.95959596 -0.95959596 -0.95959596]
  [-0.93939394 -0.93939394 -0.93939394]]

 [[-0.97979798 -0.97979798 -0.97979798]
  [-0.95959596 -0.95959596 -0.95959596]
  [-0.93939394 -0.93939394 -0.93939394]
  [-0.91919192 -0.91919192 -0.91919192]]

 [[-0.95959596 -0.95959596 -0.95959596]
  [-0.93939394 -0.93939394 -0.93939394]
  [-0.91919192 -0.91919192 -0.91919192]
  [-0.8989899  -0.8989899  -0.8989899 ]]

 ...

 [[ 0.87878788  0.87878788  0.87878788]
  [ 0.8989899   0.8989899   0.8989899 ]
  [ 0.91919192  0.91919192  0.91919192]
  [ 0.93939394  0.93939394  0.93939394]]

 [[ 0.8989899   0.8989899   0.8989899 ]
  [ 0.91919192  0.91919192  0.91919192]
  [ 0.93939394  0.93939394  0.93939394]
  [ 0.95959596  0.95959596  0.95959596]]

 [[ 0.91919192  0.91919192  0.91919192]
  [ 0.93939394  0.93939394  0.93939394]
  [ 0.95959596  0.95959596  0.95959596]
  [ 0.97979798  0.97979798  0.97979

In [9]:
# Train on the training period, then forecast one step per held-out row.
fcaster = LstmForecaster(window_len=10, epoch=25)
fcaster.fit(train_x, train_y)

# Derive the horizon from the test set instead of repeating a literal, so the
# forecast length always matches the feature rows available in test_x.
y_pred = fcaster.predict(len(test_x), test_x)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


### 20-Point Forecast Results

In [10]:
# Display the forecasts returned by fcaster.predict for the held-out period.
y_pred

array([ 97.28263734,  97.88920442,  98.46395997,  99.0055999 ,  99.51454611,  99.99219416, 100.44033791, 100.86072696, 101.25500471, 101.62459917, 101.97099144, 102.40530063, 102.82724161, 103.23786474, 103.63825578, 104.02948279, 104.41255481, 104.78830976, 105.15756787, 105.52099002])

In [11]:
# Put each test row's features (v1, v2) next to its forecast in the last column.
np.column_stack((test_x.to_numpy(), y_pred))

array([[101.        , 101.1       ,  97.28263734],
       [102.        , 102.1       ,  97.88920442],
       [103.        , 103.1       ,  98.46395997],
       [104.        , 104.1       ,  99.0055999 ],
       [105.        , 105.1       ,  99.51454611],
       [106.        , 106.1       ,  99.99219416],
       [107.        , 107.1       , 100.44033791],
       [108.        , 108.1       , 100.86072696],
       [109.        , 109.1       , 101.25500471],
       [110.        , 110.1       , 101.62459917],
       [111.        , 111.1       , 101.97099144],
       [112.        , 112.1       , 102.40530063],
       [113.        , 113.1       , 102.82724161],
       [114.        , 114.1       , 103.23786474],
       [115.        , 115.1       , 103.63825578],
       [116.        , 116.1       , 104.02948279],
       [117.        , 117.1       , 104.41255481],
       [118.        , 118.1       , 104.78830976],
       [119.        , 119.1       , 105.15756787],
       [120.        , 120.1    

In [13]:
def create_lagged_features(data, lag: int, fill_value: float = 0) -> pd.DataFrame:
    """Place ``lag`` shifted copies of ``data`` in front of its original columns.

    The result holds the lag-1 block first, then lag-2, ... up to ``lag``, and
    the unshifted columns last. Shifted blocks keep the original column labels,
    so labels repeat in the result; select columns by position when needed.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (typically a DataFrame).
        lag: Number of lagged copies to create; values below 1 add none.
        fill_value: Replacement for missing values. This covers the leading rows
            that have no history after shifting, and also any NaN already
            present in ``data``.

    Returns:
        A new DataFrame with ``(lag + 1) * n_columns`` columns; ``data`` itself
        is not modified.
    """
    df = pd.DataFrame(data)
    lagged_blocks = [df.shift(step) for step in range(1, lag + 1)]
    return pd.concat([*lagged_blocks, df], axis=1).fillna(fill_value)


# Add four lagged copies of every column and inspect the last five rows.
data_s = create_lagged_features(data, lag=4)

data_s.tail()

Unnamed: 0,v1,v2,t,v1.1,v2.1,t.1,v1.2,v2.2,t.2,v1.3,v2.3,t.3,v1.4,v2.4,t.4
2023-04-26,115.0,115.1,115.01,114.0,114.1,114.01,113.0,113.1,113.01,112.0,112.1,112.01,116,116.1,116.01
2023-04-27,116.0,116.1,116.01,115.0,115.1,115.01,114.0,114.1,114.01,113.0,113.1,113.01,117,117.1,117.01
2023-04-28,117.0,117.1,117.01,116.0,116.1,116.01,115.0,115.1,115.01,114.0,114.1,114.01,118,118.1,118.01
2023-04-29,118.0,118.1,118.01,117.0,117.1,117.01,116.0,116.1,116.01,115.0,115.1,115.01,119,119.1,119.01
2023-04-30,119.0,119.1,119.01,118.0,118.1,118.01,117.0,117.1,117.01,116.0,116.1,116.01,120,120.1,120.01
