### Imports

In [1]:
from typing import Optional, Tuple

import numpy as np
import pandas as pd

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Input
from tensorflow.keras.utils import set_random_seed

### Data generation

In [2]:
# Deterministic toy dataset: one row per calendar day. `v1` is a 1..n ramp and
# `v2` / `t` are the same ramp shifted by a small constant offset.
n_sample = 120
data = pd.DataFrame(
    {"v1": np.arange(1, n_sample + 1)},
    index=pd.date_range(start="2023-01-01", freq="D", periods=n_sample),
).assign(
    v2=lambda frame: frame["v1"] + 0.1,
    t=lambda frame: frame["v1"] + 0.01,
)

print(f"Data shape {data.shape}")

data.head()

Data shape (120, 3)


Unnamed: 0,v1,v2,t
2023-01-01,1,1.1,1.01
2023-01-02,2,2.1,2.01
2023-01-03,3,3.1,3.01
2023-01-04,4,4.1,4.01
2023-01-05,5,5.1,5.01


### Train/Test Split

In [3]:
# Chronological hold-out: the first 100 rows are used for training and the
# last 20 rows are kept back for evaluating the forecast.
train_data = data.iloc[:100]
test_data = data.iloc[-20:]

# Features (v1, v2) and target (t) are copied so the splits are independent
# of `data`.
train_x = train_data[["v1", "v2"]].copy()
train_y = train_data[["t"]].copy()
test_x = test_data[["v1", "v2"]].copy()
test_y = test_data[["t"]].copy()

print(f"train_x shape {train_x.shape}, train_y shape {train_y.shape}")
print(f"test_x shape  {test_x.shape},  test_y  shape {test_y.shape}")

train_x shape (100, 2), train_y shape (100, 1)
test_x shape  (20, 2),  test_y  shape (20, 1)


### Simple LSTM + Dense Model

In [4]:
def lstm_model(shape: Tuple[int, int], n_unit: int = 64):
    """Build and compile a stacked three-layer LSTM regressor.

    Args:
        shape: Shape of one input sample, ``(window_len, n_features)``
            (no batch dimension).
        n_unit: Number of units in each of the three LSTM layers.

    Returns:
        A compiled Keras ``Sequential`` model named ``"LSTM_3_DENSE"`` that
        maps one window to a single scalar, trained with the Adam optimizer
        on mean squared error and reporting mean absolute error.
    """
    model = Sequential(
        [
            # An explicit Input layer replaces the legacy `input_shape=`
            # keyword on the first LSTM layer.
            Input(shape=shape),
            LSTM(units=n_unit, return_sequences=True, dropout=0.2),
            LSTM(units=n_unit, return_sequences=True, dropout=0.2),
            # The last LSTM returns only its final step, which feeds the
            # single-unit regression head.
            LSTM(units=n_unit, dropout=0.2),
            Dense(units=1),
        ],
        name="LSTM_3_DENSE",
    )
    # `metrics` is passed as a list: a bare string is not a valid value in
    # current Keras releases.
    model.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return model

### Forecaster Wrapper

In [5]:
class LstmForecaster:
    """Recursive multi-step forecaster wrapped around ``lstm_model``.

    Features and target are min-max scaled with statistics taken from the
    training data. Each training sample is a window of ``window_len``
    consecutive rows of ``[scaled features..., scaled target]`` and its label
    is the scaled target of the row that follows the window. At prediction
    time the window is rolled forward one row per step, using the supplied
    future features and the model's own previous prediction as the target.

    Args:
        window_len: Number of past rows fed to the model per sample.
        epoch: Number of training epochs.
        batch_size: Mini-batch size used for training.
    """

    def __init__(self, window_len: int = 10, epoch: int = 25, batch_size: int = 64):
        self.window_len = window_len
        self.epoch = epoch
        self.batch_size = batch_size
        # Scaling statistics; filled in by preprocess_data().
        self.data_x_max, self.data_x_min = None, None
        self.data_y_max, self.data_y_min = None, None
        # Last `window_len` scaled training rows, with a leading batch axis.
        self.initial_state = None
        # Compiled Keras model; created by fit().
        self.forecaster = None

    def _x_range(self) -> pd.Series:
        """Per-feature (max - min), with zero ranges replaced by 1."""
        span = self.data_x_max - self.data_x_min
        # A constant column would otherwise produce 0/0 = NaN when scaled.
        return span.where(span != 0, 1)

    def _y_range(self) -> float:
        """Target (max - min), with a zero range replaced by 1."""
        span = self.data_y_max - self.data_y_min
        return span if span != 0 else 1

    def minmax_scale_x(self, data_x: pd.DataFrame) -> pd.DataFrame:
        """Scale features with the training min/max (constant columns map to 0)."""
        return (data_x - self.data_x_min) / self._x_range()

    def minmax_scale_y(self, data_y: pd.Series) -> pd.Series:
        """Scale the target with the training min/max (a constant target maps to 0)."""
        return (data_y - self.data_y_min) / self._y_range()

    def inv_minmax_scale_y(self, data_y: pd.Series) -> pd.Series:
        """Undo minmax_scale_y, returning values in the target's original units."""
        return data_y * self._y_range() + self.data_y_min

    def preprocess_data(
        self, data_x: pd.DataFrame, data_y: pd.Series
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Learn the scaling statistics and build the supervised windows.

        Side effects: sets ``data_x_min/max``, ``data_y_min/max`` and
        ``initial_state`` on the instance.

        Args:
            data_x: Feature frame, one row per time step.
            data_y: Target for the same time steps, either a Series or a
                single-column DataFrame, sharing ``data_x``'s index.

        Returns:
            ``(x_data, y_data)`` as float64 arrays of shape
            ``(n_rows - window_len, window_len, n_features + 1)`` and
            ``(n_rows - window_len, 1)``. Both have zero samples when there
            are not more rows than ``window_len``.

        Raises:
            ValueError: If ``data_y`` has more than one column, or if the
                indexes of ``data_x`` and ``data_y`` do not line up.
        """
        # Accept the annotated Series as well as a one-column DataFrame.
        y_frame = data_y.to_frame() if isinstance(data_y, pd.Series) else data_y
        if y_frame.shape[1] != 1:
            raise ValueError("data_y must contain exactly one target column")

        data_size = data_x.shape[0]
        self.data_x_max, self.data_x_min = data_x.max(), data_x.min()
        self.data_y_max = y_frame.max().item()
        self.data_y_min = y_frame.min().item()

        x_scaled = self.minmax_scale_x(data_x)
        y_scaled = self.minmax_scale_y(y_frame)
        xy_scaled = pd.concat([x_scaled, y_scaled], axis=1)
        if xy_scaled.shape[0] != data_size:
            # concat aligns on the index; extra rows mean the indexes differ.
            raise ValueError("data_x and data_y must share the same index")

        self.initial_state = xy_scaled.tail(self.window_len).to_numpy(
            dtype=np.float64
        )[np.newaxis, :]

        # Row k of `window_idx` holds the positions k .. k + window_len - 1,
        # so one fancy-index builds every window without a Python loop.
        xy_values = xy_scaled.to_numpy(dtype=np.float64)
        n_windows = max(data_size - self.window_len, 0)
        window_idx = np.arange(n_windows)[:, np.newaxis] + np.arange(self.window_len)
        x_data = xy_values[window_idx]
        # The label of a window is the scaled target (last column) of the
        # row immediately after it.
        y_data = xy_values[self.window_len :, -1:]
        return x_data, y_data

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """Scale the data, build the windows and train a fresh model.

        Args:
            X: Training features, one row per time step.
            y: Training target for the same time steps.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If there are not more rows than ``window_len``, in
                which case no training window can be formed.
        """
        x_data, y_data = self.preprocess_data(X, y)
        if x_data.shape[0] == 0:
            raise ValueError(
                f"need more than window_len={self.window_len} rows to train, "
                f"got {X.shape[0]}"
            )
        self.forecaster = lstm_model(x_data.shape[1:])

        # shuffle=False keeps the windows in chronological order within an epoch.
        self.forecaster.fit(
            x_data, y_data, epochs=self.epoch, batch_size=self.batch_size, shuffle=False
        )
        return self

    def predict(self, forecast_horizon: int, X: pd.DataFrame) -> np.ndarray:
        """Forecast ``forecast_horizon`` steps beyond the training data.

        Args:
            forecast_horizon: Number of steps to forecast.
            X: Features for the forecast period, one row per step and at
                least ``forecast_horizon`` rows long.

        Returns:
            1-D array of length ``forecast_horizon`` in the target's
            original units.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``X`` has fewer rows than ``forecast_horizon``.
        """
        if self.forecaster is None or self.initial_state is None:
            raise RuntimeError("fit() must be called before predict()")
        if forecast_horizon > X.shape[0]:
            raise ValueError(
                f"forecast_horizon={forecast_horizon} exceeds the "
                f"{X.shape[0]} rows available in X"
            )

        y_predict = []
        x_scaled = self.minmax_scale_x(X).to_numpy(dtype=np.float64)
        # Rolling 2-D window; starts as the last `window_len` training rows.
        window = self.initial_state.reshape(self.window_len, -1)
        for step in range(forecast_horizon):
            scaled_pred = np.asarray(
                self.forecaster.predict(window[np.newaxis, :], verbose=0)
            ).flatten()
            y_predict.append(scaled_pred.item())
            # The features of this step and the prediction just made form the
            # newest row; the oldest row is dropped to keep the window length.
            new_row = np.hstack([x_scaled[step], scaled_pred])
            window = np.vstack([window, new_row])[-self.window_len :]
        return self.inv_minmax_scale_y(np.array(y_predict))

### Training And Forecasting

In [6]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and dropout -- and therefore the forecast below -- are
# repeatable when the notebook is restarted and run from the top.
set_random_seed(42)

fcaster = LstmForecaster(window_len=10, epoch=25)
fcaster.fit(train_x, train_y)

# Forecast one step per held-out row rather than a hard-coded horizon.
y_pred = fcaster.predict(len(test_x), test_x)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


### 20-Point Forecast Results

In [7]:
# Forecast for the 20 held-out steps, already mapped back to the target's
# original units by LstmForecaster.predict.
y_pred

array([ 99.90149499, 100.64632512, 101.37827958, 102.09581233,
       102.79800285, 103.48444987, 104.15504719, 104.80991281,
       105.44929458, 106.07353474, 106.68299914, 107.31264449,
       107.93454791, 108.54883922, 109.15588428, 109.75597812,
       110.3495102 , 110.93685819, 111.51838792, 112.09445346])

In [8]:
# Held-out features (v1, v2) next to the forecast, one row per step.
np.column_stack([test_x.to_numpy(), y_pred])

array([[101.        , 101.1       ,  99.90149499],
       [102.        , 102.1       , 100.64632512],
       [103.        , 103.1       , 101.37827958],
       [104.        , 104.1       , 102.09581233],
       [105.        , 105.1       , 102.79800285],
       [106.        , 106.1       , 103.48444987],
       [107.        , 107.1       , 104.15504719],
       [108.        , 108.1       , 104.80991281],
       [109.        , 109.1       , 105.44929458],
       [110.        , 110.1       , 106.07353474],
       [111.        , 111.1       , 106.68299914],
       [112.        , 112.1       , 107.31264449],
       [113.        , 113.1       , 107.93454791],
       [114.        , 114.1       , 108.54883922],
       [115.        , 115.1       , 109.15588428],
       [116.        , 116.1       , 109.75597812],
       [117.        , 117.1       , 110.3495102 ],
       [118.        , 118.1       , 110.93685819],
       [119.        , 119.1       , 111.51838792],
       [120.        , 120.1    