In [None]:
#| default_exp models.lstm

In [None]:
#| hide
# Development convenience: with autoreload mode 2, IPython re-imports modules
# before running each cell, so edits to library code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# LSTM
> The Long Short-Term Memory Recurrent Neural Network (`LSTM`) uses a multilayer `LSTM` encoder and an `MLP` decoder. It builds upon the LSTM cell, which mitigates the exploding and vanishing gradient problems of classic `RNN`s. This network has been extensively used in sequential prediction tasks like language modeling, phonetic labeling, and forecasting.<br><br>**References**<br>- [Jeffrey L. Elman (1990). "Finding Structure in Time".](https://onlinelibrary.wiley.com/doi/abs/10.1207/s15516709cog1402_1)<br>- [Haşim Sak, Andrew Senior, Françoise Beaufays (2014). "Long Short-Term Memory Based Recurrent Neural Network Architectures for Large Vocabulary Speech Recognition."](https://arxiv.org/abs/1402.1128)<br>

![Figure 1. Long Short-Term Memory Cell.](imgs_models/lstm.png)

In [None]:
#| hide
from nbdev.showdoc import show_doc
from neuralforecast.utils import generate_series

In [None]:
#| export
import torch.nn as nn

from neuralforecast.losses.pytorch import MAE
from neuralforecast.common._base_recurrent import BaseRecurrent

In [None]:
#| export
class LSTM(BaseRecurrent):
    """Multilayer `nn.LSTM` encoder followed by a linear adapter.

    The encoder's hidden state at every time step is projected by a single
    `nn.Linear` layer to `h` outputs.

    **Parameters:**<br>
    `input_size`: int, passed to `nn.LSTM` as its `input_size`.<br>
    `h`: int, number of outputs produced by the adapter per time step; also the
    right-padding length of `self.padder`.<br>
    `state_hsize`: int, hidden state size of the LSTM.<br>
    `step_size`: int, stored on the instance; not used inside this class.<br>
    `n_layers`: int, number of stacked LSTM layers.<br>
    `bias`: bool, whether the LSTM layers use bias weights.<br>
    `dropout`: float, dropout passed to `nn.LSTM`.<br>
    `learning_rate`: float, stored on the instance; not used inside this class.<br>
    `normalize`: bool, stored on the instance; not used inside this class.<br>
    `loss`: loss object, forwarded to `BaseRecurrent` and stored on the instance.<br>
    `batch_size`, `num_workers_loader`, `drop_last_loader`, `random_seed`:
    forwarded unchanged to `BaseRecurrent`.<br>
    `**trainer_kwargs`: forwarded unchanged to `BaseRecurrent`
    (presumably trainer options -- confirm against the base class).<br>
    """

    def __init__(self,
                 input_size: int,
                 h: int,
                 state_hsize: int = 200,
                 step_size: int = 1,
                 n_layers: int = 2,
                 bias: bool = True,
                 dropout: float = 0.,
                 learning_rate: float = 1e-3,
                 normalize: bool = True,
                 loss=MAE(),
                 batch_size=32,
                 num_workers_loader=0,
                 drop_last_loader=False,
                 random_seed=1,
                 **trainer_kwargs):
        # The base class must be initialised first: submodules can only be
        # attached to an `nn.Module` after its constructor has run.
        super().__init__(loss=loss,
                         batch_size=batch_size,
                         num_workers_loader=num_workers_loader,
                         drop_last_loader=drop_last_loader,
                         random_seed=random_seed,
                         **trainer_kwargs)

        # --- Architecture hyperparameters ---
        self.input_size = input_size
        self.h = h
        self.state_hsize = state_hsize
        self.step_size = step_size
        self.n_layers = n_layers
        self.bias = bias
        self.dropout = dropout

        # --- Optimization settings ---
        self.learning_rate = learning_rate
        self.loss = loss
        self.normalize = normalize
        self.random_seed = random_seed
        # Appends `h` zeros on the right of the last dimension. Not used in
        # this class; presumably consumed by `BaseRecurrent` -- TODO confirm.
        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)

        # --- Network ---
        # Encoder is built before the adapter so that parameter registration
        # and random initialisation happen in a fixed order.
        encoder_kwargs = dict(input_size=self.input_size,
                              hidden_size=self.state_hsize,
                              num_layers=self.n_layers,
                              bias=self.bias,
                              dropout=self.dropout,
                              batch_first=True)
        self.model = nn.LSTM(**encoder_kwargs)
        self.adapterW = nn.Linear(in_features=self.state_hsize,
                                  out_features=self.h)

    def forward(self, insample_y, insample_mask):
        """Encode `insample_y` with the LSTM and project every step to `h` values.

        `insample_y` is fed to an `nn.LSTM` built with `batch_first=True`, i.e.
        batched input is laid out as (batch, time, input_size). `insample_mask`
        is accepted but not used here. Returns the adapter output, whose last
        dimension has size `h`.
        """
        # Only the per-step hidden states are needed; the final (h_n, c_n)
        # tuple returned by the LSTM is discarded.
        hidden_states, _ = self.model(insample_y)
        return self.adapterW(hidden_states)

In [None]:
#| hide
import logging
import warnings
logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

In [None]:
#| hide
import pytorch_lightning as pl
import matplotlib.pyplot as plt
import pandas as pd
from neuralforecast.utils import AirPassengersDF as Y_df
from neuralforecast.tsdataset import TimeSeriesDataset, TimeSeriesLoader

# Add a second series: the last 100 observations of the first one, with `y` halved
Y_df_2 = Y_df.tail(100).copy()
Y_df_2['unique_id'] = 2.0
Y_df_2['y'] = 0.5*Y_df_2['y']
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` is the supported equivalent.
Y_df = pd.concat([Y_df, Y_df_2]).reset_index(drop=True)

# Train/Test split: every observation dated after 1959-12-31 is held out
# (originally annotated as 132 train / 12 test rows for the first series).
# `.copy()` yields independent frames, so adding the forecast column below
# does not assign into a slice of `Y_df` (SettingWithCopyWarning).
Y_train_df = Y_df[Y_df.ds<='1959-12-31'].copy()
Y_test_df = Y_df[Y_df.ds>'1959-12-31'].copy()

# Fit on the training rows only, then forecast from the same dataset
dataset, *_ = TimeSeriesDataset.from_df(df = Y_train_df)
model = LSTM(24, 12, learning_rate=1e-3, max_epochs=100)
model.fit(dataset=dataset)
y_hat = model.predict(dataset=dataset)

# Store the forecasts next to the held-out actuals for plotting
Y_test_df['LSTM'] = y_hat

In [None]:
#| hide
# One figure per series: the training history followed by the held-out window,
# which carries both the actuals (`y`) and the `LSTM` forecast column.
for series_id in (1.0, 2.0):
    history = Y_train_df[Y_train_df['unique_id']==series_id]
    holdout = Y_test_df[Y_test_df['unique_id']==series_id]
    series_df = pd.concat([history, holdout]).drop('unique_id', axis=1).set_index('ds')
    # Title each figure so the two series can be told apart when skimming
    series_df.plot(title=f'unique_id={series_id}')