In [None]:
#| default_exp models.mlp

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

# MLP
> One of the simplest neural architectures is the Multi Layer Perceptron (`MLP`), composed of stacked Fully Connected Neural Networks trained with backpropagation. Each node in the architecture is capable of modeling non-linear relationships granted by its activation function. Novel activations like Rectified Linear Units (`ReLU`) have greatly improved the ability to fit deeper networks, overcoming the vanishing-gradient problems that were associated with `Sigmoid` and `TanH` activations. For the forecasting task the last layer is changed to follow an auto-regression problem.<br><br>**References**<br>-[Rosenblatt, F. (1958). "The perceptron: A probabilistic model for information storage and organization in the brain."](https://psycnet.apa.org/record/1959-09865-001)<br>-[Fukushima, K. (1975). "Cognitron: A self-organizing multilayered neural network."](https://pascal-francis.inist.fr/vibad/index.php?action=getRecordDetail&idt=PASCAL7750396723)<br>-[Vinod Nair, Geoffrey E. Hinton (2010). "Rectified Linear Units Improve Restricted Boltzmann Machines"](https://www.cs.toronto.edu/~fritz/absps/reluICML.pdf)<br>

![Figure 1. Three layer MLP with autoregressive inputs.](imgs_models/mlp.png)

In [None]:
#| hide
from fastcore.test import test_eq
from nbdev.showdoc import show_doc

In [None]:
#| export
import torch
import torch.nn as nn

from neuralforecast.losses.pytorch import MAE
from neuralforecast.common._base_windows import BaseWindows

In [None]:
#| export
class MLP(BaseWindows):
    """Multi Layer Perceptron forecaster built on `BaseWindows`.

    The network is a stack of `num_layers` fully connected layers, each
    followed by a ReLU, and a final linear adapter with
    `h * loss.outputsize_multiplier` output features.

    **Parameters:**<br>
    `input_size`: int, number of input features of the first linear layer.<br>
    `h`: int, forecast horizon; scales the width of the output adapter.<br>
    `step_size`: int, stored on the instance (not used inside this class).<br>
    `hidden_size`: int, number of units of every hidden linear layer.<br>
    `num_layers`: int, number of hidden linear layers (at least one is always created).<br>
    `learning_rate`: float, stored on the instance (not used inside this class).<br>
    `normalize`: bool, forwarded to `BaseWindows`.<br>
    `loss`: loss object exposing `outputsize_multiplier` and `adapt_output`.<br>
    `batch_size`: int, forwarded to `BaseWindows`.<br>
    `num_workers_loader`: int, forwarded to `BaseWindows`.<br>
    `drop_last_loader`: bool, forwarded to `BaseWindows`.<br>
    `random_seed`: int, forwarded to `BaseWindows`.<br>
    `**trainer_kwargs`: extra keyword arguments forwarded to `BaseWindows`.<br>
    """

    # NOTE(review): the default `loss=MAE()` is evaluated once at definition
    # time, so models built without an explicit loss share one instance.
    def __init__(
        self,
        input_size,
        h,
        step_size=1,
        hidden_size=1024,
        num_layers=2,
        learning_rate=1e-3,
        normalize=False,
        loss=MAE(),
        batch_size=32,
        num_workers_loader=0,
        drop_last_loader=False,
        random_seed=1,
        **trainer_kwargs
    ):
        # Delegate the shared windowing/training configuration to the parent.
        super().__init__(
            h=h,
            loss=loss,
            batch_size=batch_size,
            normalize=normalize,
            num_workers_loader=num_workers_loader,
            drop_last_loader=drop_last_loader,
            random_seed=random_seed,
            **trainer_kwargs
        )

        # Architecture and optimisation settings kept on the instance.
        self.input_size = input_size
        self.step_size = step_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.loss = loss

        # Hidden stack: one input projection followed by
        # `num_layers - 1` square hidden-to-hidden layers.
        hidden_stack = [nn.Linear(in_features=input_size, out_features=hidden_size)]
        hidden_stack.extend(
            nn.Linear(in_features=hidden_size, out_features=hidden_size)
            for _ in range(num_layers - 1)
        )
        self.mlp = nn.ModuleList(hidden_stack)

        # Output adapter: its width depends on how many values the loss
        # needs per forecast step.
        self.out = nn.Linear(
            in_features=hidden_size,
            out_features=h * self.loss.outputsize_multiplier,
        )

    def forward(self, x, mask):
        """Run `x` through the ReLU-activated hidden stack and the output adapter.

        `mask` is accepted as part of the call signature but is not used here.
        The adapter output is passed through `self.loss.adapt_output` before
        being returned.
        """
        hidden = x
        for fc in self.mlp:
            hidden = torch.relu(fc(hidden))
        return self.loss.adapt_output(self.out(hidden))

In [None]:
#| hide
import logging
import warnings
# Keep the notebook output clean: mute Python warnings and only let
# pytorch_lightning messages of level ERROR or above through.
warnings.filterwarnings(action="ignore")
logging.getLogger(name="pytorch_lightning").setLevel(level=logging.ERROR)

In [None]:
#| hide
# test performance fit/predict method
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from neuralforecast.utils import AirPassengersDF as Y_df
from neuralforecast.tsdataset import TimeSeriesDataset


# Split the series by date into a training part and a held-out part.
Y_train_df = Y_df[Y_df.ds<='1959-12-31'] # 132 train
# Take an explicit copy: a forecast column is written to this frame below,
# and writing to a filtered slice of `Y_df` raises SettingWithCopyWarning.
Y_test_df = Y_df[Y_df.ds>'1959-12-31'].copy()   # 12 test

# Fit on the training part only and forecast the next `h=12` steps.
dataset, *_ = TimeSeriesDataset.from_df(Y_train_df)
model = MLP(input_size=24, h=12, max_epochs=100)
model.fit(dataset=dataset)
y_hat = model.predict(dataset=dataset)
Y_test_df['MLP'] = y_hat

#test we recover the same forecast
y_hat2 = model.predict(dataset=dataset)
test_eq(y_hat, y_hat2)

# Trailing `;` hides the Axes repr in the rendered cell output.
pd.concat([Y_train_df, Y_test_df]).drop('unique_id', axis=1).set_index('ds').plot();

In [None]:
#| hide
# test no leakage with test_size
# The model is fit on the full series with `test_size=12`; its forecast is
# compared against `y_hat`, which an earlier cell produced from the training
# part alone.
dataset, *_ = TimeSeriesDataset.from_df(Y_df)
model = MLP(input_size=24, h=12, max_epochs=100)
model.fit(dataset=dataset, test_size=12)
y_hat_test = model.predict(dataset=dataset, step_size=1)
np.testing.assert_almost_equal(y_hat, y_hat_test, decimal=4)

# test we recover the same forecast when predicting twice
y_hat_test2 = model.predict(dataset=dataset, step_size=1)
test_eq(y_hat_test, y_hat_test2)

In [None]:
#| hide
# test validation step
# Fit for a single epoch passing `val_size=12` so the validation step runs.
dataset, *_ = TimeSeriesDataset.from_df(Y_train_df)
model = MLP(input_size=24, h=12, step_size=1, 
            hidden_size=1024, num_layers=2,
            max_epochs=1)
model.fit(dataset=dataset, val_size=12)
y_hat_w_val = model.predict(dataset=dataset)

# Build a fresh plotting frame with `assign` instead of writing into
# `Y_test_df`: that frame comes from an earlier cell (as a slice of `Y_df`),
# so mutating it here makes the cell non-idempotent and can raise
# SettingWithCopyWarning.
Y_test_w_val_df = Y_test_df.assign(MLP=y_hat_w_val)

# Trailing `;` hides the Axes repr in the rendered cell output.
pd.concat([Y_train_df, Y_test_w_val_df]).drop('unique_id', axis=1).set_index('ds').plot();

In [None]:
#| hide
from neuralforecast.losses.pytorch import MQLoss

# Split the series by date into a training part and a held-out part.
Y_train_df = Y_df[Y_df.ds<='1959-12-31'] # 132 train
Y_test_df = Y_df[Y_df.ds>'1959-12-31']   # 12 test

# Fit MQ-MLP
dataset, *_ = TimeSeriesDataset.from_df(Y_train_df)
model = MLP(input_size=24, h=12, 
            loss=MQLoss(level=[80,90]),
            max_epochs=1)
model.fit(dataset=dataset)

# Parse quantile predictions
# One column per entry of `model.loss.output_names`, prefixed with 'MLP',
# aligned on the index of the held-out rows.
y_hat = model.predict(dataset=dataset)
Y_hat_df = pd.DataFrame.from_records(data=y_hat,
    columns=['MLP'+q for q in model.loss.output_names],
    index=Y_test_df.index)

# Plot quantile predictions
plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)
plot_df = pd.concat([Y_train_df, plot_df]).drop('unique_id', axis=1)
plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
plt.plot(plot_df['ds'], plot_df['MLP-median'], c='blue', label='median')
plt.fill_between(x=plot_df['ds'], 
                 y1=plot_df['MLP-lo-90'], y2=plot_df['MLP-hi-90'],
                 alpha=0.4, label='level 90')
plt.grid()
plt.legend()
# Render the figure; the original argument-less `plt.plot()` drew nothing
# and only left a `[]` repr in the cell output.
plt.show()

In [None]:
#| hide
# test no leakage with test_size and val_size
# The model is fit on the full series with both `val_size=12` and
# `test_size=12`; its forecast is compared against `y_hat_w_val`, which an
# earlier cell produced from the training part with `val_size=12`.
dataset, *_ = TimeSeriesDataset.from_df(Y_df)
model = MLP(
    input_size=24,
    h=12,
    step_size=1,
    hidden_size=1024,
    num_layers=2,
    max_epochs=1,
)
model.fit(dataset=dataset, val_size=12, test_size=12)
y_hat_test_w_val = model.predict(dataset=dataset, step_size=1)
np.testing.assert_almost_equal(y_hat_test_w_val, y_hat_w_val, decimal=4)