<a href="https://colab.research.google.com/github/CristinaMarsh/Learning_/blob/main/TStool/NeuralcastAutoformer_training_model_by_pl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install neuralforecast

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting neuralforecast
  Downloading neuralforecast-0.1.0-py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 5.1 MB/s 
Collecting torchinfo
  Downloading torchinfo-1.7.0-py3-none-any.whl (22 kB)
Collecting pytorch-lightning>=1.3.0
  Downloading pytorch_lightning-1.6.4-py3-none-any.whl (585 kB)
[K     |████████████████████████████████| 585 kB 45.0 MB/s 
[?25hCollecting py7zr
  Downloading py7zr-0.19.0-py3-none-any.whl (65 kB)
[K     |████████████████████████████████| 65 kB 2.3 MB/s 
Collecting pyDeprecate>=0.3.1
  Downloading pyDeprecate-0.3.2-py3-none-any.whl (10 kB)
Collecting torchmetrics>=0.4.1
  Downloading torchmetrics-0.9.2-py3-none-any.whl (419 kB)
[K     |████████████████████████████████| 419 kB 53.6 MB/s 
[?25hCollecting PyYAML>=5.4
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_6

## Source

https://github.com/Nixtla/neuralforecast/blob/main/nbs/models_transformer__autoformer.ipynb

In [2]:
#export
import random
from fastcore.foundation import patch

import numpy as np
import pandas as pd
import torch

import torch.nn as nn
import pytorch_lightning as pl
from torch import optim

from neuralforecast.models.components.embed import DataEmbedding_wo_pos
from neuralforecast.models.components.autocorrelation import (
    AutoCorrelation, AutoCorrelationLayer
)
from neuralforecast.models.components.autoformer import (
    Encoder, Decoder, EncoderLayer, DecoderLayer,
    my_Layernorm, series_decomp
)
from neuralforecast.losses.utils import LossFunction
from neuralforecast.data.tsdataset import IterateWindowsDataset
from neuralforecast.data.tsloader import TimeSeriesLoader

  import pandas.util.testing as tm


In [3]:
#export
class _Autoformer(nn.Module):
    """
    Autoformer is the first method to achieve the series-wise connection,
    with inherent O(LlogL) complexity
    """
    def __init__(self, seq_len, 
                 label_len, pred_len, output_attention,
                 enc_in, dec_in, d_model, c_out, embed, freq, dropout,
                 factor, n_heads, d_ff, moving_avg, activation, e_layers,
                 d_layers):
        super(_Autoformer, self).__init__()
        self.seq_len = seq_len
        self.label_len = label_len
        self.pred_len = pred_len
        self.output_attention = output_attention

        # Decomp
        kernel_size = moving_avg
        self.decomp = series_decomp(kernel_size)

        # Embedding
        # The series-wise connection inherently contains the sequential information.
        # Thus, we can discard the position embedding of transformers.
        self.enc_embedding = DataEmbedding_wo_pos(enc_in, d_model, embed, freq,
                                                  dropout)
        self.dec_embedding = DataEmbedding_wo_pos(dec_in, d_model, embed, freq,
                                                  dropout)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(False, factor, attention_dropout=dropout,
                                        output_attention=output_attention),
                        d_model, n_heads),
                    d_model,
                    d_ff,
                    moving_avg=moving_avg,
                    dropout=dropout,
                    activation=activation
                ) for l in range(e_layers)
            ],
            norm_layer=my_Layernorm(d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(True, factor, attention_dropout=dropout,
                                        output_attention=False),
                        d_model, n_heads),
                    AutoCorrelationLayer(
                        AutoCorrelation(False, factor, attention_dropout=dropout,
                                        output_attention=False),
                        d_model, n_heads),
                    d_model,
                    c_out,
                    d_ff,
                    moving_avg=moving_avg,
                    dropout=dropout,
                    activation=activation,
                )
                for l in range(d_layers)
            ],
            norm_layer=my_Layernorm(d_model),
            projection=nn.Linear(d_model, c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        # decomp init
        mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
        seasonal_init, trend_init = self.decomp(x_enc)
        # decoder input
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
        seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
        # dec
        dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
        seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
                                                 trend=trend_init)
        # final
        dec_out = trend_part + seasonal_part

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]

In [4]:
#export
class Autoformer(pl.LightningModule):
    def __init__(self, seq_len: int, 
                 label_len: int, pred_len: int, output_attention: bool,
                 enc_in: int, dec_in: int, d_model: int, c_out: int, 
                 embed: str, freq: str, dropout: float,
                 factor: float, n_heads: int, d_ff: int, moving_avg: int, 
                 activation: str, e_layers: int, d_layers: int,
                 loss_train: str, loss_valid: str, loss_hypar: float, 
                 learning_rate: float, lr_decay: float, weight_decay: float, 
                 lr_decay_step_size: int, random_seed: int):
        super(Autoformer, self).__init__()
        """
        Transformer Autoformer model.

        Parameters
        ----------
        seq_len: int
            Input sequence size.
        label_len: int
            Label sequence size.
        pred_len: int
            Prediction sequence size.
        output_attention: bool
            If true use output attention for Transformer model.
        enc_in: int
            Number of encoders in data embedding layers.
        dec_in: int
            Number of decoders in data embedding layers.
        d_model: int
            Number of nodes for embedding layers.
        c_out: int
            Number of output nodes in projection layer.
        embed: str
            Type of embedding layers.
        freq: str
            Frequency for embedding layers.
        dropout: float
            Float between (0, 1). Dropout for Transformer.
        factor: float
            Factor for attention layer.
        n_heads: int
            Number of heads in attention layer.
        d_ff: int
            Number of inputs in encoder layers.
        moving_avg: int
            Moving average for encoder and decoder layers.
        activation: str
            Activation function for encoder layer.
        e_layers: int
            Number of encoder layers.
        d_layers: int
            Number of decoder layers.
        loss_train: str
            Loss to optimize.
            An item from ['MAPE', 'MASE', 'SMAPE', 'MSE', 'MAE', 'QUANTILE', 'QUANTILE2'].
        loss_valid: str
            Validation loss.
            An item from ['MAPE', 'MASE', 'SMAPE', 'RMSE', 'MAE', 'QUANTILE'].
        loss_hypar: float
            Hyperparameter for chosen loss.
        learning_rate: float
            Learning rate between (0, 1).
        lr_decay: float
            Decreasing multiplier for the learning rate.
        weight_decay: float
            L2 penalty for optimizer.
        lr_decay_step_size: int 
            Steps between each learning rate decay.
        random_seed: int
            random_seed for pseudo random pytorch initializer and
            numpy random generator.
        """

        #------------------------ Model Attributes ------------------------#
        # Architecture parameters
        self.seq_len = seq_len 
        self.label_len = label_len 
        self.pred_len = pred_len 
        self.output_attention = output_attention
        self.enc_in = enc_in 
        self.dec_in = dec_in 
        self.d_model = d_model 
        self.c_out = c_out 
        self.embed = embed 
        self.freq = freq 
        self.dropout = dropout
        self.factor = factor 
        self.n_heads = n_heads 
        self.d_ff = d_ff 
        self.moving_avg = moving_avg 
        self.activation = activation 
        self.e_layers = e_layers
        self.d_layers = d_layers
        
        # Loss functions
        self.loss_train = loss_train
        self.loss_hypar = loss_hypar
        self.loss_valid = loss_valid
        self.loss_fn_train = LossFunction(loss_train, 
                                          seasonality=self.loss_hypar)
        self.loss_fn_valid = LossFunction(loss_valid,
                                          seasonality=self.loss_hypar)
        
        # Regularization and optimization parameters      
        self.learning_rate = learning_rate
        self.lr_decay = lr_decay
        self.weight_decay = weight_decay
        self.lr_decay_step_size = lr_decay_step_size
        self.random_seed = random_seed

        self.model = _Autoformer(seq_len, 
                                 label_len, pred_len, output_attention,
                                 enc_in, dec_in, d_model, c_out, 
                                 embed, freq, dropout,
                                 factor, n_heads, d_ff, 
                                 moving_avg, activation, e_layers,
                                 d_layers)
    
    def forward(self, batch):
        """
        Autoformer needs batch of shape (batch_size, time, series) for y
        and (batch_size, time, exogenous) for x
        and doesnt need X for each time series.
        USE DataLoader from pytorch instead of TimeSeriesLoader.
        """

        # Protection for missing batch_size dimension
        if batch['Y'].dim()<3:
            batch['Y'] = batch['Y'][None,:,:]

        if batch['X'] is not None:
            if batch['X'].dim()<4:
                batch['X'] = batch['X'][None,:,:,:]
        
        if batch['sample_mask'].dim()<3:
            batch['sample_mask'] = batch['sample_mask'][None,:,:]
        
        Y = batch['Y'].permute(0, 2, 1)
        X = batch['X'][:, 0, :, :].permute(0, 2, 1)
        sample_mask = batch['sample_mask'].permute(0, 2, 1)
        available_mask = batch['available_mask']
        
        s_begin = 0
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len
        
        batch_x = Y[:, s_begin:s_end, :]
        batch_y = Y[:, r_begin:r_end, :]
        batch_x_mark = X[:, s_begin:s_end, :]
        batch_y_mark = X[:, r_begin:r_end, :]
        outsample_mask = sample_mask[:, r_begin:r_end, :]
        
        dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
        dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
        
        if self.output_attention:
            forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
        else:
            forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            
        batch_y = batch_y[:, -self.pred_len:, :]
        outsample_mask = outsample_mask[:, -self.pred_len:, :]

        return batch_y, forecast, outsample_mask
    
    def training_step(self, batch, batch_idx):

        # Protection for missing batch_size dimension
        if batch['Y'].dim()<3:
            batch['Y'] = batch['Y'][None,:,:]

        outsample_y, forecast, outsample_mask = self(batch)

        loss = self.loss_fn_train(y=outsample_y,
                                  y_hat=forecast,
                                  mask=outsample_mask,
                                  y_insample= batch['Y'].permute(0, 2, 1))

        self.log('train_loss', loss, prog_bar=True, on_epoch=True)

        return loss

    def validation_step(self, batch, idx):
    
        # Protection for missing batch_size dimension
        if batch['Y'].dim()<3:
            batch['Y'] = batch['Y'][None,:,:]
        
        outsample_y, forecast, outsample_mask = self(batch)

        loss = self.loss_fn_valid(y=outsample_y,
                                  y_hat=forecast,
                                  mask=outsample_mask,
                                  y_insample= batch['Y'].permute(0, 2, 1))

        self.log('val_loss', loss, prog_bar=True)
        
        return loss

    def on_fit_start(self):
        torch.manual_seed(self.random_seed)
        np.random.seed(self.random_seed)
        random.seed(self.random_seed)

    def configure_optimizers(self):
        optimizer = optim.Adam(self.model.parameters(),
                               lr=self.learning_rate, 
                               weight_decay=self.weight_decay)
        
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 
                                                 step_size=self.lr_decay_step_size, 
                                                 gamma=self.lr_decay)

        return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler}

In [5]:
#export
@patch
def forecast(self: Autoformer, Y_df: pd.DataFrame, X_df: pd.DataFrame = None, S_df: pd.DataFrame = None, 
                trainer: pl.Trainer =None) -> pd.DataFrame:
    """
    Method for forecasting self.n_time_out periods after last timestamp of Y_df.

    Parameters
    ----------
    Y_df: pd.DataFrame
        Dataframe with target time-series data, needs 'unique_id','ds' and 'y' columns.
    X_df: pd.DataFrame
        Dataframe with exogenous time-series data, needs 'unique_id' and 'ds' columns.
        Note that 'unique_id' and 'ds' must match Y_df plus the forecasting horizon.
    S_df: pd.DataFrame
        Dataframe with static data, needs 'unique_id' column.
    bath_size: int
        Batch size for forecasting.
    trainer: pl.Trainer
        Trainer object for model training and evaluation.

    Returns
    ----------
    forecast_df: pd.DataFrame
        Dataframe with forecasts.
    """
    
    # Add forecast dates to Y_df
    Y_df['ds'] = pd.to_datetime(Y_df['ds'])
    if X_df is not None:
        X_df['ds'] = pd.to_datetime(X_df['ds'])
    self.frequency = pd.infer_freq(Y_df[Y_df['unique_id']==Y_df['unique_id'][0]]['ds']) # Infer with first unique_id series

    forecast_dates = pd.date_range(Y_df['ds'].max(), periods=self.pred_len+1, freq=self.frequency)[1:]
    index = pd.MultiIndex.from_product([Y_df['unique_id'].unique(), forecast_dates], names=['unique_id', 'ds'])
    forecast_df = pd.DataFrame({'y':[0]}, index=index).reset_index()

    Y_df = Y_df.append(forecast_df).sort_values(['unique_id','ds']).reset_index(drop=True)
    
    # Dataset, loader and trainer
    dataset = IterateWindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                    mask_df=None, f_cols=[],
                                    input_size=self.seq_len,
                                    output_size=self.pred_len,
                                    ds_in_test=self.pred_len,
                                    is_test=True,
                                    verbose=True)

    loader = TimeSeriesLoader(dataset=dataset,
                                batch_size=1,
                                shuffle=False)
    
    if trainer is None:
        gpus = -1 if torch.cuda.is_available() else 0
        trainer = pl.Trainer(progress_bar_refresh_rate=1,
                             gpus=gpus,
                             logger=False)

    # Forecast
    outputs = trainer.predict(self, loader)

    # Process forecast and include in forecast_df
    _, forecast, _ = [torch.cat(output).cpu().numpy() for output in zip(*outputs)]
    forecast = np.transpose(forecast, (0, 2, 1))
    forecast_df['y'] = forecast.flatten()

    return forecast_df

In [6]:
from neuralforecast.data.datasets.long_horizon import LongHorizon

Y_df, X_df, S_df = LongHorizon.load(directory='./data', group='ETTm2')
Y_df = Y_df.reset_index(drop=True)
Y_df.loc[Y_df['unique_id']=='OT','y'] = Y_df[Y_df['unique_id']=='OT']['y'] + 100 #To obseve differences

100%|██████████| 314M/314M [00:14<00:00, 22.3MiB/s]
INFO:neuralforecast.data.datasets.utils:Successfully downloaded datasets.zip, 314116557, bytes.
INFO:neuralforecast.data.datasets.utils:Decompressing zip file...
INFO:neuralforecast.data.datasets.utils:Successfully decompressed data/longhorizon/datasets/datasets.zip


In [11]:
f_cols = X_df.drop(columns=['unique_id', 'ds']).columns.to_list()

In [12]:
f_cols

['ex_1', 'ex_2', 'ex_3', 'ex_4']

In [7]:
Y_df.head()

Unnamed: 0,unique_id,ds,y
0,HUFL,2016-07-01 00:00:00,-0.041413
1,HUFL,2016-07-01 00:15:00,-0.185467
2,HUFL,2016-07-01 00:30:00,-0.257495
3,HUFL,2016-07-01 00:45:00,-0.57751
4,HUFL,2016-07-01 01:00:00,-0.385501


In [8]:
X_df.head()

Unnamed: 0,unique_id,ds,ex_1,ex_2,ex_3,ex_4
0,HUFL,2016-07-01 00:00:00,-0.5,0.166667,-0.5,-0.00137
1,HUFL,2016-07-01 00:15:00,-0.5,0.166667,-0.5,-0.00137
2,HUFL,2016-07-01 00:30:00,-0.5,0.166667,-0.5,-0.00137
3,HUFL,2016-07-01 00:45:00,-0.5,0.166667,-0.5,-0.00137
4,HUFL,2016-07-01 01:00:00,-0.456522,0.166667,-0.5,-0.00137


In [9]:
# Architecture parameters
mc_model = {}

mc_model['seq_len'] = 96
mc_model['label_len'] = 48
mc_model['pred_len'] = 96
mc_model['output_attention'] = False
mc_model['enc_in'] = 7
mc_model['dec_in'] = 7
mc_model['d_model'] = 512
mc_model['c_out'] = 7
mc_model['embed'] = 'timeF'
mc_model['freq'] = 'h'
mc_model['dropout'] = 0.05
mc_model['factor'] = 1
mc_model['n_heads'] = 8
mc_model['d_ff'] = 2_048
mc_model['moving_avg'] = 25 
mc_model['activation'] = 'gelu'
mc_model['e_layers'] = 2 
mc_model['d_layers'] = 1
mc_model['loss_train'] = 'MAE'
mc_model['loss_hypar'] = 0.5
mc_model['loss_valid'] = 'MAE'
mc_model['learning_rate'] = 0.001
mc_model['lr_decay'] = 0.5
mc_model['weight_decay'] = 0.
mc_model['lr_decay_step_size'] = 2
mc_model['random_seed'] = 1

# Dataset parameters
mc_data = {}
mc_data['mode'] = 'iterate_windows'
mc_data['n_time_in'] = mc_model['seq_len']
mc_data['n_time_out'] = mc_model['pred_len']
mc_data['batch_size'] = 2
mc_data['scaler'] = None
mc_data['max_epochs'] = None
mc_data['max_steps'] = 1
mc_data['early_stop_patience'] = 20

len_val = 11_520
len_test = 11_520

In [29]:
# Cell

from neuralforecast.data.tsdataset import IterateWindowsDataset, WindowsDataset, BaseDataset, TimeSeriesDataset
from neuralforecast.data.scalers import Scaler
from torch.utils.data import DataLoader
from typing import Tuple


# Cell
def get_mask_dfs(Y_df: pd.DataFrame,
                 ds_in_val: int, ds_in_test: int) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Generates train, test and validation mask.
    Train mask begins by avoiding ds_in_test.
    Parameters
    ----------
    Y_df: pd.DataFrame
        Target time series with columns ['unique_id', 'ds', 'y'].
    ds_in_val: int
        Number of ds in validation.
    ds_in_test: int
        Number of ds in test.
    Returns
    -------
    train_mask_df: pd.DataFrame
        Train mask dataframe.
    val_mask_df: pd.DataFrame
        Validation mask dataframe.
    test_mask_df: pd.DataFrame
        Test mask dataframe.
    """

    # train mask
    train_mask_df = Y_df.copy()[['unique_id', 'ds']]
    train_mask_df.sort_values(by=['unique_id', 'ds'], inplace=True)
    train_mask_df.reset_index(drop=True, inplace=True)

    train_mask_df['sample_mask'] = 1
    train_mask_df['available_mask'] = 1

    idx_out = train_mask_df.groupby('unique_id').tail(ds_in_val+ds_in_test).index
    train_mask_df.loc[idx_out, 'sample_mask'] = 0

    # test mask
    test_mask_df = train_mask_df.copy()
    test_mask_df['sample_mask'] = 0
    idx_test = test_mask_df.groupby('unique_id').tail(ds_in_test).index
    test_mask_df.loc[idx_test, 'sample_mask'] = 1

    # validation mask
    val_mask_df = train_mask_df.copy()
    val_mask_df['sample_mask'] = 1
    val_mask_df['sample_mask'] = val_mask_df['sample_mask'] - train_mask_df['sample_mask']
    val_mask_df['sample_mask'] = val_mask_df['sample_mask'] - test_mask_df['sample_mask']

    assert len(train_mask_df)==len(Y_df), \
        f'The mask_df length {len(train_mask_df)} is not equal to Y_df length {len(Y_df)}'

    return train_mask_df, val_mask_df, test_mask_df

# Cell
def get_random_mask_dfs(Y_df: pd.DataFrame, ds_in_test: int,
                        n_val_windows: int, n_ds_val_window: int,
                        n_uids: int, freq: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Generates train, test and random validation mask.
    Train mask begins by avoiding ds_in_test
    Validation mask: 1) samples n_uids unique ids
                     2) creates windows of size n_ds_val_window
    Parameters
    ----------
    Y_df: pd.DataFrame
        Target time series with columns ['unique_id', 'ds', 'y'].
    ds_in_test: int
        Number of ds in test.
    n_val_windows: int
        Number of windows for validation.
    n_ds_val_window: int
        Number of ds in each validation window.
    n_uids: int
        Number of unique ids in validation.
    freq: str
        string that determines datestamp frequency, used in
        random windows creation.
    Returns
    -------
    train_mask_df: pd.DataFrame
        Train mask dataframe.
    val_mask_df: pd.DataFrame
        Validation mask dataframe.
    test_mask_df: pd.DataFrame
        Test mask dataframe.
    """
    np.random.seed(1)
    #----------------------- Train mask -----------------------#
    # Initialize masks
    train_mask_df, val_mask_df, test_mask_df = get_mask_dfs(Y_df=Y_df,
                                                            ds_in_val=0,
                                                            ds_in_test=ds_in_test)

    assert val_mask_df['sample_mask'].sum()==0, 'Muerte'

    #----------------- Random Validation mask -----------------#
    # Overwrite validation with random windows
    uids = train_mask_df['unique_id'].unique()
    val_uids = np.random.choice(uids, n_uids, replace=False)

    # Validation avoids test
    idx_test = train_mask_df.groupby('unique_id').tail(ds_in_test).index
    available_ds = train_mask_df.loc[~train_mask_df.index.isin(idx_test)]['ds'].unique()
    val_init_ds = np.random.choice(available_ds, n_val_windows, replace=False)

    # Creates windows
    val_ds = [pd.date_range(init, periods=n_ds_val_window, freq=freq) for init in val_init_ds]
    val_ds = np.concatenate(val_ds)

    # Cleans random windows from train mask
    val_idx = train_mask_df.query('unique_id in @val_uids & ds in @val_ds').index
    train_mask_df.loc[val_idx, 'sample_mask'] = 0
    val_mask_df.loc[val_idx, 'sample_mask'] = 1

    return train_mask_df, val_mask_df, test_mask_df

# Cell


def create_datasets(mc: dict, S_df: pd.DataFrame, Y_df: pd.DataFrame, X_df: pd.DataFrame, f_cols: list,
                    ds_in_test: int, ds_in_val: int, verbose: bool=False):
    """
    Creates train, validation and test datasets.
    Parameters
    ----------
    mc: dict
        Model configuration.
    S_df: pd.DataFrame
        Static exogenous variables with columns ['unique_id', 'ds']
        and static variables.
    Y_df: pd.DataFrame
        Target time series with columns ['unique_id', 'ds', 'y'].
    X_df: pd.DataFrame
        Exogenous time series with columns ['unique_id', 'ds', 'y']
    f_cols: list
        List of exogenous variables of the future.
    ds_in_test: int
        Number of ds in test.
    ds_in_val: int
        Number of ds in validation.
    Returns
    -------
    train_dataset: BaseDataset
        Train dataset.
    valid_dataset: BaseDataset
        Validation dataset.
    test_dataset: BaseDataset
        Test dataset.
    """

    #------------------------------------- Available and Validation Mask ------------------------------------#
    train_mask_df, valid_mask_df, test_mask_df = get_mask_dfs(Y_df=Y_df,
                                                              ds_in_val=ds_in_val,
                                                              ds_in_test=ds_in_test)

    #---------------------------------------------- Scaler ----------------------------------------------#
    if mc['scaler'] is not None:
        scaler = Scaler(technique=mc['scaler'])
    else:
        scaler = None

    #----------------------------------------- Declare Dataset and Loaders ----------------------------------#

    if mc['mode'] == 'simple':
        train_dataset = WindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                       mask_df=train_mask_df, f_cols=f_cols,
                                       input_size=int(mc['n_time_in']),
                                       output_size=int(mc['n_time_out']),
                                       sample_freq=int(mc['idx_to_sample_freq']),
                                       complete_windows=mc['complete_windows'],
                                       #scaler=scaler,
                                       verbose=verbose)

        valid_dataset = WindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                       mask_df=valid_mask_df, f_cols=f_cols,
                                       input_size=int(mc['n_time_in']),
                                       output_size=int(mc['n_time_out']),
                                       sample_freq=int(mc['val_idx_to_sample_freq']),
                                       complete_windows=True,
                                       #scaler=scaler,
                                       verbose=verbose)

        test_dataset = WindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                      mask_df=test_mask_df, f_cols=f_cols,
                                      input_size=int(mc['n_time_in']),
                                      output_size=int(mc['n_time_out']),
                                      sample_freq=int(mc['val_idx_to_sample_freq']),
                                      complete_windows=True,
                                      #scaler=scaler,
                                      verbose=verbose)
    if mc['mode'] == 'iterate_windows':
        train_dataset = IterateWindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                              mask_df=train_mask_df, f_cols=f_cols,
                                              input_size=int(mc['n_time_in']),
                                              output_size=int(mc['n_time_out']),
                                              #scaler=scaler,
                                              verbose=verbose)

        valid_dataset = IterateWindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                              mask_df=valid_mask_df, f_cols=f_cols,
                                              input_size=int(mc['n_time_in']),
                                              output_size=int(mc['n_time_out']),
                                              #scaler=scaler,
                                              verbose=verbose)

        test_dataset = IterateWindowsDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                             mask_df=test_mask_df, f_cols=f_cols,
                                             input_size=int(mc['n_time_in']),
                                             output_size=int(mc['n_time_out']),
                                             #scaler=scaler,
                                             verbose=verbose)

    if mc['mode'] == 'full':
        train_dataset = TimeSeriesDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                          mask_df=train_mask_df, f_cols=f_cols,
                                          input_size=int(mc['n_time_in']),
                                          output_size=int(mc['n_time_out']),
                                          #scaler=scaler,
                                          verbose=verbose)

        valid_dataset = TimeSeriesDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                          mask_df=valid_mask_df, f_cols=f_cols,
                                          input_size=int(mc['n_time_in']),
                                          output_size=int(mc['n_time_out']),
                                          #scaler=scaler,
                                          verbose=verbose)

        test_dataset = TimeSeriesDataset(S_df=S_df, Y_df=Y_df, X_df=X_df,
                                         mask_df=test_mask_df, f_cols=f_cols,
                                         input_size=int(mc['n_time_in']),
                                         output_size=int(mc['n_time_out']),
                                         #scaler=scaler,
                                         verbose=verbose)

    if ds_in_test == 0:
        test_dataset = None

    return train_dataset, valid_dataset, test_dataset


In [30]:
from neuralforecast.data.tsdataset import IterateWindowsDataset
#from neuralforecast.experiments.utils import create_datasets
from torch.utils.data import DataLoader


train_dataset, val_dataset, test_dataset = create_datasets(mc=mc_data,S_df=None,Y_df=Y_df, X_df=X_df, f_cols=f_cols,ds_in_val=len_val,ds_in_test=len_test)



In [31]:
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=int(mc_data['batch_size']),
                          shuffle=True,
                          drop_last=True)

val_loader = DataLoader(dataset=val_dataset,
                        batch_size=int(mc_data['batch_size']),
                        shuffle=False)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=int(mc_data['batch_size']),
                         shuffle=False)

In [32]:
model = Autoformer(**mc_model)

INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpdl1r9cf7
INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpdl1r9cf7/_remote_module_non_sriptable.py


# Using pl training model

In [33]:
early_stopping = pl.callbacks.EarlyStopping(monitor='val_loss', 
                                            min_delta=1e-4, 
                                            patience=mc_data['early_stop_patience'],
                                            verbose=False,
                                            mode="min")

trainer = pl.Trainer(max_epochs=mc_data['max_epochs'], 
                     max_steps=mc_data['max_steps'],
                     gradient_clip_val=1.0,
                     progress_bar_refresh_rate=10, 
                     check_val_every_n_epoch=1,
                     num_sanity_val_steps=1,
                     val_check_interval=1,
                     limit_val_batches=1,
                     callbacks=[early_stopping])

trainer.fit(model, train_loader, val_loader)

  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_val_batches=1)` was configured so 1 batch will be used.
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
Missing logger folder: /content/lightning_logs

  | Name  | Type        | Params
--------------------------------------
0 | model | _Autoformer | 10.5 M
--------------------------------------
10.5 M    Trainable params
0         Non-trainable params
10.5 M    Total params
42.144    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

## Make Predictions

In [34]:
outputs = trainer.predict(model, val_loader)

Predicting: 1it [00:00, ?it/s]

In [36]:

print("outputs[0][0].shape", outputs[0][0].shape)
print("outputs[0][1].shape", outputs[0][1].shape)
print("outputs[0][2].shape", outputs[0][2].shape)

outputs[0][0].shape torch.Size([2, 96, 7])
outputs[0][1].shape torch.Size([2, 96, 7])
outputs[0][2].shape torch.Size([2, 96, 7])


## Forecast

In [38]:
Y_forecast_df = Y_df[Y_df['ds']<'2017-10-24']
Y_forecast_df = Y_forecast_df.reset_index(drop=True)
Y_forecast_df.tail()

Unnamed: 0,unique_id,ds,y
322555,OT,2017-10-23 22:45:00,99.424266
322556,OT,2017-10-23 23:00:00,99.424266
322557,OT,2017-10-23 23:15:00,99.405312
322558,OT,2017-10-23 23:30:00,99.386359
322559,OT,2017-10-23 23:45:00,99.367361


In [39]:
X_forecast_df = X_df[X_df['ds']<'2017-10-25']
X_forecast_df = X_forecast_df.reset_index(drop=True)
X_forecast_df['ds'] = pd.to_datetime(X_forecast_df['ds'])
X_forecast_df.tail()

Unnamed: 0,unique_id,ds,ex_1,ex_2,ex_3,ex_4
323227,OT,2017-10-24 22:45:00,0.456522,-0.333333,0.266667,0.310959
323228,OT,2017-10-24 23:00:00,0.5,-0.333333,0.266667,0.310959
323229,OT,2017-10-24 23:15:00,0.5,-0.333333,0.266667,0.310959
323230,OT,2017-10-24 23:30:00,0.5,-0.333333,0.266667,0.310959
323231,OT,2017-10-24 23:45:00,0.5,-0.333333,0.266667,0.310959


In [40]:
forecast_df = model.forecast(Y_df=Y_forecast_df, X_df=X_forecast_df, S_df=S_df)


INFO:root:Train Validation splits

INFO:root:                              ds                    
                             min                 max
unique_id sample_mask                               
HUFL      0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
HULL      0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
LUFL      0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
LULL      0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
MUFL      0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
MULL      0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
OT        0           2016-07-01 2017-10-23 23:45:00
          1           2017-10-24 2017-10-24 23:45:00
INFO:root:
Total data 			323232 time stamps 
Available

Predicting: 0it [00:00, ?it/s]

In [41]:
forecast_df

Unnamed: 0,unique_id,ds,y
0,HUFL,2017-10-24 00:00:00,-0.669236
1,HUFL,2017-10-24 00:15:00,-1.063869
2,HUFL,2017-10-24 00:30:00,-1.336051
3,HUFL,2017-10-24 00:45:00,-1.639866
4,HUFL,2017-10-24 01:00:00,-1.725134
...,...,...,...
667,OT,2017-10-24 22:45:00,98.561096
668,OT,2017-10-24 23:00:00,99.437943
669,OT,2017-10-24 23:15:00,99.841583
670,OT,2017-10-24 23:30:00,100.093605
