# Imports

In [1]:
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader, random_split
from torch.utils.data.distributed import DistributedSampler
import numpy as np
import pandas as pd
import torch as torch

from pathlib import Path
import pickle
import warnings

import numpy as np
import pandas as pd
from pandas.core.common import SettingWithCopyWarning
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import GroupNormalizer, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data.examples import get_stallion_data
from pytorch_forecasting.metrics import MAE, RMSE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting.utils import profile

warnings.simplefilter("error", category=SettingWithCopyWarning)

# init

In [24]:
class MQCNNModel(nn.Module):
    def __init__(self, static_features, timevarying_features, time_step, future_information, ltsp, lead_future,
                 global_hidden_units, horizon_specific_hidden_units,
                 horizon_agnostic_hidden_units, local_mlp_hidden_units, local_mlp_output_units):
        super(MQCNNModel, self).__init__()
        self.time_step = time_step
        self.static_features = static_features
        self.timevarying_features = timevarying_features
        self.future_information = future_information
        self.ltsp = ltsp
        self.lead_future = lead_future
        self.global_hidden_units = global_hidden_units
        self.horizon_specific_hidden_units = horizon_specific_hidden_units
        self.horizon_agnostic_hidden_units = horizon_agnostic_hidden_units
        self.local_mlp_hidden_units = local_mlp_hidden_units
        self.local_mlp_output_units = local_mlp_output_units

        self.encoder = MQCNNEncoder(self.time_step, self.static_features, self.timevarying_features)
        self.decoder = MQCNNDecoder(self.time_step, self.lead_future, self.ltsp, self.future_information,
                                    self.global_hidden_units, self.horizon_specific_hidden_units,
                                    self.horizon_agnostic_hidden_units, self.local_mlp_hidden_units,
                                    self.local_mlp_output_units)

    def forward(self, x):
        encoding = self.encoder(x)
        output = self.decoder(encoding, x)

        return output

class MQCNNEncoder(nn.Module):
    def __init__(self, time_step, static_features, timevarying_features):
        super().__init__()
        self.time_step = time_step
        self.static_features = static_features
        self.timevarying_features = timevarying_features
        self.static = StaticLayer(in_channels = 1, #len(self.static_features),
                                  time_step = self.time_step,
                                  static_features = self.static_features)

        self.conv = ConvLayer(in_channels = len(self.timevarying_features),
                              timevarying_features = self.timevarying_features)

    def forward(self, x):
        x_s = self.static(x)
        x_t = self.conv(x)

        return torch.cat((x_s, x_t), axis = 1)


class MQCNNDecoder(nn.Module):
    """Decoder implementation for MQCNN

    Parameters
    ----------
    config
        Configurations
    ltsp : list of tuple of int
        List of lead-time / span tuples to make predictions for
    expander : HybridBlock
        Overrides default future data expander if not None
    hf1 : HybridBlock
        Overrides default global future layer if not None
    hf2 : HybridBlock
        Overrides default local future layer if not None
    ht1 : HybridBlock
        Overrides horizon-specific layer if not None
    ht2 : HybridBlock
        Overrides horizon-agnostic layer if not None
    h : HybridBlock
        Overrides local MLP if not None
    span_1 : HybridBlock
        Overrides span 1 layer if not None
    span_N : HybridBlock
        Overrides span N layer if not None

    Inputs:
        - **xf** : Future data of shape
            (batch_size, Trnn + lead_future - 1, num_future_ts_features)
        - **encoded** : Encoded input tensor of shape
            (batch_size, Trnn, n) for some n
    Outputs:
        - **pred_1** :  Span 1 predictions of shape
            (batch_size, Trnn, Tpred * num_quantiles)
        - **pred_N** : Span N predictions of shape
            (batch_size, Trnn, span_N_count * num_quantiles)

        In both outputs, the last dimensions has the predictions grouped
        together by quantile. For example, the quantiles are P10 and P90
        then the span 1 predictions will be:
        Tpred_0_p50, Tpred_1_p50, ..., Tpred_N_p50, Tpred_0_p90,
        Tpred_1_p90, ... Tpred_N_90
        
        
    """

    def __init__(self, time_step, lead_future, future_information, ltsp,
                 global_hidden_units, horizon_specific_hidden_units, horizon_agnostic_hidden_units,
                 local_mlp_hidden_units, local_mlp_output_units,
                 num_quantiles=2, expander=None, hf1=None, hf2=None,
                 ht1=None, ht2=None, h=None, span_1=None, span_N=None,
                 **kwargs):
        super(MQCNNDecoder, self).__init__(**kwargs)
        self.future_features_count = len(future_information)
        self.future_information = future_information
        self.time_step = time_step
        self.lead_future = lead_future
        self.ltsp = ltsp
        self.num_quantiles = num_quantiles
        self.global_hidden_units = global_hidden_units
        self.horizon_specific_hidden_units = horizon_specific_hidden_units
        self.horizon_agnostic_hidden_units = horizon_agnostic_hidden_units
        self.local_mlp_hidden_units = local_mlp_hidden_units
        self.local_mlp_output_units = local_mlp_output_units

        # We assume that Tpred == span1_count.
        # Tpred = forecast_end_index
#         self.Tpred = max(map(lambda x: x[0] + x[1], self.ltsp))
        self.Tpred = 91
#         span1_count = len(list(filter(lambda x: x[1] == 1, self.ltsp)))
        span1_count = 1
        #print(self.Tpred, span1_count)
        #assert span1_count == self.Tpred, f"Number of span 1 horizons: {span1_count}\
                                            #does not match Tpred: {self.Tpred}" 

#         self.spanN_count = len(list(filter(lambda x: x[1] != 1, self.ltsp)))
        self.spanN_count = 1
        # Setting default components:
        if expander is None:
            expander = ExpandLayer(self.time_step, self.lead_future, self.future_information)
        if hf1 is None:
            hf1 = GlobalFutureLayer(self.lead_future, self.future_features_count, out_channels=self.global_hidden_units)
        if ht1 is None:
            ht1 = HorizonSpecific(self.Tpred, self.time_step, num = self.horizon_specific_hidden_units)
        if ht2 is None:
            ht2 = HorizonAgnostic(self.horizon_agnostic_hidden_units, self.lead_future)
        if h is None:
            h = LocalMlp(self.local_mlp_hidden_units, self.local_mlp_output_units)
        if span_1 is None:
            span_1 = Span1(self.time_step, self.lead_future, self.num_quantiles)
        if span_N is None:
            span_N = SpanN(self.time_step, self.lead_future, self.num_quantiles, self.spanN_count)

        self.expander = expander
        self.hf1 = hf1
        self.hf2 = hf2
        self.ht1 = ht1
        self.ht2 = ht2
        self.h = h
        self.span_1 = span_1
        self.span_N = span_N

    def forward(self, F, x, encoded):
        xf = x[self.future_information]
        expanded = self.expander(xf)
        hf1 = self.hf1(expanded)
        hf2 = F.tanh(expanded)

        ht = torch.cat(encoded, hf1, dim=-1)
        ht1 = self.ht1(ht)
        ht2 = self.ht2(ht)
        h = torch.cat(ht1, ht2, hf2, dim=-1)
        h = self.h(h)
        return self.span_1(h), self.span_N(h)

# submodule

class StaticLayer(nn.Module):
    def __init__(self, in_channels, time_step, static_features, out_channels = 30, dropout = 0.4):
        super().__init__()
        self.time_step = time_step
        self.static_features = static_features
        self.dropout = nn.Dropout(dropout)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.static = nn.Linear(self.in_channels, self.out_channels)

    def forward(self, x):
        x = x[self.static_features]
        x = self.dropout(x)
        x = self.static(x)
        return x.unsqueeze(1).repeat(1, self.time_step, 1)

class ConvLayer(nn.Module):
    def __init__(self, timevarying_features, in_channels, out_channels = 30, kernel_size = 2):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.timevarying_features = timevarying_features

        c1 = nn.Conv1d(self.in_channels, self.out_channels, self.kernel_size, dilation = 1)
        c2 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 2)
        c3 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size,  dilation = 4)
        c4 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 8)
        c5 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 16)
        c6 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 32)

    def forward(self, x):
        x_t = x[self.timevarying_features]
        x_t = x_t.permute(0, 2, 1)
        x_t = F.pad(x_t, (0,0), "constant", 0)
        x_t = c1(x_t)
        x_t = F.pad(x_t, (2,0), "constant", 0)
        x_t = c2(x_t)
        x_t = F.pad(x_t, (4,0), "constant", 0)
        x_t = c3(x_t)
        x_t = F.pad(x_t, (8,0), "constant", 0)
        x_t = c4(x_t)
        x_t = F.pad(x_t, (16,0), "constant", 0)
        x_t = c5(x_t)
        x_t = F.pad(x_t, (32,0), "constant", 0)
        x_t = c6(x_t)
        
        return x_t.permute(0, 2, 1)

class ExpandLayer(nn.Module):
    """Expands the dimension referred to as `expand_axis` into two
    dimensions by applying a sliding window. For example, a tensor of
    shape (1, 4, 2) as follows:

    [[[0. 1.]
      [2. 3.]
      [4. 5.]
      [6. 7.]]]

    where `expand_axis` = 1 and `time_step` = 3 (number of windows) and
    `lead_future` = 2 (window length) will become:

    [[[[0. 1.]
       [2. 3.]]

      [[2. 3.]
       [4. 5.]]

      [[4. 5.]
       [6. 7.]]]]

    Used for expanding future information tensors

    Parameters
    ----------
    time_step : int
        Length of the time sequence (number of windows)
    lead_future : int
        Number of future time points (window length)
    expand_axis : int
        Axis to expand"""

    def __init__(self, time_step, lead_future, future_information, **kwargs):
        super(ExpandLayer, self).__init__(**kwargs)
    
        self.time_step = time_step
        self.future_information = future_information
        self.lead_future = lead_future

    def forward(self, x):

        # First create a matrix of indices, which we will use to slice
        # `input` along `expand_axis`. For example, for time_step=3 and
        # lead_future=2,
        # idx = [[0. 1.]
        #        [1. 2.]
        #        [2. 3.]]
        # We achieve this by doing a broadcast add of
        # [[0.] [1.] [2.]] and [[0. 1.]]
        x = x[[self.future_information]]
        idx = torch.add(torch.arange(self.time_step).unsqueeze(axis = 1),
                        torch.arange(self.lead_future).unsqueeze(axis = 0))
        # Now we slice `input`, taking elements from `input` that correspond to
        # the indices in `idx` along the `expand_axis` dimension
        return x[:, idx, :]

        
class GlobalFutureLayer(nn.Module):
    def __init__(self, lead_future, future_features_count, out_channels = 30):
        super().__init__()
        self.lead_future = lead_future
        self.future_features_count = future_features_count
        self.out_channels = out_channels

        self.l1 = nn.Linear(self.lead_future * self.future_features_count, out_channels)
        
    def forward(self, x):
        x = x.view(-1, self.time_step, self.lead_future * self.future_features_count)
        
        return self.l1(x)
    
class HorizonSpecific(nn.Module):
    def __init__(self, Tpred, time_step, num = 20):
        super().__init__()
        self.Tpred = Tpred
        self.time_step = time_step
        self.num = num
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.Tpred * self.num)(x)
        x = F.relu(x)

        return x.view(-1, self.time_step, self.Tpred, 20)

class HorizonAgnostic(nn.Module):
    def __init__(self, out_channels, lead_future):
        super().__init__()
        self.out_channels = out_channels
        self.lead_future = lead_future
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.out_channels)(x)
        x = F.relu(x)
        x = x.unsqueeze(axis = 2)
        x = x.repeat(1,1, self.lead_future, 1)

        return x
    
class LocalMlp(nn.Module):
    def __init__(self, hidden, output):
        super().__init__()
        self.hidden = hidden
        self.output = output
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.hidden)(x)
        x = F.relu(x)
        x = nn.Linear(self.hidden, self.output)(x)
        x = F.relu(x)

        return x


class Span1(nn.Module):
    def __init__(self, time_step, lead_future, num_quantiles):
        super().__init__()
        self.time_step = time_step
        self.lead_future = lead_future
        self.num_quantiles = num_quantiles
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.num_quantiles)
        x = F.relu(x.contiguous().view(-1, x.size(-2), x.size(-1)))
        x = x.view(-1, self.time_step, self.lead_future, self.num_quantiles)
        x = x.view(-1, self.time_step, self.lead_future*self.num_quantiles)

        return x


class SpanN(nn.Module):
    def __init__(self, time_step, lead_future, num_quantiles, spanN_count):
        super().__init__()
        self.time_step = time_step
        self.lead_future = lead_future
        self.num_quantiles = num_quantiles
        self.spanN_count = spanN_count
        
    def forward(self, x):
        x = x.permute(0, 1, 3, 2)
        x = x.contiguous().view(-1, self.time_step, x.size(-2) * x.size(-1))

        x = nn.Linear(x.size(-1), self.spanN_count * self.num_quantiles)

        return x

In [3]:
data = get_stallion_data()
example_df = data.query('agency == "Agency_22" & sku == "SKU_01"')

In [4]:
example_df.columns

Index(['agency', 'sku', 'volume', 'date', 'industry_volume', 'soda_volume',
       'avg_max_temp', 'price_regular', 'price_actual', 'discount',
       'avg_population_2017', 'avg_yearly_household_income_2017', 'easter_day',
       'good_friday', 'new_year', 'christmas', 'labor_day', 'independence_day',
       'revolution_day_memorial', 'regional_games', 'fifa_u_17_world_cup',
       'football_gold_cup', 'beer_capital', 'music_fest',
       'discount_in_percent', 'timeseries'],
      dtype='object')

In [5]:
example_df.dtypes

agency                                    category
sku                                       category
volume                                     float64
date                                datetime64[ns]
industry_volume                              int64
soda_volume                                  int64
avg_max_temp                               float64
price_regular                              float64
price_actual                               float64
discount                                   float64
avg_population_2017                          int64
avg_yearly_household_income_2017             int64
easter_day                                   int64
good_friday                                  int64
new_year                                     int64
christmas                                    int64
labor_day                                    int64
independence_day                             int64
revolution_day_memorial                      int64
regional_games                 

In [6]:
example_df.shape

(60, 26)

In [7]:
example_df = example_df.set_index(['agency', 'sku'])

In [8]:
example_df['month'] = example_df['date'].dt.month

In [9]:
subset_example_df = example_df[['volume', 'avg_population_2017', 'month', 'price_regular', 'industry_volume']]
subset_example_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,volume,avg_population_2017,month,price_regular,industry_volume
agency,sku,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agency_22,SKU_01,52.272,48151,1,1168.903668,492612703
Agency_22,SKU_01,62.532,48151,2,1169.357513,431937346
Agency_22,SKU_01,74.196,48151,3,1204.673581,509281531
Agency_22,SKU_01,89.424,48151,4,1235.1875,532390389
Agency_22,SKU_01,79.164,48151,5,1247.061989,551755254


In [10]:
subset_dummies_example_df = pd.get_dummies(subset_example_df, columns=['month'])
subset_dummies_example_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,volume,avg_population_2017,price_regular,industry_volume,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
agency,sku,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Agency_22,SKU_01,52.272,48151,1168.903668,492612703,1,0,0,0,0,0,0,0,0,0,0,0
Agency_22,SKU_01,62.532,48151,1169.357513,431937346,0,1,0,0,0,0,0,0,0,0,0,0
Agency_22,SKU_01,74.196,48151,1204.673581,509281531,0,0,1,0,0,0,0,0,0,0,0,0
Agency_22,SKU_01,89.424,48151,1235.1875,532390389,0,0,0,1,0,0,0,0,0,0,0,0
Agency_22,SKU_01,79.164,48151,1247.061989,551755254,0,0,0,0,1,0,0,0,0,0,0,0


In [11]:
subset_dummies_example_df.columns

Index(['volume', 'avg_population_2017', 'price_regular', 'industry_volume',
       'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6',
       'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12'],
      dtype='object')

In [12]:
static_features = torch.tensor(subset_dummies_example_df[['avg_population_2017']].to_numpy(np.float32))

In [13]:
timevarying_features = torch.tensor(subset_dummies_example_df[
    ['volume', 'price_regular', 'industry_volume',
     'month_1', 'month_2', 'month_3', 'month_4', 
     'month_5', 'month_6', 'month_7', 'month_8', 
     'month_9', 'month_10', 'month_11', 'month_12']].to_numpy(np.float32))

In [14]:
timevarying_features.shape

torch.Size([60, 15])

In [15]:
future_information = torch.tensor(subset_dummies_example_df[
    ['price_regular',
     'month_1', 'month_2', 'month_3', 'month_4', 
     'month_5', 'month_6', 'month_7', 'month_8', 
     'month_9', 'month_10', 'month_11', 'month_12']].to_numpy(np.float32))

In [16]:
model_input = {'static_features': static_features, 
               'timevarying_features': timevarying_features, 
               'future_information': future_information}

In [25]:
model = MQCNNModel(
    'static_features', 
    'timevarying_features', 
    365, 
    'future_information', 
    [(0, 1), (1, 1)], 
    91, 50, 20, 100, 50, 10
)

In [26]:
model

MQCNNModel(
  (encoder): MQCNNEncoder(
    (static): StaticLayer(
      (dropout): Dropout(p=0.4, inplace=False)
      (static): Linear(in_features=1, out_features=30, bias=True)
    )
    (conv): ConvLayer()
  )
  (decoder): MQCNNDecoder(
    (expander): ExpandLayer()
    (hf1): GlobalFutureLayer(
      (l1): Linear(in_features=182, out_features=50, bias=True)
    )
    (ht1): HorizonSpecific()
    (ht2): HorizonAgnostic()
    (h): LocalMlp()
    (span_1): Span1()
    (span_N): SpanN()
  )
)

In [27]:
model(model_input)

squeeze: torch.Size([60])
no squeeze: torch.Size([60, 1])


AttributeError: 'dict' object has no attribute 'shape'

In [None]:
s = StaticLayer(30, 91, 'static_features', out_channels = 30, dropout = 0.4)

In [None]:
s(model_input)