In [1]:
import pytorch_lightning as pl

import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader, random_split
from torch.utils.data.distributed import DistributedSampler
import numpy as np
import pandas as pd
import torch as torch

from pathlib import Path
import pickle
import warnings

import numpy as np
import pandas as pd
from pandas.core.common import SettingWithCopyWarning
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import GroupNormalizer, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data.examples import get_stallion_data
from pytorch_forecasting.metrics import MAE, RMSE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting.utils import profile
from torch.utils.data import Dataset, DataLoader, IterableDataset

warnings.simplefilter("error", category=SettingWithCopyWarning)

In [2]:
class MQCNNEncoder(nn.Module):
    def __init__(self, time_step, static_features, timevarying_features, num_static_features, num_timevarying_features):
        super().__init__()
        self.time_step = time_step
        self.static_features = static_features
        self.timevarying_features = timevarying_features
        self.num_static_features = num_static_features
        self.num_timevarying_features = num_timevarying_features
        self.static = StaticLayer(in_channels = self.num_static_features,
                                  time_step = self.time_step,
                                  static_features = self.static_features)

        self.conv = ConvLayer(in_channels = self.num_timevarying_features,
                              timevarying_features = self.timevarying_features,
                             time_step = self.time_step)

    def forward(self, x):
        x_s = self.static(x)
        x_t = self.conv(x)
        return torch.cat((x_s, x_t), axis = 2)


class MQCNNDecoder(nn.Module):
    """Decoder implementation for MQCNN

    Parameters
    ----------
    config
        Configurations
    ltsp : list of tuple of int
        List of lead-time / span tuples to make predictions for
    expander : HybridBlock
        Overrides default future data expander if not None
    hf1 : HybridBlock
        Overrides default global future layer if not None
    hf2 : HybridBlock
        Overrides default local future layer if not None
    ht1 : HybridBlock
        Overrides horizon-specific layer if not None
    ht2 : HybridBlock
        Overrides horizon-agnostic layer if not None
    h : HybridBlock
        Overrides local MLP if not None
    span_1 : HybridBlock
        Overrides span 1 layer if not None
    span_N : HybridBlock
        Overrides span N layer if not None

    Inputs:
        - **xf** : Future data of shape
            (batch_size, Trnn + lead_future - 1, num_future_ts_features)
        - **encoded** : Encoded input tensor of shape
            (batch_size, Trnn, n) for some n
    Outputs:
        - **pred_1** :  Span 1 predictions of shape
            (batch_size, Trnn, Tpred * num_quantiles)
        - **pred_N** : Span N predictions of shape
            (batch_size, Trnn, span_N_count * num_quantiles)

        In both outputs, the last dimensions has the predictions grouped
        together by quantile. For example, the quantiles are P10 and P90
        then the span 1 predictions will be:
        Tpred_0_p50, Tpred_1_p50, ..., Tpred_N_p50, Tpred_0_p90,
        Tpred_1_p90, ... Tpred_N_90
        
        
    """

    def __init__(self, time_step, lead_future, ltsp, future_information, num_future_features,
                 global_hidden_units, horizon_specific_hidden_units, horizon_agnostic_hidden_units,
                 local_mlp_hidden_units, local_mlp_output_units,
                 num_quantiles=2, expander=None, hf1=None, hf2=None,
                 ht1=None, ht2=None, h=None, span_1=None, span_N=None,
                 **kwargs):
        super(MQCNNDecoder, self).__init__(**kwargs)
        self.future_features_count = num_future_features
        self.future_information = future_information
        self.time_step = time_step
        self.lead_future = lead_future
        self.ltsp = ltsp
        self.num_quantiles = num_quantiles
        self.global_hidden_units = global_hidden_units
        self.horizon_specific_hidden_units = horizon_specific_hidden_units
        self.horizon_agnostic_hidden_units = horizon_agnostic_hidden_units
        self.local_mlp_hidden_units = local_mlp_hidden_units
        self.local_mlp_output_units = local_mlp_output_units

        # We assume that Tpred == span1_count.
        # Tpred = forecast_end_index
#         self.Tpred = max(map(lambda x: x[0] + x[1], self.ltsp))
        self.Tpred = 6
#         span1_count = len(list(filter(lambda x: x[1] == 1, self.ltsp)))
        span1_count = 1
        #print(self.Tpred, span1_count)
        #assert span1_count == self.Tpred, f"Number of span 1 horizons: {span1_count}\
                                            #does not match Tpred: {self.Tpred}" 

#         self.spanN_count = len(list(filter(lambda x: x[1] != 1, self.ltsp)))
        self.spanN_count = 1
        # Setting default components:
        if expander is None:
            expander = ExpandLayer(self.time_step, self.lead_future, self.future_information)
        if hf1 is None:
            hf1 = GlobalFutureLayer(self.time_step, self.lead_future, self.future_features_count, out_channels=self.global_hidden_units)
        if ht1 is None:
            ht1 = HorizonSpecific(self.Tpred, self.time_step, num = self.horizon_specific_hidden_units)
        if ht2 is None:
            ht2 = HorizonAgnostic(self.horizon_agnostic_hidden_units, self.lead_future)
        if h is None:
            h = LocalMlp(self.local_mlp_hidden_units, self.local_mlp_output_units)
        if span_1 is None:
            span_1 = Span1(self.time_step, self.lead_future, self.num_quantiles)
        if span_N is None:
            span_N = SpanN(self.time_step, self.lead_future, self.num_quantiles, self.spanN_count)

        self.expander = expander
        self.hf1 = hf1
        self.hf2 = hf2
        self.ht1 = ht1
        self.ht2 = ht2
        self.h = h
        self.span_1 = span_1
        self.span_N = span_N

    def forward(self, x, encoded):
        xf = x['future_information']
        expanded = self.expander(xf)
        hf1 = self.hf1(expanded)
        hf2 = F.relu(expanded)
        
        ht = torch.cat((encoded, hf1), dim=-1)
        ht1 = self.ht1(ht)
        ht2 = self.ht2(ht)
        h = torch.cat((ht1, ht2, hf2), dim=-1)
        h = self.h(h)
        return self.span_1(h)#, self.span_N(h)

# submodule

class StaticLayer(nn.Module):
    def __init__(self, in_channels, time_step, static_features, out_channels = 30, dropout = 0.4):
        super().__init__()
        self.time_step = time_step
        #self.static_features = static_features
        self.dropout = nn.Dropout(dropout)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.static = nn.Linear(self.in_channels, self.out_channels)

    def forward(self, x):
        x = x['static_features'][:,:1,:]
        x = self.dropout(x)
        x = self.static(x)
        return x.repeat(1, self.time_step, 1)

class ConvLayer(nn.Module):
    def __init__(self, time_step, timevarying_features, in_channels, out_channels = 30, kernel_size = 2):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.timevarying_features = timevarying_features
        self.time_step = time_step

        self.c1 = nn.Conv1d(self.in_channels, self.out_channels, self.kernel_size, dilation = 1)
        self.c2 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 2)
        self.c3 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size,  dilation = 4)
        self.c4 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 8)
        self.c5 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 16)
        self.c6 = nn.Conv1d(self.out_channels, self.out_channels, self.kernel_size, dilation = 32)

    def forward(self, x):
        x_t = x['timevarying_features'][:, :self.time_step, :]
        x_t = x_t.permute(0, 2, 1)
        x_t = F.pad(x_t, (1,0), "constant", 0)
        x_t = self.c1(x_t)
        x_t = F.pad(x_t, (2,0), "constant", 0)
        x_t = self.c2(x_t)
        x_t = F.pad(x_t, (4,0), "constant", 0)
        x_t = self.c3(x_t)
        x_t = F.pad(x_t, (8,0), "constant", 0)
        x_t = self.c4(x_t)
        x_t = F.pad(x_t, (16,0), "constant", 0)
        x_t = self.c5(x_t)
        
        return x_t.permute(0, 2, 1)

class ExpandLayer(nn.Module):
    """Expands the dimension referred to as `expand_axis` into two
    dimensions by applying a sliding window. For example, a tensor of
    shape (1, 4, 2) as follows:

    [[[0. 1.]
      [2. 3.]
      [4. 5.]
      [6. 7.]]]

    where `expand_axis` = 1 and `time_step` = 3 (number of windows) and
    `lead_future` = 2 (window length) will become:

    [[[[0. 1.]
       [2. 3.]]

      [[2. 3.]
       [4. 5.]]

      [[4. 5.]
       [6. 7.]]]]

    Used for expanding future information tensors

    Parameters
    ----------
    time_step : int
        Length of the time sequence (number of windows)
    lead_future : int
        Number of future time points (window length)
    expand_axis : int
        Axis to expand"""

    def __init__(self, time_step, lead_future, future_information, **kwargs):
        super(ExpandLayer, self).__init__(**kwargs)
    
        self.time_step = time_step
        self.future_information = future_information
        self.lead_future = lead_future

    def forward(self, x):

        # First create a matrix of indices, which we will use to slice
        # `input` along `expand_axis`. For example, for time_step=3 and
        # lead_future=2,
        # idx = [[0. 1.]
        #        [1. 2.]
        #        [2. 3.]]
        # We achieve this by doing a broadcast add of
        # [[0.] [1.] [2.]] and [[0. 1.]]
        idx = torch.add(torch.arange(self.time_step).unsqueeze(axis = 1),
                        torch.arange(self.lead_future).unsqueeze(axis = 0))
        # Now we slice `input`, taking elements from `input` that correspond to
        # the indices in `idx` along the `expand_axis` dimension
        return x[:, idx, :]

        
class GlobalFutureLayer(nn.Module):
    def __init__(self, time_step, lead_future, future_features_count, out_channels = 30):
        super().__init__()
        self.time_step = time_step
        self.lead_future = lead_future
        self.future_features_count = future_features_count
        self.out_channels = out_channels

        self.l1 = nn.Linear(self.lead_future * self.future_features_count, out_channels)
        
    def forward(self, x):
        x = x.contiguous().view(-1, self.time_step, self.lead_future * self.future_features_count)
        
        return self.l1(x)
    
class HorizonSpecific(nn.Module):
    def __init__(self, Tpred, time_step, num = 20):
        super().__init__()
        self.Tpred = Tpred
        self.time_step = time_step
        self.num = num
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.Tpred * self.num)(x)
        x = F.relu(x)

        return x.view(-1, self.time_step, self.Tpred, 20)

class HorizonAgnostic(nn.Module):
    def __init__(self, out_channels, lead_future):
        super().__init__()
        self.out_channels = out_channels
        self.lead_future = lead_future
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.out_channels)(x)
        x = F.relu(x)
        x = x.unsqueeze(axis = 2)
        x = x.repeat(1,1, self.lead_future, 1)

        return x
    
class LocalMlp(nn.Module):
    def __init__(self, hidden, output):
        super().__init__()
        self.hidden = hidden
        self.output = output
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.hidden)(x)
        x = F.relu(x)
        x = nn.Linear(self.hidden, self.output)(x)
        x = F.relu(x)

        return x


class Span1(nn.Module):
    def __init__(self, time_step, lead_future, num_quantiles):
        super().__init__()
        self.time_step = time_step
        self.lead_future = lead_future
        self.num_quantiles = num_quantiles
        
    def forward(self, x):
        x = nn.Linear(x.size(-1), self.num_quantiles)(x)
        x = F.relu(x.contiguous().view(-1, x.size(-2), x.size(-1)))
        x = x.view(-1, self.time_step, self.lead_future, self.num_quantiles)
        x = x.view(-1, self.time_step, self.lead_future*self.num_quantiles)

        return x


class SpanN(nn.Module):
    def __init__(self, time_step, lead_future, num_quantiles, spanN_count):
        super().__init__()
        self.time_step = time_step
        self.lead_future = lead_future
        self.num_quantiles = num_quantiles
        self.spanN_count = spanN_count
        
    def forward(self, x):
        x = x.permute(0, 1, 3, 2)
        x = x.contiguous().view(-1, self.time_step, x.size(-2) * x.size(-1))

        x = nn.Linear(x.size(-1), self.spanN_count * self.num_quantiles)(x)

        return x

In [3]:
class MQCNNModel(pl.LightningModule):
    def __init__(self, static_features, timevarying_features, future_information, time_step, ltsp, lead_future,
                 global_hidden_units, horizon_specific_hidden_units,
                 horizon_agnostic_hidden_units, local_mlp_hidden_units, local_mlp_output_units):
        super(MQCNNModel, self).__init__()
        #self.input_tensor = input_tensor
        self.time_step = time_step
        self.static_features = static_features
        self.num_static_features = len(static_features)
        self.timevarying_features = timevarying_features
        self.num_timevarying_features = len(timevarying_features)
        self.future_information = future_information
        self.num_future_features = len(future_information)
        self.ltsp = ltsp
        self.lead_future = lead_future
        self.global_hidden_units = global_hidden_units
        self.horizon_specific_hidden_units = horizon_specific_hidden_units
        self.horizon_agnostic_hidden_units = horizon_agnostic_hidden_units
        self.local_mlp_hidden_units = local_mlp_hidden_units
        self.local_mlp_output_units = local_mlp_output_units

        self.encoder = MQCNNEncoder(self.time_step, self.static_features, self.timevarying_features,
                                   self.num_static_features, self.num_timevarying_features)
        
        self.decoder = MQCNNDecoder(self.time_step, self.lead_future, self.ltsp, self.future_information,
                                    self.num_future_features, self.global_hidden_units, self.horizon_specific_hidden_units,
                                    self.horizon_agnostic_hidden_units, self.local_mlp_hidden_units,
                                    self.local_mlp_output_units)
                                    

    def forward(self, x):
        encoding = self.encoder(x)
        output = self.decoder(x, encoding)

        return output

    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(), lr = 1e-2)

        return optimizer

    def training_step(self, batch, batch_idx):
        x, y = batch, batch['targets']
                                    
        quantiles = torch.tensor([0.5, 0.9]).view(2, 1)

        outputs = self(x)

        loss = self.loss(outputs, y, quantiles)
        pbar = {'train_loss': loss[0] + loss[1]}

        return {"loss": loss[0] + loss[1], "progress_bar": pbar}

    def train_dataloader(self):

        train_data = Dataset("-----enter args here------")

        train_loader = DataLoader(train_data, batch_size = batch_size)

        return train_loader
                                    
    def loss(self, outputs, targets, quantiles):
        l = outputs - targets.repeat_interleave(2, dim=2)
        
        p50 = torch.mul(torch.where(l > torch.zeros(l.shape), l, torch.zeros_like(l)), 1 - quantiles[0]) + \
            torch.mul(torch.where(l < torch.zeros(l.shape), -l, torch.zeros_like(l)), quantiles[0])
    
        p90 = torch.mul(torch.where(l > torch.zeros(l.shape), l, torch.zeros_like(l)), 1 - quantiles[1]) + \
            torch.mul(torch.where(l < torch.zeros(l.shape), -l, torch.zeros_like(l)), quantiles[1])
        
        p50 = p50.mean()
        p90 = p90.mean()
    
        return p50, p90

In [4]:
class Dataset(Dataset):
    
    def __init__(self, data, static_features, timevarying_features, future_information, 
                 target, train_time_step, predict_time_step, num_quantiles, ltsp):
        
        self.data = data
        self.train_time_step = train_time_step
        self.predict_time_step = predict_time_step
        self.num_quantiles = num_quantiles
        
        self.ltsp_kernel = _ltsp_kernel(predict_time_step, ltsp)
        
        self.ltsp_idx = _ltsp_idx(time_step = train_time_step, Tpred = predict_time_step)
        
        
        self.static_features = torch.tensor(self.data.\
                              loc[self.data['time_idx'] < self.train_time_step][static_features].\
                to_numpy(np.float64).reshape(-1, self.train_time_step, len(static_features))).float()
        
        self.timevarying_features = torch.tensor(self.data.\
                              loc[self.data['time_idx'] < self.train_time_step][timevarying_features].\
                to_numpy(np.float64).reshape(-1, self.train_time_step, len(timevarying_features))).float()
            
        self.future_information = torch.tensor(self.data[future_information].\
                to_numpy(np.float64).reshape(-1, (self.train_time_step + self.predict_time_step), len(future_information))).float()
        
        self.targets = torch.tensor(self.data[target].\
            to_numpy(np.float64).reshape(-1, (self.train_time_step + self.predict_time_step))).float()
        
        self.targets = _apply_ltsp_kernel(self.targets, self.ltsp_idx, self.ltsp_kernel)
        
    def __len__(self):
        
        return self.timevarying_features.shape[1]
    
    def __getitem__(self, idx):
        
        static_features = self.static_features[idx, :, :]
        timevarying_features = self.timevarying_features[idx, :, :]
        future_information = self.future_information[idx, :, :]
        targets = self.targets[idx, :, :]
        
        return dict(static_features = static_features, timevarying_features = timevarying_features,
                    future_information = future_information, targets = targets)
    
def _ltsp_idx(time_step, Tpred):
        idx = np.arange(time_step).reshape(-1, 1) + np.arange(Tpred)
        return torch.tensor(idx)

def _ltsp_kernel(Tpred, ltsp, normalize = True):
    
        ltsp_count = len(ltsp)
        kernel = np.zeros((Tpred, ltsp_count), dtype = 'float32')
        for i in range(len(ltsp)):
            lead_time = ltsp[i][0]
            span = ltsp[i][1]
            if normalize:
                kernel[lead_time:lead_time + span, i] = 1.0/span
            else:
                kernel[lead_time:lead_time + span, i] = 1.0

        return torch.tensor(kernel)

def _apply_ltsp_kernel(s, ltsp_idx, ltsp_kernel):
        s_ltsp = s[:, ltsp_idx].float()
        
        return s_ltsp @ ltsp_kernel 

In [5]:
data = get_stallion_data()

In [6]:
# add time index
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month

data["time_idx"] -= data["time_idx"].min()
# add additional features

# show sample data
data.sample(10, random_state=521)

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,...,independence_day,revolution_day_memorial,regional_games,fifa_u_17_world_cup,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries,time_idx
291,Agency_25,SKU_03,0.5076,2013-01-01,492612703,718394219,25.845238,1264.162234,1152.473405,111.688829,...,0,0,0,0,0,0,0,8.835008,228,0
871,Agency_29,SKU_02,8.748,2015-01-01,498567142,762225057,27.584615,1316.098485,1296.804924,19.293561,...,0,0,0,0,0,0,0,1.465966,177,24
19532,Agency_47,SKU_01,4.968,2013-09-01,454252482,789624076,30.665957,1269.25,1266.49049,2.75951,...,1,0,0,0,0,0,0,0.217413,322,8
2089,Agency_53,SKU_07,21.6825,2013-10-01,480693900,791658684,29.197727,1193.842373,1128.124395,65.717978,...,0,0,0,0,0,1,0,5.504745,240,9
9755,Agency_17,SKU_02,960.552,2015-03-01,515468092,871204688,23.60812,1338.334248,1232.128069,106.206179,...,0,0,0,0,0,0,1,7.935699,259,26
7561,Agency_05,SKU_03,1184.6535,2014-02-01,425528909,734443953,28.668254,1369.556376,1161.135214,208.421162,...,0,0,0,0,0,0,0,15.218151,21,13
19204,Agency_11,SKU_05,5.5593,2017-08-01,623319783,1049868815,31.915385,1922.486644,1651.307674,271.17897,...,0,0,0,0,0,0,0,14.105636,17,55
8781,Agency_48,SKU_04,4275.1605,2013-03-01,509281531,892192092,26.767857,1761.258209,1546.05967,215.198539,...,0,0,0,0,0,0,1,12.218455,151,2
2540,Agency_07,SKU_21,0.0,2015-10-01,544203593,761469815,28.987755,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0.0,300,33
12084,Agency_21,SKU_03,46.3608,2017-04-01,589969396,940912941,32.47891,1675.922116,1413.571789,262.350327,...,0,0,0,0,0,0,0,15.654088,181,51


In [7]:
data['month'] = data['date'].dt.month

data_sorted = data.sort_values(['agency', 'sku', 'date'])

data_sorted = pd.get_dummies(data_sorted, columns=['month'])

static_cols=['avg_population_2017']
timevarying_cols=['volume', 'industry_volume', 'soda_volume', 'price_regular']
future_cols=['month_1', 'month_2','month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8',
                    'month_9', 'month_10', 'month_11', 'month_12', 'price_regular']

In [8]:
ltsp = [(i, 1) for i in range(6)]
len(ltsp)

6

In [9]:
training = Dataset(data_sorted,
                   static_features = static_cols,
                   timevarying_features = timevarying_cols,
                   future_information = future_cols,
                   target=['volume'], 
                   train_time_step=54, 
                   predict_time_step=6,
                   num_quantiles = 2,
                   ltsp = ltsp)

In [10]:
MQCNN = MQCNNModel(static_cols, timevarying_cols, future_cols, 
                   54, ltsp, 6, 50, 20, 100, 50, 10)

In [11]:
trainer = pl.Trainer(max_epochs = 10)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [12]:
train_loader = DataLoader(training, 16)

In [13]:
trainer.fit(MQCNN, train_loader)


  | Name    | Type         | Params
-----------------------------------------
0 | encoder | MQCNNEncoder | 9 K   
1 | decoder | MQCNNDecoder | 3 K   


HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

Please use self.log(...) inside the lightningModule instead.

# log on a step or aggregate epoch metric to the logger and/or progress bar
# (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)





1

In [14]:
trainer.__dir__()

['dev_debugger',
 'config_validator',
 'data_connector',
 'optimizer_connector',
 'accelerator_connector',
 'logger_connector',
 'model_connector',
 'precision_connector',
 'callback_connector',
 'debugging_connector',
 'training_tricks_connector',
 'profile_connector',
 'checkpoint_connector',
 'slurm_connector',
 'tuner',
 'accelerator_backend',
 'evaluation_loop',
 'train_loop',
 'plugin_connector',
 'weights_summary',
 'model',
 'resume_from_checkpoint',
 '_default_root_dir',
 '_weights_save_path',
 'callbacks',
 '_progress_bar_callback',
 'lr_schedulers',
 'optimizers',
 'optimizer_frequencies',
 'datamodule',
 'prepare_data_per_node',
 'check_val_every_n_epoch',
 'reload_dataloaders_every_epoch',
 '_is_data_prepared',
 'terminate_on_nan',
 'gradient_clip_val',
 'track_grad_norm',
 'accumulate_grad_batches',
 'accumulation_scheduler',
 'truncated_bptt_steps',
 'deterministic',
 'distributed_backend',
 'benchmark',
 'num_nodes',
 'log_gpu_memory',
 'sync_batchnorm',
 'tpu_cores',
 