In [1]:
import pandas as pd
import numpy as np
from torch import Tensor
import torch
import os
import warnings

warnings.filterwarnings("ignore")  # avoid printing out absolute paths
from datetime import date, timedelta
from torch.utils.data import DataLoader
from torch.utils.data import Dataset


class TimeSeriesDataset(Dataset):

    def __init__(self, df: pd.DataFrame, encoder_len: int, decoder_len: int, targets: list, reals=None,
                 categoricals=None, time_var=None):
        self.df = df
        self.encoder_len = encoder_len
        self.decoder_len = decoder_len
        self.targets = targets
        self.reals = reals
        self.categoricals = categoricals
        self.time_var = time_var

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):

        """
        Get data window of  size = decoder + encoder length from the given element index
        :param idx:
        :return:
        """

        def get_window(idx_start: int, idx_end: int):

            """
            Get data window of  size = decoder + encoder length
            :param self:
            :param idx_start: index of the first element in the window
            :param idx_end: index of the last element in the window
            :return:
            """

            tensor_arr = []

            targets_tensor = torch.Tensor(size=(self.encoder_len + self.decoder_len, len(self.targets)))
            targets_tensor[:, :] = torch.from_numpy(self.df.iloc[idx_start: idx_end][self.targets].values).to(targets_tensor)
            tensor_arr.append(targets_tensor)

            if self.reals:
                conts_tensor = torch.Tensor(size=(self.encoder_len + self.decoder_len, len(self.reals)))
                conts_tensor[:, :] = torch.from_numpy(self.df.iloc[idx_start: idx_end][self.reals].values).to(conts_tensor)
                tensor_arr.append(conts_tensor)

            if self.categoricals:
                cats_tensor = torch.Tensor(size=(self.encoder_len + self.decoder_len, len(self.categoricals)))
                cats_tensor[:, :] = torch.from_numpy(self.df.iloc[idx_start: idx_end][self.categoricals].values).to(cats_tensor)
                tensor_arr.append(cats_tensor)

            window = torch.cat(tensor_arr, 1)

            return window

        last_data_element_idx = self.df.shape[0] - 1
        window_size = self.encoder_len + self.decoder_len
        begin_idx = idx - self.encoder_len
        end_idx = idx + self.decoder_len

        if begin_idx < 0:
            # return window starting from idx=0
            sample = get_window(0, window_size)
        elif end_idx > last_data_element_idx:
            # return window, ending on the last element in the dataset
            sample = get_window(last_data_element_idx - window_size, last_data_element_idx)
        else:
            sample = get_window(begin_idx, end_idx)

        return sample

In [2]:
np.random.seed(666)
n = 100
time_series = np.random.random(n)
time_series

array([0.70043712, 0.84418664, 0.67651434, 0.72785806, 0.95145796,
       0.0127032 , 0.4135877 , 0.04881279, 0.09992856, 0.50806631,
       0.20024754, 0.74415417, 0.192892  , 0.70084475, 0.29322811,
       0.77447945, 0.00510884, 0.11285765, 0.11095367, 0.24766823,
       0.0232363 , 0.72732115, 0.34003494, 0.19750316, 0.90917959,
       0.97834699, 0.53280254, 0.25913185, 0.58381262, 0.32569065,
       0.88889931, 0.62640453, 0.81887369, 0.54734542, 0.41671201,
       0.74304719, 0.36959638, 0.07516654, 0.77519298, 0.21940924,
       0.07934213, 0.48678052, 0.1536739 , 0.82846513, 0.19136857,
       0.27040895, 0.56103442, 0.90238039, 0.85178834, 0.41808196,
       0.39347627, 0.01622051, 0.29921337, 0.35377822, 0.89350267,
       0.78613657, 0.77138693, 0.42005486, 0.77602514, 0.46430814,
       0.18177017, 0.8840256 , 0.71879227, 0.6718813 , 0.25656363,
       0.43080182, 0.01645358, 0.23499383, 0.51117131, 0.29200924,
       0.50189351, 0.49827313, 0.10377152, 0.44644312, 0.96918

In [3]:
cov_time_series = np.random.random(n)
cov_time_series

array([0.93788262, 0.91099744, 0.39799354, 0.92530327, 0.74549922,
       0.78982636, 0.63770852, 0.99274137, 0.47610554, 0.10211419,
       0.87256332, 0.75612704, 0.29919884, 0.18625652, 0.49435351,
       0.0408955 , 0.47835228, 0.3089902 , 0.92058026, 0.91794568,
       0.46848893, 0.23431308, 0.08982494, 0.67940357, 0.65592832,
       0.0039444 , 0.06654134, 0.00112109, 0.66608382, 0.38565116,
       0.09405827, 0.45856757, 0.64434173, 0.59499774, 0.79060307,
       0.79996907, 0.67969792, 0.43875185, 0.26235889, 0.23652188,
       0.83900208, 0.36874334, 0.61918838, 0.46656433, 0.49250063,
       0.71862211, 0.65415881, 0.9665017 , 0.38957233, 0.97017219,
       0.36057961, 0.56184234, 0.03133558, 0.30480028, 0.07269465,
       0.46721993, 0.41345069, 0.42228271, 0.79491031, 0.05651855,
       0.89718201, 0.31869638, 0.36398678, 0.70548804, 0.23103497,
       0.54827043, 0.3195602 , 0.7302113 , 0.94257233, 0.77442252,
       0.86379077, 0.82903615, 0.81379647, 0.63166234, 0.68810

In [4]:
time_series_df = pd.DataFrame({'target': time_series, 'covariate': cov_time_series})
time_series_df

Unnamed: 0,target,covariate
0,0.700437,0.937883
1,0.844187,0.910997
2,0.676514,0.397994
3,0.727858,0.925303
4,0.951458,0.745499
...,...,...
95,0.647501,0.368005
96,0.702241,0.744313
97,0.429582,0.388443
98,0.167775,0.146351


In [5]:
encoder_length = 10
decoder_length = 2
training_dataset = TimeSeriesDataset(
    df=time_series_df,
    targets=['target'],
    encoder_len=encoder_length,
    decoder_len=decoder_length,
    reals=['covariate']
)

In [9]:
training_dataloader = DataLoader(
    dataset=training_dataset,
    batch_size=1,
    num_workers=8
)
tst = next(iter(training_dataloader))
tst.size()

torch.Size([1, 12, 2])

In [7]:
import torch
import torch.nn as nn
import pandas as pd

from torch import Tensor, log, exp

class DeepAR(nn.Module):
    def __init__(
            self,
            batch_size: int,
            hidden_size: int,
            input_size: int,
            likelihood: str = 'normal',
            device: str = 'cpu'
    ):
        """
        This class instantiates DeepAR.

        :param batch_size: size of the batch
        :param hidden_size: number of features in hidden state of rnn cell
        :param inout_size: number of expected features in the input tensor
        :param likelihood: desired likelihood
        :param device: device to calculate on
        """
        super(DeepAR, self).__init__()
        # here we initialize hidden states
        self._h_0 = torch.zeros((batch_size, hidden_size), device=device)
        self._c_0 = torch.zeros((batch_size, hidden_size), device=device)
        self._likelihood = likelihood
        self._device = device

        # here we create base architecture of LSTM cell
        self._lstm_cell = nn.LSTMCell(input_size=input_size, hidden_size=hidden_size)

    @property
    def h_0(self):
        return self._h_0

    @property
    def c_0(self):
        return self._c_0

    @property
    def likelihood(self):
        return self._likelihood

    @property
    def device(self):
        return self._device

    @property
    def lstm_cell(self):
        return self._lstm_cell

    def forward(
            self,
            input_tensor: Tensor # [batch_size, seq_len, input_size]
    ):
        """
        Forward method of our model
        :param input_tensor: input tensor [batch_size, seq_len, input_size]
        :return: output_tensor: output tensor contains parameters or desired distribution [batch_size, seq_len, num_pars]
        """
        batch_size, hidden_size = self.h_0.shape[0], self.h_0.shape[1]
        input_size, seq_len = input_tensor.shape[2], input_tensor.shape[1]
        num_pars = 2
        if self.likelihood == 'normal':
            output_tensor = torch.zeros((batch_size, seq_len, num_pars), device=self.device)
        else:
            raise NotImplementedError('this likelihood not yet implemented, gl&hf')
        # here we iterate through all cells (seq_len)
        num_cells = input_tensor.shape[1]

        if self.likelihood == 'normal':
            for cell_index in range(num_cells):
                self._h_0, self._c_0 = self.lstm_cell(input_tensor[:, cell_index, :], (self.h_0, self.c_0))
                mean = nn.Linear(in_features=hidden_size, out_features=1)(self.h_0)
                variance = log(1 + exp(nn.Linear(in_features=hidden_size, out_features=1)(self.h_0)))
                output_tensor[:, cell_index, 0] = torch.flatten(mean)
                output_tensor[:, cell_index, 1] = torch.flatten(variance)
        else:
            raise NotImplementedError('this likelihood not yet implemented, gl&hf')

        return output_tensor

In [11]:
hidden_size=12
model = DeepAR(batch_size=1, input_size=time_series_df.shape[1], hidden_size=hidden_size)
for batch_num, X in enumerate(training_dataloader):
    distrib_pars = model(X)
    print(f'distrib_pars = {distrib_pars}')
    # #print(f'shape of yhat = {y_hat.shape}; shape of y = {y.shape}')
    # loss = loss_function(y_hat, y.squeeze())
    # mean_loss += loss.item()
    # optimizer.zero_grad()
    # loss.backward()
    # optimizer.step()
    # batches_count += 1

distrib_pars = tensor([[[-0.2184,  0.6173],
         [-0.0121,  0.6075],
         [-0.2547,  0.6683],
         [ 0.0768,  0.8558],
         [-0.1699,  0.5325],
         [-0.2894,  0.5420],
         [-0.1711,  0.8906],
         [ 0.2801,  0.7003],
         [-0.0904,  0.7947],
         [ 0.1464,  0.5530],
         [-0.0432,  0.6617],
         [ 0.1724,  0.6653]]], grad_fn=<CopySlices>)
distrib_pars = tensor([[[ 0.0736,  0.9317],
         [-0.2946,  0.7896],
         [-0.1464,  0.6856],
         [ 0.1480,  0.5457],
         [ 0.2013,  0.7815],
         [-0.1202,  0.7254],
         [ 0.3004,  0.6818],
         [-0.0694,  0.8290],
         [-0.1612,  0.7585],
         [ 0.3977,  0.7929],
         [ 0.2283,  0.7246],
         [ 0.3267,  0.6063]]], grad_fn=<CopySlices>)
distrib_pars = tensor([[[ 0.4221,  0.5970],
         [ 0.1728,  0.7327],
         [-0.0988,  0.6339],
         [ 0.1168,  0.8869],
         [ 0.0722,  0.7321],
         [ 0.2399,  0.5288],
         [-0.2566,  0.6906],
        

In [12]:
a = np.array([1, 3, 5])
b = np.flip(a)
b

array([5, 3, 1])