In [1]:
# default_exp data.ts_loader_general

In [1]:
#hide
%load_ext autoreload
%autoreload 2

In [2]:
#export
import numpy as np
import pandas as pd
import random
import torch as t
import copy
from fastcore.foundation import patch
from nixtla.data.ts_dataset import TimeSeriesDataset
from collections import defaultdict

In [3]:
#export
class TimeSeriesLoader(object):
    """Endless batch generator of rolling windows built from a `TimeSeriesDataset`.

    Every batch is a dict with the keys `insample_y`, `insample_x_t`,
    `insample_mask`, `outsample_y`, `outsample_x_t`, `outsample_mask` and `x_s`.
    The insample entries cover the first `input_size` steps of each window and
    the outsample entries the remaining `output_size` steps.

    In train mode (the default) a random subset of series is drawn and
    `batch_size` windows are sampled, with replacement, among the windows that
    have at least one active `outsample_mask` step. In eval mode the batch holds
    the last window of every series.

    The loader relies on `ts_dataset.t_cols` containing `'y'`, `'insample_mask'`
    and `'outsample_mask'`, with the temporal exogenous channels located
    strictly between `'y'` and `'insample_mask'`.
    """

    def __init__(self,
                 ts_dataset: 'TimeSeriesDataset',
                 model: str,
                 offset: int,
                 window_sampling_limit: int,
                 input_size: int,
                 output_size: int,
                 idx_to_sample_freq: int,  # TODO: not active yet
                 batch_size: int,
                 n_series_per_batch: int = None,
                 ts_outsample_mask: list = None):
        """
        Parameters
        ----------
        ts_dataset: TimeSeriesDataset
            Source dataset. It is deep-copied, so the mask overwrite below never
            touches the caller's object.
        model: str
            Name of the model the batches are built for. Only 'nbeats' is
            implemented.
        offset: int
            Forwarded to `ts_dataset.get_filtered_tensor`.
        window_sampling_limit: int
            Forwarded to `ts_dataset.get_filtered_tensor`.
        input_size: int
            Number of insample (input) steps per window.
        output_size: int
            Number of outsample (forecast) steps per window.
        idx_to_sample_freq: int
            Stored but currently unused.
        batch_size: int
            Number of windows per training batch.
        n_series_per_batch: int, optional
            Number of series drawn per training batch. Defaults to `batch_size`.
        ts_outsample_mask: list, optional
            When non-empty, replaces the dataset's `outsample_mask` channel.
        """
        self.model = model
        self.window_sampling_limit = window_sampling_limit
        self.input_size = input_size
        self.output_size = output_size
        self.batch_size = batch_size
        self.idx_to_sample_freq = idx_to_sample_freq
        self.offset = offset
        self.ts_dataset = copy.deepcopy(ts_dataset)  # TODO: remove the deep copy
        # A fresh list per instance: a shared `[]` default would leak between loaders.
        self.ts_outsample_mask = [] if ts_outsample_mask is None else ts_outsample_mask
        self.t_cols = self.ts_dataset.t_cols
        self.n_series_per_batch = n_series_per_batch if n_series_per_batch is not None else batch_size
        self.windows_per_serie = self.batch_size // self.n_series_per_batch

        if len(self.ts_outsample_mask) > 0:
            self.ts_dataset.ts_tensor[:, self.t_cols.index('outsample_mask'), :] = \
                t.as_tensor(self.ts_outsample_mask, dtype=t.float32)

        self._is_train = True

    def _get_sampleable_windows_idxs(self, ts_windows):
        """Return the indices of the windows that may be sampled for training.

        A window qualifies when its `outsample_mask` channel has at least one
        positive entry within the last `output_size` steps.
        """
        mask_channel = self.t_cols.index('outsample_mask')
        active_steps = t.sum(ts_windows[:, mask_channel, -self.output_size:], dim=1)
        sampling_idx = t.nonzero(active_steps > 0)
        return list(sampling_idx.flatten().numpy())

    def _create_windows_tensor(self, ts_idxs=None):
        """Build all rolling windows for the series selected by `ts_idxs`.

        The filtered tensor is zero-padded with `input_size - 1` steps on the
        left and `right_padding` steps (as reported by the dataset) on the
        right, then unfolded with step 1 into windows of
        `input_size + output_size` steps.

        Returns a tensor of shape
        `(n_windows * n_series, n_channels, input_size + output_size)` laid out
        window-major: flat index `i` is window `i // n_series` of series
        `i % n_series`.

        TODO: when the other dataloader is created, move this into a shared
        transform function in utils if it is compatible.
        """
        tensor, right_padding = self.ts_dataset.get_filtered_tensor(offset=self.offset,
                                                                    output_size=self.output_size,
                                                                    window_sampling_limit=self.window_sampling_limit,
                                                                    ts_idxs=ts_idxs)
        tensor = t.Tensor(tensor)
        _, n_channels, _ = tensor.size()

        padder = t.nn.ConstantPad1d(padding=(self.input_size - 1, right_padding), value=0)
        tensor = padder(tensor)

        # Zero the last output_size steps of y and outsample_mask.
        tensor[:, self.t_cols.index('y'), -self.output_size:] = 0  # overkill to ensure no leakage
        tensor[:, self.t_cols.index('outsample_mask'), -self.output_size:] = 0

        # Rolling windows: (series, channels, windows, steps) -> (windows, series, channels, steps)
        window_size = self.input_size + self.output_size
        windows = tensor.unfold(dimension=-1, size=window_size, step=1)
        windows = windows.permute(2, 0, 1, 3)
        windows = windows.reshape(-1, n_channels, window_size)
        return windows

    def __iter__(self):
        """Yield batches forever.

        Train mode draws `n_series_per_batch` series with replacement for every
        batch; eval mode uses every series, so it yields the same batch each time.
        """
        while True:
            if self._is_train:
                ts_idxs = np.random.choice(self.get_n_series(),
                                           size=self.n_series_per_batch,
                                           replace=True)
            else:
                ts_idxs = range(self.get_n_series())

            yield self.__get_item__(index=ts_idxs)

    def __get_item__(self, index):
        """Build the batch for the series positions in `index` according to `self.model`.

        Raises
        ------
        NotImplementedError
            If the model is 'esrnn'.
        ValueError
            If the model name is not recognised.
        """
        # Explicit raises instead of `assert`, which is stripped under `python -O`
        # and would make this method silently return None.
        if self.model == 'nbeats':
            return self._nbeats_batch(index)
        if self.model == 'esrnn':
            raise NotImplementedError("batch creation for model 'esrnn' is not implemented yet")
        raise ValueError(f"unknown model {self.model!r}; expected 'nbeats' or 'esrnn'")

    def _nbeats_batch(self, index):
        """Create an N-BEATS batch from the series positions in `index`.

        Returns a dict whose temporal entries have `batch_size` rows in train
        mode and `len(index)` rows in eval mode; `x_s` holds the static
        features of the series each row was taken from.
        """
        # Create windows for each selected series.
        windows = self._create_windows_tensor(index)
        n_batch_series = len(index)

        # Static features, one row per selected series, in the same order as `index`.
        # NOTE(review): assumes get_filtered_tensor keeps the order of ts_idxs - confirm.
        x_s = self.ts_dataset.x_s[index]

        if self._is_train:
            sampleable_windows = self._get_sampleable_windows_idxs(windows)
            if len(sampleable_windows) == 0:
                raise ValueError('no window with an active outsample_mask is available for sampling')
            windows_idxs = np.random.choice(sampleable_windows, self.batch_size, replace=True)
            windows = windows[windows_idxs]
            # Windows are laid out window-major, so the series of flat index i is
            # i % n_batch_series. Indexing a tiled copy of x_s with the raw window
            # index went out of range once a series had more windows than
            # windows_per_serie.
            x_s = x_s[windows_idxs % n_batch_series]
        else:
            # The last block of the window-major layout is the final window of every series.
            windows = windows[-n_batch_series:]

        y_channel = self.t_cols.index('y')
        insample_mask_channel = self.t_cols.index('insample_mask')
        outsample_mask_channel = self.t_cols.index('outsample_mask')

        insample_y = windows[:, y_channel, :self.input_size]
        insample_x_t = windows[:, (y_channel + 1):insample_mask_channel, :self.input_size]
        insample_mask = windows[:, insample_mask_channel, :self.input_size]

        outsample_y = windows[:, y_channel, self.input_size:]
        outsample_x_t = windows[:, (y_channel + 1):insample_mask_channel, self.input_size:]
        outsample_mask = windows[:, outsample_mask_channel, self.input_size:]

        batch = {'insample_y': insample_y, 'insample_x_t': insample_x_t, 'insample_mask': insample_mask,
                 'outsample_y': outsample_y, 'outsample_x_t': outsample_x_t, 'outsample_mask': outsample_mask,
                 'x_s': x_s}

        return batch

    def update_offset(self, offset):
        """Set the offset forwarded to the dataset when windows are built."""
        if offset != self.offset:
            self.offset = offset

    def get_meta_data_var(self, var):
        """Return `ts_dataset.get_meta_data_var(var)`."""
        return self.ts_dataset.get_meta_data_var(var)

    def get_n_variables(self):
        """Return the tuple `(ts_dataset.n_x_t, ts_dataset.n_s_t)`."""
        return self.ts_dataset.n_x_t, self.ts_dataset.n_s_t

    def get_n_series(self):
        """Return the number of series in the dataset."""
        return self.ts_dataset.n_series

    def get_max_len(self):
        """Return `ts_dataset.max_len`."""
        return self.ts_dataset.max_len

    def get_n_channels(self):
        """Return `ts_dataset.n_channels`."""
        return self.ts_dataset.n_channels

    def get_frequency(self):
        """Return `ts_dataset.frequency`."""
        return self.ts_dataset.frequency

    def train(self):
        """Switch to train mode (random series and random sampleable windows)."""
        self._is_train = True

    def eval(self):
        """Switch to eval mode (last window of every series)."""
        self._is_train = False

In [None]:
# Load the first M4 group (M4Info.groups[0]) from ../data to exercise the loader.
# NOTE(review): the variable is named `m3_dataset` although it holds M4 data; the
# name is kept because the next cell refers to it. This import also lives outside
# the top import cell.
from nixtla.data.datasets.m4 import M4, M4Info
m3_dataset = M4.load(directory='../data', group=M4Info.groups[0])

In [11]:
# N-BEATS loader: 8 input steps, 6 forecast steps, 1024 windows per batch.
# n_series_per_batch is left unset, so it defaults to batch_size.
ts_loader = TimeSeriesLoader(
    ts_dataset=m3_dataset,
    model='nbeats',
    offset=0,
    window_sampling_limit=50,
    input_size=8,
    output_size=6,
    idx_to_sample_freq=1,
    batch_size=1024,
)

In [12]:
# Draw a single training batch from the (endless) loader.
dataloader = iter(ts_loader)
batch = next(dataloader)

# Unpack the temporal entries of the batch dict into notebook-level names.
insample_y, insample_x_t, insample_mask = (
    batch['insample_y'], batch['insample_x_t'], batch['insample_mask'])
outsample_y, outsample_x_t, outsample_mask = (
    batch['outsample_y'], batch['outsample_x_t'], batch['outsample_mask'])

In [13]:
# Input part of each sampled window (one row per window, input_size columns).
insample_y

tensor([[ 4340.4399,  4789.9502,  5128.6602,  ...,  5640.7202,  5730.7900,
          6037.7598],
        [ 1706.0300,  1984.7300,  2358.7500,  ...,  3717.4500,  4201.3501,
          4796.0298],
        [    0.0000,     0.0000,     0.0000,  ...,  1950.0000,  2540.0000,
          2670.0000],
        ...,
        [10100.0000, 10400.0000, 13800.0000,  ..., 10500.0000, 10600.0000,
          8800.0000],
        [ 6300.0000,  3900.0000,  3600.0000,  ...,  5100.0000,  4700.0000,
          4900.0000],
        [ 8473.0000,  8787.5000,  8819.0000,  ..., 10264.5000,  9622.0000,
          9088.5000]])

In [14]:
# NOTE(review): exact duplicate of the previous cell; one of the two can be dropped.
insample_y

tensor([[ 4340.4399,  4789.9502,  5128.6602,  ...,  5640.7202,  5730.7900,
          6037.7598],
        [ 1706.0300,  1984.7300,  2358.7500,  ...,  3717.4500,  4201.3501,
          4796.0298],
        [    0.0000,     0.0000,     0.0000,  ...,  1950.0000,  2540.0000,
          2670.0000],
        ...,
        [10100.0000, 10400.0000, 13800.0000,  ..., 10500.0000, 10600.0000,
          8800.0000],
        [ 6300.0000,  3900.0000,  3600.0000,  ...,  5100.0000,  4700.0000,
          4900.0000],
        [ 8473.0000,  8787.5000,  8819.0000,  ..., 10264.5000,  9622.0000,
          9088.5000]])

In [15]:
# Forecast targets of each sampled window (one row per window, output_size columns).
outsample_y

tensor([[ 6225.0000,  6491.6899,  6818.6299,  7070.0400,  7959.7500,  8308.8701],
        [ 5224.8799,  5669.1699,  6139.4199,  6965.0298,  7657.7300,  8130.2300],
        [ 2930.0000,  3120.0000,  3390.0000,  3360.0000,  3780.0000,  4090.0000],
        ...,
        [ 8100.0000,  8000.0000,  8100.0000,  9900.0000, 11500.0000, 10600.0000],
        [ 7900.0000,  6100.0000,  6200.0000,  7500.0000,  6100.0000,  6100.0000],
        [ 8940.0000,  8085.5000,  7955.5000,  7635.0000,  7402.5000,  6958.0000]])

In [16]:
# Expected shape is (batch_size, input_size), i.e. (1024, 8) for the loader above.
insample_mask.shape

torch.Size([1024, 8])

In [17]:
# Mean of the forecast mask over the batch; if the mask is 0/1 this is the share
# of active forecast positions - TODO confirm the mask is binary.
outsample_mask.mean()

tensor(0.8547)