In [1]:
# default_exp data.tsloader_faston

In [2]:
#hide
%load_ext autoreload
%autoreload 2

In [3]:
#export
import numpy as np
import pandas as pd
import random
import torch as t
import copy
from fastcore.foundation import patch
from nixtla.data.ontsdataset import TimeSeriesDataset
from collections import defaultdict

In [4]:
# export
class TimeSeriesLoader(object):
    """Rolling-window batch loader built on top of a ``TimeSeriesDataset``.

    On construction (and whenever the offset changes) the loader asks the
    dataset for a filtered tensor, pads it, and unfolds it into fixed-size
    windows of ``input_size + output_size`` time steps. Iterating the loader
    yields an endless stream of batches:

    * in train mode (``train()``, the default) windows are sampled with
      replacement, using the per-window probabilities returned by the dataset,
      among the windows that have at least one active ``outsample_mask`` step;
    * in eval mode (``eval()``) every batch is the last ``n_series`` windows.

    Parameters
    ----------
    ts_dataset: TimeSeriesDataset
        Data source. The loader reads ``t_cols``, ``X_t_cols``, ``x_s``,
        ``n_series`` and calls ``get_filtered_tensor``.
    model: str
        Selects the batch layout. Only ``'nbeats'`` is implemented.
    offset: int
        Forwarded to ``ts_dataset.get_filtered_tensor``.
    window_sampling_limit: int
        Forwarded to ``ts_dataset.get_filtered_tensor``.
    input_size: int
        Number of leading time steps of each window exposed as insample data.
    output_size: int
        Number of trailing time steps of each window exposed as outsample data.
    idx_to_sample_freq: int
        Stored but currently unused (see TODO in the signature).
    batch_size: int
        Number of windows drawn per training batch. If ``<= 0``, a training
        batch contains every sampleable window.
    train_loader: bool
        If True the outsample mask is multiplied by the dataset's train mask,
        otherwise by its complement (``1 - train_mask``).
    """
    def __init__(self,
                 ts_dataset:TimeSeriesDataset,
                 model:str,
                 offset:int,
                 window_sampling_limit: int,
                 input_size: int,
                 output_size: int,
                 idx_to_sample_freq: int, #TODO: not active yet
                 batch_size: int,
                 train_loader: bool):
        self.model = model
        self.window_sampling_limit = window_sampling_limit
        self.input_size = input_size
        self.output_size = output_size
        self.batch_size = batch_size
        self.idx_to_sample_freq = idx_to_sample_freq
        self.offset = offset
        self.ts_dataset = ts_dataset
        self.t_cols = self.ts_dataset.t_cols
        self.X_t_cols = self.ts_dataset.X_t_cols
        self.train_loader = train_loader

        # Create rolling window matrix and broadcasted x_s
        self._create_train_data()
        self._is_train = True

    def _update_sampling_windows_idxs(self):
        """Return the indices of windows with at least one active outsample step.

        A window is sampleable when the sum of its ``outsample_mask`` channel
        over the last ``output_size`` time steps is positive.
        """
        mask_channel = self.t_cols.index('outsample_mask')
        active_steps = t.sum(self.ts_windows[:, mask_channel, -self.output_size:], axis=1)
        sampling_idx = t.nonzero(active_steps > 0)
        return list(sampling_idx.flatten().numpy())

    def _create_windows_tensor(self):
        """Build the rolling-window tensor and its per-window probabilities.

        Returns
        -------
        windows: torch.Tensor
            Tensor of shape ``(n_windows, n_channels, input_size + output_size)``.
        windows_prob:
            Per-window sampling weights, returned by the dataset unchanged; its
            length must equal ``n_windows``.

        TODO: once the other dataloader exists, and if it is compatible, turn
        this into a transform function in utils.
        """
        tensor, right_padding, train_mask, windows_prob = self.ts_dataset.get_filtered_tensor(
            offset=self.offset,
            output_size=self.output_size,
            window_sampling_limit=self.window_sampling_limit)

        # Explicit copy: the outsample mask is edited in place below, and those
        # edits must never write through to the array owned by the dataset
        # (which may be shared with other loaders).
        tensor = t.tensor(np.asarray(tensor), dtype=t.get_default_dtype())
        _, n_channels, _ = tensor.size()

        y_channel = self.t_cols.index('y')
        mask_channel = self.t_cols.index('outsample_mask')

        # outsample_mask flags existing values in the series; train_mask splits
        # them between the train loader and its complement (validation).
        if self.train_loader:
            mask = train_mask
        else:
            mask = 1 - train_mask

        tensor[:, mask_channel, :] = tensor[:, mask_channel, :] * t.Tensor(mask)

        # Left pad so that the first window ends with the first observation on its
        # last insample step; right pad by the amount requested by the dataset.
        padder = t.nn.ConstantPad1d(padding=(self.input_size-1, right_padding), value=0)
        tensor = padder(tensor)

        # Last output_size outsample_mask and y to 0
        tensor[:, y_channel, -self.output_size:] = 0 # overkill to ensure no leakage
        tensor[:, mask_channel, -self.output_size:] = 0

        # Creating rolling windows: (series, channels, n_steps, window) ->
        # (n_steps, series, channels, window) -> (n_steps * series, channels, window)
        window_size = self.input_size + self.output_size
        windows = tensor.unfold(dimension=-1, size=window_size, step=1)
        windows = windows.permute(2,0,1,3)
        windows = windows.reshape(-1, n_channels, window_size)

        assert len(windows)==len(windows_prob), f'Windows len {len(windows)} must be equal to windows_prob {len(windows_prob)}'
        return windows, windows_prob

    def __len__(self):
        """Return the number of series in the underlying dataset.

        The previous implementation read ``self.len_series``, an attribute this
        class never defines, so ``len(loader)`` always raised AttributeError.
        """
        return self.ts_dataset.n_series

    def __iter__(self):
        """Yield batches forever; the caller decides when to stop iterating."""
        #TODO: check how batch_size=-1 is handled in a torch dataloader. Another option is
        # simply using a large batch_size; it could also be handled per epoch.
        while True:
            if self._is_train:
                if self.batch_size > 0:
                    sampled_ts_indices = np.random.choice(a=self.windows_sampling_idx, p=self.windows_prob,
                                                          size=self.batch_size, replace=True)
                else:
                    # Non-positive batch_size: use every sampleable window
                    sampled_ts_indices = self.windows_sampling_idx
            else:
                # Get last n_series windows, dataset is ordered because of unfold
                sampled_ts_indices = list(range(self.n_windows-self.ts_dataset.n_series, self.n_windows))

            batch = self.__get_item__(sampled_ts_indices)

            yield batch

    def __get_item__(self, index):
        """Build the batch for the given window indices according to ``self.model``.

        Raises
        ------
        AssertionError
            If ``self.model`` is not ``'nbeats'``. Raised explicitly (instead of
            through ``assert``) so the check also holds under ``python -O``.
        """
        if self.model == 'nbeats':
            return self._nbeats_batch(index)
        if self.model == 'esrnn':
            raise AssertionError('hacer esrnn')
        raise AssertionError('error')

    def _nbeats_batch(self, index):
        """Slice the selected windows into the dictionary of N-BEATS inputs.

        Channels are located through ``self.t_cols``. Exogenous variables are
        taken as the channels strictly between ``'ejecutado'`` and
        ``'insample_mask'``.
        """
        windows = self.ts_windows[index]
        x_s = self.x_s[index]

        ejecutado_channel = self.t_cols.index('ejecutado')
        insample_mask_channel = self.t_cols.index('insample_mask')
        # Exogenous channels: skips both y and ejecutado
        # (assumes 'y' is placed before 'ejecutado' in t_cols -- TODO confirm)
        x_t_channels = slice(ejecutado_channel + 1, insample_mask_channel)

        # insample y as ejecutado
        insample_y = windows[:, ejecutado_channel, :self.input_size]
        insample_x_t = windows[:, x_t_channels, :self.input_size]
        insample_mask = windows[:, insample_mask_channel, :self.input_size]

        # y as outsample y
        outsample_y = windows[:, self.t_cols.index('y'), self.input_size:]
        outsample_x_t = windows[:, x_t_channels, self.input_size:]
        outsample_mask = windows[:, self.t_cols.index('outsample_mask'), self.input_size:]

        batch = {'insample_y':insample_y, 'insample_x_t':insample_x_t, 'insample_mask':insample_mask,
                  'outsample_y':outsample_y, 'outsample_x_t':outsample_x_t, 'outsample_mask':outsample_mask,
                  'x_s':x_s}

        return batch

    def _create_train_data(self):
        """(Re)build windows, broadcasted static features and sampling weights."""
        # Create rolling window matrix
        self.ts_windows, windows_prob = self._create_windows_tensor()
        self.n_windows = len(self.ts_windows)
        # Broadcast x_s: This works because unfold in windows_tensor, padded windows, unshuffled data.
        # Integer floor division avoids the float round trip of int(a / b).
        self.x_s = self.ts_dataset.x_s.repeat(self.n_windows // self.ts_dataset.n_series, 1)
        self.windows_sampling_idx  = self._update_sampling_windows_idxs()
        self.windows_prob = windows_prob[self.windows_sampling_idx] # Obtain probabilities for each windows_idx
        # Renormalize so the retained probabilities sum to one
        self.windows_prob = self.windows_prob/np.sum(self.windows_prob)

    def update_offset(self, offset):
        """Change the offset and rebuild the windows; no-op if it is unchanged."""
        if offset == self.offset:
            return # Avoid extra computation
        self.offset = offset
        self._create_train_data()

    def get_meta_data_var(self, var):
        """Delegate to ``ts_dataset.get_meta_data_var``."""
        return self.ts_dataset.get_meta_data_var(var)

    def get_n_variables(self):
        """Return the dataset's ``(n_x_t, n_s_t)`` pair."""
        return self.ts_dataset.n_x_t, self.ts_dataset.n_s_t

    def get_n_series(self):
        """Return the dataset's ``n_series``."""
        return self.ts_dataset.n_series

    def get_max_len(self):
        """Return the dataset's ``max_len``."""
        return self.ts_dataset.max_len

    def get_n_channels(self):
        """Return the dataset's ``n_channels``."""
        return self.ts_dataset.n_channels

    def get_frequency(self):
        """Return the dataset's ``frequency``."""
        return self.ts_dataset.frequency

    def train(self):
        """Switch iteration to train mode (random window sampling)."""
        self._is_train = True

    def eval(self):
        """Switch iteration to eval mode (last ``n_series`` windows)."""
        self._is_train = False

In [7]:
from nixtla.data.datasets.on import load_on_data

# Load the "on" dataset. '2020-11-1' is load_on_data's only argument here
# (presumably a reference/cut-off date -- TODO confirm against load_on_data).
(Y_insample_df,
 X_insample_df,
 Y_outsample_df,
 X_outsample_df,
 f_cols) = load_on_data('2020-11-1')


In [6]:
# Train mask over the insample rows: 1 keeps a row for the train loader,
# 0 hands it to the complementary (validation) loader.
HOLDOUT_DAYS = 60
STEPS_PER_DAY = 24 * 4  # 24 hours x 4 fifteen-minute readings per hour
n_holdout_steps = HOLDOUT_DAYS * STEPS_PER_DAY

train_outsample_mask = np.ones(len(Y_insample_df))
train_outsample_mask[-n_holdout_steps:] = 0  # hold out the trailing 60 days
# ndarray.sum() is vectorized; builtin sum() would iterate element by element in Python
print("sum(train_outsample_mask)", train_outsample_mask.sum())
on_dataset = TimeSeriesDataset(Y_df=Y_insample_df, S_df=None, X_df=X_insample_df, ts_train_mask=train_outsample_mask)

NameError: name 'Y_df' is not defined