In [1]:
import time
import numpy as np
import pandas as pd
import random
import torch as t
import copy
from fastcore.foundation import patch
from nixtla.data.tsdataset import TimeSeriesDataset
from nixtla.data.tsloader_fast import TimeSeriesLoader as TimeSeriesLoaderFast
from nixtla.data.tsloader_pinche import TimeSeriesLoader as TimeSeriesLoaderPinche
from nixtla.data.tsloader_general import TimeSeriesLoader as TimeSeriesLoaderGeneral

np.random.seed(1)

In [2]:
from nixtla.data.datasets.epf import EPF, EPFInfo
Y_df, X_df = EPF.load(directory='data', group=EPFInfo.groups[0])
train_outsample_mask = np.ones(len(Y_df))
train_outsample_mask[-365 * 24:] = 0
sum(train_outsample_mask)
epf_dataset = TimeSeriesDataset(Y_df=Y_df, S_df=None, X_df=X_df, ts_train_mask=train_outsample_mask)
print("X_df.shape", X_df.shape)
print("Y_df.shape", Y_df.shape)
Y_df.head()

Processing dataframes ...
Creating ts tensor ...
X_df.shape (34944, 11)
Y_df.shape (34944, 3)


Unnamed: 0,unique_id,ds,y
0,NP,2013-01-01 00:00:00,31.05
1,NP,2013-01-01 01:00:00,30.47
2,NP,2013-01-01 02:00:00,28.92
3,NP,2013-01-01 03:00:00,27.88
4,NP,2013-01-01 04:00:00,26.96


In [3]:
from nixtla.data.datasets.tourism import Tourism, TourismInfo

group = TourismInfo.groups[2]
print("TourismInfo.groups[2]", group)
Y_df, _ = Tourism.load(directory='data', group=group)

# Creating outliers to check leakages
ds = np.sort(Y_df['ds'].unique())
ts_train_mask = np.ones(333) # hardcodeado a serie más larga
ts_train_mask[-10:] = 0
outlier = Y_df.groupby('unique_id').tail(10).reset_index(drop=True)
outlier['y'] = [10**(x+8) for x in range(10)] * len(np.unique(Y_df.unique_id))
outlier.columns = ['unique_id', 'ds', 'y_val']
Y_df = Y_df.merge(outlier, how='left', on=['unique_id', 'ds'])
Y_df['y_val'] = Y_df['y_val'].fillna(0)
Y_df['y'] = Y_df['y'] + Y_df['y_val']
del Y_df['y_val']

tourism_dataset = TimeSeriesDataset(Y_df=Y_df, S_df=None, X_df=None, ts_train_mask=ts_train_mask)

print("Y_df.shape", Y_df.shape)

TourismInfo.groups[2] Monthly
Processing dataframes ...
Creating ts tensor ...
Y_df.shape (109280, 3)


In [4]:
ts_loader_general = TimeSeriesLoaderGeneral(ts_dataset=tourism_dataset,
                                            model='nbeats',
                                            offset=0,
                                            window_sampling_limit=200*4, 
                                            input_size=3*4,
                                            output_size=4,
                                            idx_to_sample_freq=1,
                                            batch_size= 2048,
                                            n_series_per_batch=32,
                                            is_train_loader=True)

ts_loader_pinche = TimeSeriesLoaderPinche(ts_dataset=tourism_dataset,
                                          model='nbeats',
                                          offset=0,
                                          window_sampling_limit=200*4, 
                                          input_size=3*4,
                                          output_size=4,
                                          idx_to_sample_freq=1,
                                          batch_size= 2048,
                                          is_train_loader=True)

ts_loader_fast = TimeSeriesLoaderFast(ts_dataset=tourism_dataset,
                                      model='nbeats',
                                      offset=0,
                                      window_sampling_limit=200*4, 
                                      input_size=3*4,
                                      output_size=4,
                                      idx_to_sample_freq=1,
                                      batch_size= 2048,
                                      is_train_loader=True)

# ts_loader_general = TimeSeriesLoaderGeneral(ts_dataset=epf_dataset,
#                                             model='nbeats',
#                                             offset=0,
#                                             window_sampling_limit=365*4*24, 
#                                             input_size=3*24,
#                                             output_size=24,
#                                             idx_to_sample_freq=1,
#                                             batch_size= 2048,
#                                             n_series_per_batch=1,
#                                             is_train_loader=True)

# # TODO: Investigar porqué el pinche escala de 0.13 a 4 segundos con exogenas EAI puede no tener la culpa
# ts_loader_pinche = TimeSeriesLoaderPinche(ts_dataset=epf_dataset,
#                                         model='nbeats',
#                                         offset=0,
#                                         window_sampling_limit=365*4*24, 
#                                         input_size=3*24,
#                                         output_size=24,
#                                         idx_to_sample_freq=1,
#                                         batch_size= 2048,
#                                         is_train_loader=True)

# ts_loader_fast = TimeSeriesLoaderFast(ts_dataset=epf_dataset,
#                                         model='nbeats',
#                                         offset=0,
#                                         window_sampling_limit=365*4*24, 
#                                         input_size=3*24,
#                                         output_size=24,
#                                         idx_to_sample_freq=1,
#                                         batch_size= 2048,
#                                         is_train_loader=True)

## COMPARING BATCH TIMES AND CHECKING MODEL INPUT SHAPES
## CHECKING VALIDATION MASK AND LEAKAGE PROTECTIONS

In [5]:
start = time.time()
dataloader = iter(ts_loader_general)
batch = next(dataloader)
insample_y = batch['insample_y']
insample_x = batch['insample_x']
insample_mask = batch['insample_mask']
outsample_x = batch['outsample_x']
outsample_y = batch['outsample_y']
outsample_mask = batch['outsample_mask']
print("DataloaderGeneral batch time:", time.time()-start)
print("insample_y.shape", insample_y.shape)
print("insample_x.shape", insample_x.shape)
print("outsample_y.shape", outsample_y.shape)
print("outsample_x.shape", outsample_x.shape)
print("t.max(insample_y)", t.max(insample_y))
print("t.max(outsample_y)", t.max(outsample_y * outsample_mask))

DataloaderGeneral batch time: 0.026996135711669922
insample_y.shape torch.Size([2048, 12])
insample_x.shape torch.Size([2048, 0, 12])
outsample_y.shape torch.Size([2048, 4])
outsample_x.shape torch.Size([2048, 0, 4])
t.max(insample_y) tensor(118350.)
t.max(outsample_y) tensor(118350.)


In [6]:
start = time.time()
dataloader = iter(ts_loader_pinche)
batch = next(dataloader)
insample_y = batch['insample_y']
insample_x = batch['insample_x']
insample_mask = batch['insample_mask']
outsample_x = batch['outsample_x']
outsample_y = batch['outsample_y']
outsample_mask = batch['outsample_mask']
print("DataloaderPinche batch time:", time.time()-start)
print("insample_y.shape", insample_y.shape)
print("insample_x.shape", insample_x.shape)
print("outsample_y.shape", outsample_y.shape)
print("outsample_x.shape", outsample_x.shape)
print("t.max(insample_y)", t.max(insample_y))
print("t.max(outsample_y)", t.max(outsample_y * outsample_mask))

DataloaderPinche batch time: 0.1765608787536621
insample_y.shape torch.Size([2048, 12])
insample_x.shape torch.Size([2048, 0, 12])
outsample_y.shape torch.Size([2048, 4])
outsample_x.shape torch.Size([2048, 0, 4])
t.max(insample_y) tensor(601161.)
t.max(outsample_y) tensor(486600.)


In [7]:
dataloader = iter(ts_loader_fast)
batch = next(dataloader)
start = time.time()
insample_y = batch['insample_y']
insample_x = batch['insample_x']
insample_mask = batch['insample_mask']
outsample_x = batch['outsample_x']
outsample_y = batch['outsample_y']
outsample_mask = batch['outsample_mask']
print("DataloaderFast batch time:", time.time()-start)
print("insample_y.shape", insample_y.shape)
print("insample_x.shape", insample_x.shape)
print("outsample_y.shape", outsample_y.shape)
print("outsample_x.shape", outsample_x.shape)
print("t.max(insample_y)", t.max(insample_y))
print("t.max(outsample_y)", t.max(outsample_y * outsample_mask))

DataloaderFast batch time: 0.0003039836883544922
insample_y.shape torch.Size([2048, 12])
insample_x.shape torch.Size([2048, 0, 12])
outsample_y.shape torch.Size([2048, 4])
outsample_x.shape torch.Size([2048, 0, 4])
t.max(insample_y) tensor(509100.)
t.max(outsample_y) tensor(526800.)


In [None]:
# ts_loader_fast.ts_windows

In [None]:
# ts_loader_fast.ts_windows[20000:20010]