In [1]:
import time
import numpy as np
import pandas as pd
import random
import torch as t
import copy
from fastcore.foundation import patch
from nixtla.data.ts_dataset import TimeSeriesDataset
from nixtla.data.ts_loader import TimeSeriesLoader as TimeSeriesLoaderFast
from nixtla.data.ts_loader_pinche import TimeSeriesLoader as TimeSeriesLoaderPinche
from nixtla.data.ts_loader_general import TimeSeriesLoader as TimeSeriesLoaderGeneral

np.random.seed(1)

In [7]:
from nixtla.data.datasets.tourism import Tourism, TourismInfo
from nixtla.data.datasets.epf import EPF, EPFInfo
Y_df, X_df = EPF.load(directory='data', group=EPFInfo.groups[0])

In [8]:
X_df.head()

Unnamed: 0,unique_id,ds,Exogenous1,Exogenous2,day_0,day_1,day_2,day_3,day_4,day_5,day_6
0,NP,2013-01-01 00:00:00,42497.0,2798.0,0,1,0,0,0,0,0
1,NP,2013-01-01 01:00:00,41463.0,2417.0,0,1,0,0,0,0,0
2,NP,2013-01-01 02:00:00,40812.0,2036.0,0,1,0,0,0,0,0
3,NP,2013-01-01 03:00:00,40246.0,1706.0,0,1,0,0,0,0,0
4,NP,2013-01-01 04:00:00,40088.0,1427.0,0,1,0,0,0,0,0


In [9]:
Y_df.head()

Unnamed: 0,unique_id,ds,y
0,NP,2013-01-01 00:00:00,31.05
1,NP,2013-01-01 01:00:00,30.47
2,NP,2013-01-01 02:00:00,28.92
3,NP,2013-01-01 03:00:00,27.88
4,NP,2013-01-01 04:00:00,26.96


In [10]:
train_outsample_mask = np.ones(len(y_df))
train_outsample_mask[-365 * 24:] = 0
sum(train_outsample_mask)

26184.0

In [11]:
epf_dataset = TimeSeriesDataset(Y_df=Y_df, S_df=None, X_df=X_df, ts_train_mask=train_outsample_mask)

Processing dataframes ...
Creating ts tensor ...


In [12]:
Y_df, _ = Tourism.load(directory='data', group=TourismInfo.groups[0])
tourism_dataset = TimeSeriesDataset(Y_df=Y_df, S_df=None, X_df=None, ts_train_mask=np.ones(47))

Processing dataframes ...
Creating ts tensor ...


In [37]:
# ts_loader_general = TimeSeriesLoaderGeneral(ts_dataset=tourism_dataset,
#                                             model='nbeats',
#                                             offset=0,
#                                             window_sampling_limit=200*4, 
#                                             input_size=3*4,
#                                             output_size=4,
#                                             idx_to_sample_freq=1,
#                                             batch_size= 2048,
#                                             n_series_per_batch=32,
#                                             is_train_loader=True)

# ts_loader_pinche = TimeSeriesLoaderPinche(ts_dataset=tourism_dataset,
#                                           model='nbeats',
#                                           offset=0,
#                                           window_sampling_limit=200*4, 
#                                           input_size=3*4,
#                                           output_size=4,
#                                           idx_to_sample_freq=1,
#                                           batch_size= 2048,
#                                           is_train_loader=True)

# ts_loader_fast = TimeSeriesLoaderFast(ts_dataset=tourism_dataset,
#                                       model='nbeats',
#                                       offset=0,
#                                       window_sampling_limit=200*4, 
#                                       input_size=3*4,
#                                       output_size=4,
#                                       idx_to_sample_freq=1,
#                                       batch_size= 2048,
#                                       is_train_loader=True)

ts_loader_general = TimeSeriesLoaderGeneral(ts_dataset=epf_dataset,
                                            model='nbeats',
                                            offset=0,
                                            window_sampling_limit=365*4*24, 
                                            input_size=3*24,
                                            output_size=24,
                                            idx_to_sample_freq=1,
                                            batch_size= 2048,
                                            n_series_per_batch=1,
                                            is_train_loader=True)

ts_loader_pinche = TimeSeriesLoaderPinche(ts_dataset=epf_dataset,
                                        model='nbeats',
                                        offset=0,
                                        window_sampling_limit=365*4*24, 
                                        input_size=3*24,
                                        output_size=24,
                                        idx_to_sample_freq=1,
                                        batch_size= 2048,
                                        is_train_loader=True)

ts_loader_fast = TimeSeriesLoaderFast(ts_dataset=epf_dataset,
                                        model='nbeats',
                                        offset=0,
                                        window_sampling_limit=365*4*24, 
                                        input_size=3*24,
                                        output_size=24,
                                        idx_to_sample_freq=1,
                                        batch_size= 2048,
                                        is_train_loader=True)

In [38]:
start = time.time()
dataloader = iter(ts_loader_general)
batch = next(dataloader)
insample_y = batch['insample_y']
insample_x = batch['insample_x']
insample_mask = batch['insample_mask']
outsample_x = batch['outsample_x']
outsample_y = batch['outsample_y']
outsample_mask = batch['outsample_mask']
print("DataloaderGeneral batch time:", time.time()-start)
print("insample_y.shape", insample_y.shape)
print("insample_x.shape", insample_x.shape)
print("outsample_y.shape", outsample_y.shape)
print("outsample_x.shape", outsample_x.shape)

DataloaderGeneral batch time: 0.0408320426940918
insample_y.shape torch.Size([2048, 72])
insample_x.shape torch.Size([2048, 9, 72])
outsample_y.shape torch.Size([2048, 24])
outsample_x.shape torch.Size([2048, 9, 24])


In [39]:
start = time.time()
dataloader = iter(ts_loader_pinche)
batch = next(dataloader)
insample_y = batch['insample_y']
insample_x = batch['insample_x']
insample_mask = batch['insample_mask']
outsample_x = batch['outsample_x']
outsample_y = batch['outsample_y']
outsample_mask = batch['outsample_mask']
print("DataloaderPinche batch time:", time.time()-start)
print("insample_y.shape", insample_y.shape)
print("insample_x.shape", insample_x.shape)
print("outsample_y.shape", outsample_y.shape)
print("outsample_x.shape", outsample_x.shape)

DataloaderPinche batch time: 4.104574203491211
insample_y.shape (2048, 72)
insample_x.shape (2048, 9, 72)
outsample_y.shape (2048, 24)
outsample_x.shape (2048, 9, 24)


In [40]:
dataloader = iter(ts_loader_fast)
batch = next(dataloader)
start = time.time()
insample_y = batch['insample_y']
insample_x = batch['insample_x']
insample_mask = batch['insample_mask']
outsample_x = batch['outsample_x']
outsample_y = batch['outsample_y']
outsample_mask = batch['outsample_mask']
print("DataloaderFast batch time:", time.time()-start)
print("insample_y.shape", insample_y.shape)
print("insample_x.shape", insample_x.shape)
print("outsample_y.shape", outsample_y.shape)
print("outsample_x.shape", outsample_x.shape)

DataloaderFast batch time: 0.0019447803497314453
insample_y.shape torch.Size([2048, 72])
insample_x.shape torch.Size([2048, 9, 72])
outsample_y.shape torch.Size([2048, 24])
outsample_x.shape torch.Size([2048, 9, 24])


In [35]:
# ts_loader_fast.ts_windows

In [34]:
# ts_loader_fast.ts_windows[20000:20010]