In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local nixtla sources) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import copy
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch as t
from fastcore.foundation import patch

from nixtla.data.datasets.epf import EPF, EPFInfo
from nixtla.data.tsdataset import TimeSeriesDataset, get_mask_df
from nixtla.data.tsloader_fast import TimeSeriesLoader as TimeSeriesLoaderFast
from nixtla.data.tsloader_pinche import TimeSeriesLoader as TimeSeriesLoaderPinche
from nixtla.data.tsloader_general import TimeSeriesLoader as TimeSeriesLoaderGeneral
from nixtla.models.nbeats.nbeats import Nbeats

## READ NORD POOL ELECTRICITY PRICE FORECASTING DATASET

In [None]:
# Load the first EPF group; the third value returned by EPF.load is discarded.
epf_group = EPFInfo.groups[0]
Y_df, X_df, _ = EPF.load(directory='data', group=epf_group)

# Restrict the exogenous frame to the key columns and two exogenous variables.
exogenous_columns = ['unique_id', 'ds', 'Exogenous1', 'Exogenous2']
X_df = X_df[exogenous_columns]

# 24 * 365 timestamps are handed to get_mask_df.
# NOTE(review): presumably this is the length of the hold-out span -- confirm
# against get_mask_df.
mask_df = get_mask_df(Y_df=Y_df, n_timestamps=24 * 365)

epf_dataset = TimeSeriesDataset(
    Y_df=Y_df,
    X_df=X_df,
    S_df=None,
    mask_df=mask_df,
)

## TRAIN AND VALIDATION DATALOADERS

In [None]:
# The training and validation loaders take exactly the same arguments, so the
# settings are written once and unpacked into both constructors.
# NOTE(review): with identical settings and the same dataset, confirm that the
# train/validation separation really comes from the dataset's mask_df.
epf_loader_kwargs = dict(
    ts_dataset=epf_dataset,
    model='nbeats',
    offset=0,
    window_sampling_limit=4 * 365 * 24,
    input_size=24 * 2,
    output_size=24,
    idx_to_sample_freq=1,
    batch_size=1024,
    complete_inputs=False,
    complete_sample=False,
    shuffle=False,
)

train_loader = TimeSeriesLoaderGeneral(**epf_loader_kwargs)
val_loader = TimeSeriesLoaderGeneral(**epf_loader_kwargs)

In [None]:
# Configure the model for the EPF data. input_size_multiplier * output_size
# (2 * 24) equals the input_size=2*24 given to the loaders.
nbeatsx = Nbeats(
    # Input / output sizes
    input_size_multiplier=2,
    output_size=24,
    # Architecture: every per-stack list has two entries
    shared_weights=False,
    activation='relu',
    initialization='lecun_normal',
    stack_types=['identity'] * 2,
    n_blocks=[1] * 2,
    n_layers=[2] * 2,
    n_hidden=[[256, 256]] * 2,
    n_harmonics=1,
    n_polynomials=2,
    exogenous_n_channels=9,
    x_s_n_hidden=0,
    # Regularization
    batch_normalization=False,
    dropout_prob_theta=0.01,
    dropout_prob_exogenous=0.01,
    weight_decay=1e-7,
    l1_theta=1e-4,
    # Optimization schedule
    learning_rate=5e-4,
    lr_decay=0.5,
    n_lr_decay_steps=3,
    n_iterations=50,
    early_stopping=10,
    # Training and validation losses
    loss='MAE',
    loss_hypar=None,
    val_loss='MAE',
    # Data frequency, seasonality and seed
    frequency='H',
    seasonality=24,
    random_seed=1,
)

# Fit on the training loader, passing the validation loader and eval_steps=10.
nbeatsx.fit(
    train_ts_loader=train_loader,
    val_ts_loader=val_loader,
    eval_steps=10,
    verbose=True,
)

In [None]:
# Run the fitted EPF model over the training loader and keep the result in y_hat
# (the name is reassigned later by the Tourism section).
y_hat = nbeatsx.predict(ts_loader=train_loader)

## READ TOURISM DATASET

In [None]:
from nixtla.data.datasets.tourism import Tourism, TourismInfo

# Select the first Tourism group and show which one it is.
group = TourismInfo.groups[0]
print("TourismInfo.groups[0]", group)

# Load the target series only; the second value returned by Tourism.load is discarded.
# Note that Y_df is reassigned here and no longer refers to the EPF data.
Y_df, _ = Tourism.load(directory='data', group=group)

# NOTE(review): the EPF section builds TimeSeriesDataset with `mask_df=...`,
# while this call passes `ts_train_mask=None` -- confirm the constructor still
# accepts this keyword.
tourism_dataset = TimeSeriesDataset(
    Y_df=Y_df,
    X_df=None,
    S_df=None,
    ts_train_mask=None,
)

In [None]:
# Loader for the Tourism data; this rebinds train_loader (previously the EPF loader).
# NOTE(review): the EPF loaders pass complete_inputs / complete_sample / shuffle,
# whereas this call passes n_series_per_batch / is_train_loader instead --
# confirm both keyword sets are accepted by TimeSeriesLoaderGeneral.
train_loader = TimeSeriesLoaderGeneral(
    ts_dataset=tourism_dataset,
    model='nbeats',
    offset=4,
    window_sampling_limit=4 * 20,
    input_size=4 * 7,
    output_size=4,
    idx_to_sample_freq=1,
    batch_size=1024,
    n_series_per_batch=32,
    is_train_loader=True,
)

In [None]:
# Configure the model for the Tourism data. input_size_multiplier * output_size
# (7 * 4) equals the input_size=7*4 given to the loader.
# This rebinds nbeatsx, replacing the EPF model.
nbeatsx = Nbeats(
    # Input / output sizes
    input_size_multiplier=7,
    output_size=4,
    # Architecture: every per-stack list has thirty entries
    shared_weights=False,
    activation='relu',
    initialization='lecun_normal',
    stack_types=['identity'] * 30,
    n_blocks=[1] * 30,
    n_layers=[4] * 30,
    n_hidden=[[512, 512, 512, 512]] * 30,
    n_harmonics=1,
    n_polynomials=2,
    exogenous_n_channels=9,
    x_s_n_hidden=0,
    # Regularization (dropout, weight decay and l1_theta are all zero here)
    batch_normalization=False,
    dropout_prob_theta=0.0,
    dropout_prob_exogenous=0.0,
    weight_decay=0.0,
    l1_theta=0.0,
    # Optimization schedule
    learning_rate=1e-3,
    lr_decay=0.5,
    n_lr_decay_steps=3,
    n_iterations=30,
    early_stopping=10,
    # Training loss
    loss='MAPE',
    # NOTE(review): frequency is 'M' although the series plotted later is
    # named 'Y1' -- confirm the frequency matches the loaded group.
    frequency='M',
    seasonality=4,
    random_seed=1,
)

# Fit without a validation loader, passing eval_steps=5.
nbeatsx.fit(
    train_ts_loader=train_loader,
    val_ts_loader=None,
    eval_steps=5,
    verbose=True,
)

In [None]:
# Run the fitted Tourism model over its loader. The loader is passed by keyword,
# matching the earlier predict(ts_loader=...) call in this notebook.
y_hat = nbeatsx.predict(ts_loader=train_loader)

In [None]:
# Plot the observed values of series 'Y1' together with its predictions.
series_id = 'Y1'
y_plot = Y_df[Y_df['unique_id'] == series_id]['y']
y_hat_plot = y_hat[y_hat['unique_id'] == series_id]['y_hat']

# The predictions are drawn at the x positions right after the last observation.
# NOTE(review): the MAPE check below scores y_hat against the last 4 observations,
# so the predictions may actually overlap the end of the history -- confirm.
history_steps = range(len(y_plot))
forecast_steps = range(len(y_plot), len(y_plot) + len(y_hat_plot))

# Explicit figure/axes with labels so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history_steps, y_plot, label='y')
ax.plot(forecast_steps, y_hat_plot, label='y_hat')
ax.set(title=f'Series {series_id}', xlabel='time step', ylabel='y')
ax.legend();  # trailing ';' keeps the Legend repr out of the cell output

# MAPE over the last 4 observations of each series should be around 22%

In [None]:
# Score the predictions against the last 4 observations of every series using
# the mean absolute percentage error (MAPE).
y_test = Y_df.groupby('unique_id').tail(4).reset_index(drop=True)

# Compare by position instead of by index label: subtracting two Series aligns
# them on their index, so mismatched labels would silently turn into NaN that
# the mean then skips. Fail loudly if the rows do not correspond.
y_true = y_test['y'].to_numpy()
y_pred = y_hat['y_hat'].to_numpy()
assert len(y_true) == len(y_pred), (
    f'expected {len(y_true)} predictions, got {len(y_pred)}'
)
assert (y_test['unique_id'].to_numpy() == y_hat['unique_id'].to_numpy()).all(), (
    'y_hat rows are not in the same unique_id order as y_test'
)

# Expressed in percent so the value is directly comparable with the expected ~22.
100 * np.mean(np.abs(y_true - y_pred) / np.abs(y_true))