In [1]:
from datasets import load_dataset

# Load the "traffic_hourly" configuration of the "monash_tsf" dataset.
dataset = load_dataset(
    path="monash_tsf",
    name="traffic_hourly",
)

In [2]:
# Display the DatasetDict summary: the available splits with their features
# and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['start', 'target', 'feat_static_cat', 'feat_dynamic_real', 'item_id'],
        num_rows: 862
    })
    test: Dataset({
        features: ['start', 'target', 'feat_static_cat', 'feat_dynamic_real', 'item_id'],
        num_rows: 862
    })
    validation: Dataset({
        features: ['start', 'target', 'feat_static_cat', 'feat_dynamic_real', 'item_id'],
        num_rows: 862
    })
})

In [3]:

# Inspect the first series of the train and validation splits: print its
# "start" value and the number of observations in its "target".
# In the recorded output both series share the same start, and the validation
# target is 48 steps longer than the train target (17496 vs. 17448).
train_example = dataset["train"][0]
print(train_example["start"])
print(len(train_example["target"]))

validation_example = dataset["validation"][0]
print(validation_example["start"])
print(len(validation_example["target"]))

2015-01-01 00:00:01
17448
2015-01-01 00:00:01
17496


In [4]:
# Bind the train and test splits to the names used by the following cells.
train_dataset, test_dataset = dataset["train"], dataset["test"]

In [5]:
# Forecast horizon in time steps; at an hourly frequency this is two days.
prediction_length = 2 * 24

# Frequency string used when converting "start" values to pd.Period.
freq = "1H"

In [6]:
from functools import lru_cache

import pandas as pd
import numpy as np


@lru_cache(maxsize=10_000)
def convert_to_pandas_period(date, freq):
    """Build a ``pd.Period`` for ``date`` at frequency ``freq``.

    Results are memoised for up to 10,000 distinct ``(date, freq)`` pairs,
    so both arguments must be hashable and repeated calls with the same
    arguments return the cached object.
    """
    period = pd.Period(date, freq)
    return period


def transform_start_field(batch, freq):
    """Replace every entry of ``batch["start"]`` with a pandas Period.

    Each value is converted through ``convert_to_pandas_period`` using
    ``freq``. ``batch`` is modified in place and the same object is returned.
    """
    periods = []
    for raw_start in batch["start"]:
        periods.append(convert_to_pandas_period(raw_start, freq))
    batch["start"] = periods
    return batch


from functools import partial

# Convert "start" values to pd.Period on the fly, whenever rows are accessed.
# `with_transform` returns a new Dataset carrying the transform, so the objects
# still held in `dataset["train"]` / `dataset["test"]` stay untransformed and
# earlier inspection cells give the same output when re-run. `set_transform`
# would modify those shared objects in place.
train_dataset = train_dataset.with_transform(partial(transform_start_field, freq=freq))
test_dataset = test_dataset.with_transform(partial(transform_start_field, freq=freq))




In [10]:
# Sanity check: print the first "start" value of train_dataset. The recorded
# output shows it at hourly resolution (2015-01-01 00:00).
print(train_dataset['start'][0])

2015-01-01 00:00


In [11]:
from gluonts.dataset.multivariate_grouper import MultivariateGrouper

# Every series in the training split becomes one variate of the grouped
# multivariate series.
num_of_variates = len(train_dataset)

# Training split: group all series into a single multivariate dataset.
train_grouper = MultivariateGrouper(max_target_dim=num_of_variates)
multi_variate_train_dataset = train_grouper(train_dataset)

# Test split: num_test_dates is the number of rolling test windows, i.e. how
# many times the set of variates fits into the test split.
test_grouper = MultivariateGrouper(
    max_target_dim=num_of_variates,
    num_test_dates=len(test_dataset) // num_of_variates,
)
multi_variate_test_dataset = test_grouper(test_dataset)

# Report the shape of the first grouped training target.
multi_variate_train_example = multi_variate_train_dataset[0]
print(f"multi_variate_train_example['target'].shape = {multi_variate_train_example['target'].shape}")

multi_variate_train_example['target'].shape = (862, 17448)


In [18]:
# Display the "target" array of the first entry of the grouped training
# dataset.
multi_variate_train_dataset[0]['target']

array([[0.0048, 0.0072, 0.004 , ..., 0.0205, 0.0091, 0.0055],
       [0.0146, 0.0148, 0.0101, ..., 0.0243, 0.0174, 0.0124],
       [0.0289, 0.035 , 0.0267, ..., 0.0454, 0.0368, 0.0234],
       ...,
       [0.0051, 0.0036, 0.003 , ..., 0.0435, 0.0375, 0.032 ],
       [0.01  , 0.0087, 0.0061, ..., 0.02  , 0.015 , 0.0096],
       [0.0121, 0.0136, 0.0107, ..., 0.0266, 0.0183, 0.0127]],
      dtype=float32)

In [22]:
from transformers import InformerConfig, InformerForPrediction

config = InformerConfig(
    # --- data / window settings ---
    # multivariate setting: one input per variate at every time step
    input_size=num_of_variates,
    # number of future steps to predict
    prediction_length=prediction_length,
    # context window: twice the prediction length
    context_length=2 * prediction_length,
    # lagged values fed to the model: the previous step and one week back
    lags_sequence=[1, 7 * 24],
    # NOTE(review): num_time_features is not passed. The original
    # `num_time_features=len(time_features) + 1` line was commented out,
    # presumably because `time_features` is not defined in the cells shown,
    # so the InformerConfig default applies. Re-enable it once time features
    # are added.
    # --- Informer architecture ---
    # model width; the input of size
    # num_of_variates * len(lags_sequence) + num_time_features is projected to this
    d_model=64,
    encoder_layers=6,
    decoder_layers=4,
    dropout=0.1,
)

# Prediction model built from the configuration above.
model = InformerForPrediction(config)

In [23]:
from transformers import InformerConfig, InformerModel

# Instantiate an InformerModel from the same `config` object that was used
# for InformerForPrediction.
m = InformerModel(config)