In [1]:
import sys


skorch_forecasting_path = '/home/jovyan/skorch_forecasting'

if skorch_forecasting_path not in sys.path:
    sys.path.append(skorch_forecasting_path)
    
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

# Read csv

In [3]:
X = pd.read_csv('grouped_timeseries.csv')

In [5]:
# Meta data

GROUP_IDS = ['PRODUCT_ID']  # Columnas agrupadoras
TIMESTAMP = 'SALE_DATE'
TARGET = 'SALE_UNITS'
FREQ = 'W'

In [7]:
# Set correct dtypes.

X[TIMESTAMP] = pd.to_datetime(X[TIMESTAMP])
X = X.sort_values(GROUP_IDS + [TIMESTAMP]).reset_index(drop=True)

# Group ids to str
for group_id in GROUP_IDS:
    X[group_id] = X[group_id].astype(str)

In [8]:
# Select only top N timeseries.

N = 20
groups_to_keep = X.groupby(GROUP_IDS).sum()[TARGET].nlargest(N).index
X = X.set_index(GROUP_IDS).loc[groups_to_keep].reset_index()

  groups_to_keep = X.groupby(GROUP_IDS).sum()[TARGET].nlargest(N).index


# Train-test split

In [9]:
from skorch_forecasting.utils.datetime import set_date_on_freq
from skorch_forecasting.model_selection import train_test_split

In [10]:
# Dates configuration

TEST_START = '2021-08-01'
TEST_END = '2021-09-01'

TEST_START = set_date_on_freq(TEST_START, FREQ)
TEST_END = set_date_on_freq(TEST_END, FREQ)
PREDICTION_LEN = len(pd.date_range(TEST_START, TEST_END, freq=FREQ))
ENCODER_LEN = 26

In [11]:
# Train-test split.

X_train, X_test = train_test_split(
    X, TEST_START, TEST_END, TIMESTAMP, 
    FREQ, sequence_length=ENCODER_LEN
)

# Preprocessing

In [12]:
from skorch_forecasting.preprocessing import (
    GroupWiseColumnTransformer, 
    PandasColumnTransformer,
    TimeIndexEncoder
)
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline

In [13]:
# Construction of ``preprocessor``.

# Timestamp encoder.
transformers = [(TimeIndexEncoder(freq='W'), TIMESTAMP)]
timestamp_encoder = PandasColumnTransformer(transformers)

# Numerical features transformer.
transformers = [(MinMaxScaler(), ['SALE_PRICE', 'ACTIVE_STORES'])]
features_transformer = GroupWiseColumnTransformer(transformers, GROUP_IDS)

# Target transformer.
transformers = [(MinMaxScaler(), [TARGET])]
target_transformer = GroupWiseColumnTransformer(transformers, GROUP_IDS)


transformations = [
    timestamp_encoder,
    features_transformer,
    target_transformer
]


pipeline = make_pipeline(*transformations)

In [14]:
X_train_transformed = pipeline.fit_transform(X_train)

In [15]:
X_train_transformed.head()

Unnamed: 0,SALE_UNITS,SALE_PRICE,ACTIVE_STORES,SALE_DATE,PRODUCT_ID
0,0.372093,0.140086,0.0,0,16387
1,0.232558,0.0,0.119048,1,16387
2,0.139535,0.140638,0.190476,2,16387
3,0.011628,0.205444,0.214286,3,16387
4,0.209302,0.204243,0.238095,4,16387


# Fit

In [16]:
from skorch_forecasting.nn import Seq2Seq
from skorch_forecasting.nn import TemporalFusionTransformer as TFT
from skorch.callbacks import LRScheduler, GradientNormClipping, EarlyStopping
from torch.optim.lr_scheduler import OneCycleLR

In [17]:
# Construct ``callbacks``.
# Callbacks are a way to customize the behavior of the neural net training.
# For this demo, two callbacks are included: learning rate scheduler and gradient clipping.

callbacks = []

# Learning rate scheduler.
name = 'lr_scheduler'
lr_scheduler = dict(
    policy=OneCycleLR, 
    step_every='batch', 
    max_lr=1e-3, 
    steps_per_epoch='iterations', 
    epochs='max_epochs'
)
obj = LRScheduler(**lr_scheduler)
callbacks.append((name, obj))

# Gradient clipping.
name = 'gradient_clipping'
obj = GradientNormClipping(1)
callbacks.append((name, obj))

In [20]:
seq2seq = Seq2Seq(
    group_ids=GROUP_IDS,
    time_idx=TIMESTAMP,
    target=TARGET,
    max_prediction_length=PREDICTION_LEN,
    max_encoder_length=ENCODER_LEN,
    time_varying_known_reals=['ACTIVE_STORES', 'SALE_PRICE'],
    time_varying_unknown_reals=[TARGET],
    static_categoricals=GROUP_IDS,
    batch_size=64,
    max_epochs=30,
    callbacks=callbacks
)

In [None]:
ctrl+c

In [21]:
seq2seq.fit(X_train_transformed)

  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.4137[0m  21.8275


In [24]:
output = seq2seq.predict(X_train_transformed, raw=False)

In [25]:
output.head()

Unnamed: 0,PRODUCT_ID,SALE_UNITS,SALE_PRICE,ACTIVE_STORES,SALE_DATE
0,16387,0.059115,0.409812,0.452381,26
1,16387,0.037848,0.398183,0.47619,27
2,16387,0.034087,0.398092,0.47619,28
3,16387,0.035695,0.369724,0.47619,29
4,16387,0.042573,0.375052,0.5,30


In [26]:
pipeline.inverse_transform(output)

Unnamed: 0,SALE_DATE,PRODUCT_ID,SALE_UNITS,SALE_PRICE,ACTIVE_STORES
0,2017-07-09,16387,42.251595,8.454278,34.0
1,2017-07-16,16387,36.764851,8.412099,35.0
2,2017-07-23,16387,35.794357,8.411769,35.0
3,2017-07-30,16387,36.209229,8.308873,35.0
4,2017-08-06,16387,37.983948,8.328200,36.0
...,...,...,...,...,...
4235,2021-06-27,6110,197.458313,7.092176,34.0
4236,2021-07-04,6110,197.689804,6.985708,34.0
4237,2021-07-11,6110,193.829712,6.919861,33.0
4238,2021-07-18,6110,191.609695,6.831414,33.0
