In [2]:
!pip install -r requirements.txt --user

Collecting absl-py==1.3.0
  Using cached absl_py-1.3.0-py3-none-any.whl (124 kB)
Collecting aiohttp==3.8.3
  Using cached aiohttp-3.8.3-cp39-cp39-win_amd64.whl (323 kB)
Collecting aiosignal==1.2.0
  Using cached aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Collecting alembic==1.8.1
  Using cached alembic-1.8.1-py3-none-any.whl (209 kB)
Collecting async-timeout==4.0.2
  Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting attrs==22.1.0
  Using cached attrs-22.1.0-py2.py3-none-any.whl (58 kB)
Collecting autopage==0.5.1
  Using cached autopage-0.5.1-py3-none-any.whl (29 kB)
Collecting cachetools==5.2.0
  Using cached cachetools-5.2.0-py3-none-any.whl (9.3 kB)
Collecting certifi==2022.9.24
  Using cached certifi-2022.9.24-py3-none-any.whl (161 kB)
Collecting charset-normalizer==2.1.1
  Using cached charset_normalizer-2.1.1-py3-none-any.whl (39 kB)
Collecting cliff==4.0.0
  Using cached cliff-4.0.0-py3-none-any.whl (80 kB)
Collecting cmaes==0.8.2
  Using cached cmaes-0.8.2-py3

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
anaconda-project 0.11.1 requires ruamel-yaml, which is not installed.
conda-repo-cli 1.0.20 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.5.0 which is incompatible.


In [5]:
!pip install clyent==1.2.1

Collecting clyent==1.2.1
  Downloading clyent-1.2.1.tar.gz (20 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: clyent
  Building wheel for clyent (setup.py): started
  Building wheel for clyent (setup.py): finished with status 'done'
  Created wheel for clyent: filename=clyent-1.2.1-py3-none-any.whl size=9192 sha256=848faf884d33a3103a86683b4334fc90e627cafda33a3867eaeecbf235b3fd08
  Stored in directory: c:\users\franzschramm\appdata\local\pip\cache\wheels\36\00\a6\496d95012b21c0d7b1c980147f150246fc122768aaa222491f
Successfully built clyent
Installing collected packages: clyent
  Attempting uninstall: clyent
    Found existing installation: clyent 1.2.2
    Uninstalling clyent-1.2.2:
      Successfully uninstalled clyent-1.2.2
Successfully installed clyent-1.2.1


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.7.3 which is incompatible.


In [7]:
!pip freeze > requirements.txt

In [11]:
from pytorch-forecasting.data.examples import get_stallion_data
import numpy as np
data = get_stallion_data()  # load data as pandas dataframe


# add time index
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
data["time_idx"] -= data["time_idx"].min()
# add additional features
# categories have to be strings
data["month"] = data.date.dt.month.astype(str).astype("category")
data["log_volume"] = np.log(data.volume + 1e-8)
data["avg_volume_by_sku"] = (
    data
    .groupby(["time_idx", "sku"], observed=True)
    .volume.transform("mean")
)
data["avg_volume_by_agency"] = (
    data
    .groupby(["time_idx", "agency"], observed=True)
    .volume.transform("mean")
)
# we want to encode special days as one variable and 
# thus need to first reverse one-hot encoding
special_days = [
    "easter_day", "good_friday", "new_year", "christmas",
    "labor_day", "independence_day", "revolution_day_memorial",
    "regional_games", "fifa_u_17_world_cup", "football_gold_cup",
    "beer_capital", "music_fest"
]
data[special_days] = (
    data[special_days]
    .apply(lambda x: x.map({0: "-", 1: x.name}))
    .astype("category")
)
# show sample data
data.sample(10, random_state=521)

from pytorch_forecasting import (
    TimeSeriesDataSet,
    GroupNormalizer
)
max_prediction_length = 6  # forecast 6 months
max_encoder_length = 24  # use 24 months of history
training_cutoff = data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="volume",
    group_ids=["agency", "sku"],
    min_encoder_length=0,  # allow predictions without history
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["agency", "sku"],
    static_reals=[
        "avg_population_2017",
        "avg_yearly_household_income_2017"
    ],
    time_varying_known_categoricals=["special_days", "month"],
    # group of categorical variables can be treated as 
    # one variable
    variable_groups={"special_days": special_days},
    time_varying_known_reals=[
        "time_idx",
        "price_regular",
        "discount_in_percent"
    ],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "volume",
        "log_volume",
        "industry_volume",
        "soda_volume",
        "avg_max_temp",
        "avg_volume_by_agency",
        "avg_volume_by_sku",
    ],
    target_normalizer=GroupNormalizer(
        groups=["agency", "sku"]#, coerce_positive=1.0
    ),  # use softplus with beta=1.0 and normalize by group
    add_relative_time_idx=True,  # add as feature
    add_target_scales=True,  # add as feature
    add_encoder_length=True,  # add as feature
)
# create validation set (predict=True) which means to predict the
# last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(
    training, data, predict=True, stop_randomization=True
)
# create dataloaders for model
batch_size = 128
train_dataloader = training.to_dataloader(
    train=True, batch_size=batch_size, num_workers=0
)
val_dataloader = validation.to_dataloader(
    train=False, batch_size=batch_size * 10, num_workers=0
)

import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    EarlyStopping,
    #LearningRateLogger
)
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.models import TemporalFusionTransformer
# stop training, when loss metric does not improve on validation set
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min"
)
#lr_logger = LearningRateLogger()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # log to tensorboard
# create trainer
trainer = pl.Trainer(
    max_epochs=30,
    gpus=0,  # train on CPU, use gpus = [0] to run on GPU
    gradient_clip_val=0.1,
    #early_stop_callback=early_stop_callback,
    limit_train_batches=30,  # running validation every 30 batches
    # fast_dev_run=True,  # comment in to quickly check for bugs
    #callbacks=[lr_logger],
    logger=logger,
)
# initialise model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # biggest influence network size
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # QuantileLoss has 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # log example every 10 batches
    reduce_on_plateau_patience=4,  # reduce learning automatically
)
tft.size() # 29.6k parameters in model
# fit network
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)

SyntaxError: invalid syntax (2261857832.py, line 1)

In [3]:
from pytorch_forecasting.metrics import MAE
import torch
# load the best model according to the validation loss (given that
# we use early stopping, this is not necessarily the last epoch)
best_model_path = trainer.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# calculate mean absolute error on validation set
actuals = torch.cat([y for x, y in iter(val_dataloader)])
predictions = best_tft.predict(val_dataloader)
MAE(predictions, actuals)

TypeError: expected Tensor as element 0 in argument 0, but got tuple