In [49]:
import pandas as pd
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.metrics import QuantileLoss, NormalDistributionLoss
from pytorch_forecasting.models import TemporalFusionTransformer, DeepAR
from pytorch_forecasting.data import (
     TimeSeriesDataSet,
     GroupNormalizer
 )

In [12]:
from pytorch_forecasting.data.examples import get_stallion_data
data_mock = get_stallion_data() 
data_mock

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,...,labor_day,independence_day,revolution_day_memorial,regional_games,fifa_u_17_world_cup,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries
0,Agency_22,SKU_01,52.2720,2013-01-01,492612703,718394219,25.845238,1168.903668,1069.166193,99.737475,...,0,0,0,0,0,0,0,0,8.532566,0
238,Agency_37,SKU_04,0.0000,2013-01-01,492612703,718394219,26.505000,1852.273642,1611.466298,240.807344,...,0,0,0,0,0,0,0,0,13.000635,5
237,Agency_59,SKU_03,812.9214,2013-01-01,492612703,718394219,22.219737,1270.795012,1197.184260,73.610752,...,0,0,0,0,0,0,0,0,5.792496,9
236,Agency_11,SKU_01,316.4400,2013-01-01,492612703,718394219,25.360000,1176.155397,1082.757488,93.397909,...,0,0,0,0,0,0,0,0,7.940950,14
235,Agency_05,SKU_05,420.9093,2013-01-01,492612703,718394219,24.079012,1327.003396,1207.822992,119.180404,...,0,0,0,0,0,0,0,0,8.981168,22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6765,Agency_08,SKU_03,9.8136,2017-12-01,618073219,919709619,25.373665,1706.410263,1455.262060,251.148203,...,0,0,0,0,0,0,0,0,14.717926,336
6764,Agency_60,SKU_05,2235.3495,2017-12-01,618073219,919709619,23.081069,1898.981558,1528.616113,370.365445,...,0,0,0,0,0,0,0,0,19.503372,188
6763,Agency_19,SKU_05,87.5430,2017-12-01,618073219,919709619,27.432590,1902.160687,1547.299733,354.860954,...,0,0,0,0,0,0,0,0,18.655677,162
6771,Agency_60,SKU_03,325.8792,2017-12-01,618073219,919709619,23.081069,1704.503815,1444.443913,260.059902,...,0,0,0,0,0,0,0,0,15.257220,187


In [13]:
data_mock.columns

Index(['agency', 'sku', 'volume', 'date', 'industry_volume', 'soda_volume',
       'avg_max_temp', 'price_regular', 'price_actual', 'discount',
       'avg_population_2017', 'avg_yearly_household_income_2017', 'easter_day',
       'good_friday', 'new_year', 'christmas', 'labor_day', 'independence_day',
       'revolution_day_memorial', 'regional_games', 'fifa_u_17_world_cup',
       'football_gold_cup', 'beer_capital', 'music_fest',
       'discount_in_percent', 'timeseries'],
      dtype='object')

In [33]:
data = pd.read_excel('data_files/WAN_Forecast.xlsx')

In [34]:
data['month'] = data['date'].dt.month.values.astype('str')
data['date'] = data.index.values
data['trainName'] = data['trainName'].astype(str)


In [52]:

max_prediction_length = 30*6  # forecast of 6 months
max_encoder_length = 30*18  # using history of 24 months
training_cutoff = data["date"].max() - max_prediction_length
training = TimeSeriesDataSet(
    data[lambda x: x.date <= training_cutoff],
    time_idx="date",
    target="dataUsage",
    group_ids=["wanProvider", "fleet", "trainName"],
    min_encoder_length=0,  # allowing predictions without history
max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["wanProvider", "fleet", "trainName"],
time_varying_known_categoricals=["month"],
    # group of categorical variables can be treated as one variable
    time_varying_known_reals=[
        "date",
    ],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "dataUsage"
    ],
    target_normalizer=GroupNormalizer(
        groups=["wanProvider", "fleet", "trainName"]
    ),  # use softplus with beta=1.0 and normalize by group
    add_relative_time_idx = False,  # add as feature
    add_target_scales=True,  # add as feature
    add_encoder_length=True,  # add as feature
    allow_missing_timesteps=True
)
# creating validation set (predict=True) which means to predict the
# last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(
    training, data, predict=True, stop_randomization=True
)
# create dataloaders for model
batch_size = 64
train_dataloader = training.to_dataloader(
    train=True, batch_size=batch_size, num_workers=0
)
val_dataloader = validation.to_dataloader(
    train=False, batch_size=batch_size * 10, num_workers=0
) 


Epoch 0:  23%|██▎       | 7/31 [19:20<1:06:17, 165.74s/it, loss=1.9, v_num=1, train_loss_step=2.010]
                                                              







In [53]:
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min")
lr_logger = LearningRateMonitor()

trainer = pl.Trainer(
    max_epochs=10,
    gpus=0,
    gradient_clip_val=0.1,
    limit_train_batches=30,
    limit_val_batches=3,
    # fast_dev_run=True,
    # logger=logger,
    # profiler=True,
    callbacks=[lr_logger, early_stop_callback],
)


deepar = DeepAR.from_dataset(
    training,
    learning_rate=0.1,
    hidden_size=32,
    dropout=0.1,
    loss=NormalDistributionLoss(),
    log_interval=10,
    log_val_interval=3,
    # reduce_on_plateau_patience=3,
)
print(f"Number of parameters in network: {deepar.size()/1e3:.1f}k")

# # find optimal learning rate
# deepar.hparams.log_interval = -1
# deepar.hparams.log_val_interval = -1
# trainer.limit_train_batches = 1.0
# res = trainer.tuner.lr_find(
#     deepar, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader, min_lr=1e-5, max_lr=1e2
# )

# print(f"suggested learning rate: {res.suggestion()}")
# fig = res.plot(show=True, suggest=True)
# fig.show()
# deepar.hparams.learning_rate = res.suggestion()

torch.set_num_threads(10)
trainer.fit(
    deepar,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

# calcualte mean absolute error on validation set
actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])
predictions = deepar.predict(val_dataloader)
print(f"Mean absolute error of model: {(actuals - predictions).abs().mean()}")


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name                   | Type                   | Params
------------------------------------------------------------------
0 | loss                   | NormalDistributionLoss | 0     
1 | logging_metrics        | ModuleList             | 0     
2 | embeddings             | MultiEmbedding         | 15.6 K
3 | rnn                    | LSTM                   | 21.4 K
4 | distribution_projector | Linear                 | 66    
------------------------------------------------------------------
37.0 K    Trainable params
0         Non-trainable params
37.0 K    Total params
0.148     Total estimated model params size (MB)


Number of parameters in network: 37.0k
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Validation sanity check:  50%|█████     | 1/2 [00:04<00:04,  4.08s/it]

  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


ValueError: Expected parameter loc (Tensor of shape (409, 180)) of distribution Normal(loc: torch.Size([409, 180]), scale: torch.Size([409, 180])) to satisfy the constraint Real(), but found invalid values:
tensor([[0.0460, 0.0460, 0.0460,  ..., 0.0460, 0.0460, 0.0460],
        [0.0520, 0.0520, 0.0520,  ..., 0.0520, 0.0520, 0.0520],
        [0.0640, 0.0640, 0.0640,  ..., 0.0640, 0.0640, 0.0640],
        ...,
        [2.5971, 2.5971, 2.5971,  ..., 2.5973, 2.5973, 2.5973],
        [3.9235, 3.9235, 3.9235,  ..., 3.9238, 3.9238, 3.9238],
        [3.1750, 3.1750, 3.1750,  ..., 3.1752, 3.1752, 3.1752]])