In [1]:
%cd ../..

C:\Users\ManuJoseph\OneDrive - Thoucentric\Work\Projects\Playground\AdvancedTimeSeriesForecastingBook\Github\Modern-Time-Series-Forecasting-with-Python-


In [2]:
import os
import shutil

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

from pathlib import Path

from src.forecasting.ml_forecasting import (
    MissingValueConfig,
    calculate_metrics,
)
from src.utils import plotting_utils
from tqdm.autonotebook import tqdm
from src.forecasting.ml_forecasting import calculate_metrics
from src.utils import ts_utils

%load_ext autoreload
%autoreload 2
np.random.seed(42)
tqdm.pandas()

  from tqdm.autonotebook import tqdm


In [3]:
os.makedirs("imgs/chapter_13", exist_ok=True)
preprocessed = Path("data/london_smart_meters/preprocessed")
output = Path("data/london_smart_meters/output")

## Utility Functions

In [4]:
def format_plot(fig, legends=None, xlabel="Time", ylabel="Value", title="", font_size=15):
    if legends:
        names = cycle(legends)
        fig.for_each_trace(lambda t: t.update(name=next(names)))
    fig.update_layout(
        autosize=False,
        width=900,
        height=500,
        title_text=title,
        title={"x": 0.5, "xanchor": "center", "yanchor": "top"},
        titlefont={"size": 20},
        legend_title=None,
        legend=dict(
            font=dict(size=font_size),
            orientation="h",
            yanchor="bottom",
            y=0.98,
            xanchor="right",
            x=1,
        ),
        yaxis=dict(
            title_text=ylabel,
            titlefont=dict(size=font_size),
            tickfont=dict(size=font_size),
        ),
        xaxis=dict(
            title_text=xlabel,
            titlefont=dict(size=font_size),
            tickfont=dict(size=font_size),
        )
    )
    return fig

In [5]:
from itertools import cycle


def plot_forecast(pred_df, forecast_columns, forecast_display_names=None):
    if forecast_display_names is None:
        forecast_display_names = forecast_columns
    else:
        assert len(forecast_columns) == len(forecast_display_names)
    mask = ~pred_df[forecast_columns[0]].isnull()
    colors = [
        "rgba(" + ",".join([str(c) for c in plotting_utils.hex_to_rgb(c)]) + ",<alpha>)"
        for c in px.colors.qualitative.Plotly
    ]
    act_color = colors[0]
    colors = cycle(colors[1:])
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=pred_df[mask].index,
            y=pred_df[mask].energy_consumption,
            mode="lines",
            line=dict(color=act_color.replace("<alpha>", "0.9")),
            name="Actual Consumption",
        )
    )
    for col, display_col in zip(forecast_columns, forecast_display_names):
        fig.add_trace(
            go.Scatter(
                x=pred_df[mask].index,
                y=pred_df.loc[mask, col],
                mode="lines",
                line=dict(dash="dot", color=next(colors).replace("<alpha>", "1")),
                name=display_col,
            )
        )
    return fig

def highlight_abs_min(s, props=''):
    return np.where(s == np.nanmin(np.abs(s.values)), props, '')

## Reading the data

In [6]:
#Reading the missing value imputed and train test split data
train_df = pd.read_parquet(preprocessed/"selected_blocks_train_missing_imputed_feature_engg.parquet")
# Read in the Validation dataset as test_df so that we predict on it
test_df = pd.read_parquet(preprocessed/"selected_blocks_val_missing_imputed_feature_engg.parquet")
# test_df = pd.read_parquet(preprocessed/"selected_blocks_test_missing_imputed_feature_engg.parquet")

In [7]:
target = "energy_consumption"
index_cols = ["LCLid", "timestamp"]

In [8]:
from pytorch_forecasting import TimeSeriesDataSet
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [9]:
train_df['train'] = True
test_df['train'] = False
data = pd.concat([train_df, test_df])
del train_df, test_df

In [10]:
data['time_idx'] = data.timestamp.apply(lambda x: x.value)
diff = data.iloc[1]['time_idx'] - data.iloc[0]['time_idx']
data["_min_time_idx"] = data.groupby("LCLid", observed=True)['time_idx'].transform("min")
data['time_idx'] = ((data['time_idx']-data['_min_time_idx'])/diff).astype(int)
data.drop(columns="_min_time_idx", inplace=True)

In [11]:
# data["scale"] = data.groupby(["LCLid"], observed=True).energy_consumption.transform("mean")

In [12]:
data.sample(10)

Unnamed: 0,timestamp,LCLid,energy_consumption,frequency,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,...,timestamp_Minute_sin_3,timestamp_Minute_sin_4,timestamp_Minute_sin_5,timestamp_Minute_cos_1,timestamp_Minute_cos_2,timestamp_Minute_cos_3,timestamp_Minute_cos_4,timestamp_Minute_cos_5,train,time_idx
2930,2012-12-09 01:00:00,MAC003543,0.072,30min,24336,Std,ACORN-M,Adversity,block_85,NO_HOLIDAY,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,True,2930
27432,2013-12-03 12:00:00,MAC002809,0.236,30min,31584,ToU,ACORN-E,Affluent,block_37,NO_HOLIDAY,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,True,27432
2677,2012-07-14 18:30:00,MAC004079,0.102,30min,31152,Std,ACORN-Q,Adversity,block_102,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,2677
20227,2013-06-28 09:30:00,MAC001009,0.822,30min,31968,Std,ACORN-H,Comfortable,block_68,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,20227
21557,2013-10-05 02:30:00,MAC004642,0.036,30min,28560,Std,ACORN-H,Comfortable,block_63,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,21557
30313,2014-01-09 12:30:00,MAC005529,0.124,30min,32688,ToU,ACORN-L,Adversity,block_82,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,False,30313
3379,2012-09-21 09:30:00,MAC004642,0.06,30min,28560,Std,ACORN-H,Comfortable,block_63,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,3379
8234,2012-11-02 13:00:00,MAC001243,0.249,30min,31392,Std,ACORN-Q,Adversity,block_96,NO_HOLIDAY,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,True,8234
8016,2012-07-14 00:00:00,MAC005008,0.048,30min,36528,Std,ACORN-E,Affluent,block_29,NO_HOLIDAY,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,True,8016
8169,2012-06-19 04:30:00,MAC004459,0.151,30min,37872,Std,ACORN-Q,Adversity,block_104,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,8169


In [13]:
train_df = data.loc[data.train]
test_df = data.loc[~data.train]
del data

In [14]:
train_df.columns

Index(['timestamp', 'LCLid', 'energy_consumption', 'frequency',
       'series_length', 'stdorToU', 'Acorn', 'Acorn_grouped', 'file',
       'holidays', 'visibility', 'windBearing', 'temperature', 'dewPoint',
       'pressure', 'apparentTemperature', 'windSpeed', 'precipType', 'icon',
       'humidity', 'summary', 'energy_consumption_lag_1',
       'energy_consumption_lag_2', 'energy_consumption_lag_3',
       'energy_consumption_lag_4', 'energy_consumption_lag_5',
       'energy_consumption_lag_46', 'energy_consumption_lag_47',
       'energy_consumption_lag_48', 'energy_consumption_lag_49',
       'energy_consumption_lag_50', 'energy_consumption_lag_334',
       'energy_consumption_lag_335', 'energy_consumption_lag_336',
       'energy_consumption_lag_337', 'energy_consumption_lag_338',
       'energy_consumption_rolling_3_mean', 'energy_consumption_rolling_3_std',
       'energy_consumption_rolling_6_mean', 'energy_consumption_rolling_6_std',
       'energy_consumption_rolling_12_me

In [15]:
train_df[['holidays', 'visibility', 'windBearing', 'temperature', 'dewPoint',
       'pressure', 'apparentTemperature', 'windSpeed', 'precipType', 'icon',
       'humidity', 'summary']].head()

Unnamed: 0,holidays,visibility,windBearing,temperature,dewPoint,pressure,apparentTemperature,windSpeed,precipType,icon,humidity,summary
0,NO_HOLIDAY,12.99,229,12.12,10.97,1008.099976,12.12,5.9,rain,partly-cloudy-night,0.93,Mostly Cloudy
1,NO_HOLIDAY,12.99,229,12.12,10.97,1008.099976,12.12,5.9,rain,partly-cloudy-night,0.93,Mostly Cloudy
2,NO_HOLIDAY,12.89,238,12.59,11.02,1007.880005,12.59,6.06,rain,cloudy,0.9,Overcast
3,NO_HOLIDAY,12.89,238,12.59,11.02,1007.880005,12.59,6.06,rain,cloudy,0.9,Overcast
4,NO_HOLIDAY,11.54,229,12.45,11.04,1007.950012,12.45,5.31,rain,partly-cloudy-night,0.91,Mostly Cloudy


In [16]:
train_df[['timestamp_Month',
       'timestamp_Quarter', 'timestamp_Is_quarter_end',
       'timestamp_Is_quarter_start', 'timestamp_Is_year_end',
       'timestamp_Is_year_start', 'timestamp_Is_month_start',
       'timestamp_WeekDay', 'timestamp_Dayofweek', 'timestamp_Dayofyear',
       'timestamp_Hour', 'timestamp_Minute', 'timestamp_Elapsed',]].head()

Unnamed: 0,timestamp_Month,timestamp_Quarter,timestamp_Is_quarter_end,timestamp_Is_quarter_start,timestamp_Is_year_end,timestamp_Is_year_start,timestamp_Is_month_start,timestamp_WeekDay,timestamp_Dayofweek,timestamp_Dayofyear,timestamp_Hour,timestamp_Minute,timestamp_Elapsed
0,1,1,0,1,0,1,1,6,6,1,0,0,1325376000
1,1,1,0,1,0,1,1,6,6,1,0,30,1325377800
2,1,1,0,1,0,1,1,6,6,1,1,0,1325379600
3,1,1,0,1,0,1,1,6,6,1,1,30,1325381400
4,1,1,0,1,0,1,1,6,6,1,2,0,1325383200


In [17]:
train_df[['holidays','precipType','summary','timestamp_Month',
       'timestamp_Quarter', 'timestamp_Is_quarter_end',
       'timestamp_Is_quarter_start', 'timestamp_Is_year_end',
       'timestamp_Is_year_start', 'timestamp_Is_month_start','timestamp_Dayofweek', 'timestamp_Dayofyear',
       'timestamp_Hour', 'timestamp_Minute']] = train_df[['holidays','precipType','summary','timestamp_Month',
       'timestamp_Quarter', 'timestamp_Is_quarter_end',
       'timestamp_Is_quarter_start', 'timestamp_Is_year_end',
       'timestamp_Is_year_start', 'timestamp_Is_month_start','timestamp_Dayofweek', 'timestamp_Dayofyear',
       'timestamp_Hour', 'timestamp_Minute']].astype(str)

In [18]:
train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)

In [39]:
max_prediction_length = 1
max_encoder_length = 48*2
# training_cutoff = train_df["time_idx"].max()# - max_prediction_length

training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="energy_consumption",
    group_ids=["LCLid"],
    # min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    # min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # static_categoricals=['stdorToU', 'Acorn', 'Acorn_grouped', 'file'],
    static_reals=[],
    # time_varying_known_categoricals=['holidays','precipType','summary','timestamp_Month',
    #    'timestamp_Quarter', 'timestamp_Is_quarter_end',
    #    'timestamp_Is_quarter_start', 'timestamp_Is_year_end',
    #    'timestamp_Is_year_start', 'timestamp_Is_month_start','timestamp_Dayofweek', 'timestamp_Dayofyear',
    #    'timestamp_Hour', 'timestamp_Minute'],
    # time_varying_known_reals=["time_idx", 'timestamp_Elapsed'],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "energy_consumption",
    ],
    target_normalizer=GroupNormalizer(
        groups=["LCLid"], transformation=None
    ),  # use softplus and normalize by group
    # add_relative_time_idx=True,
    # add_target_scales=True,
    # add_encoder_length=True,
)

# create validation set (predict=True) which means to predict the last max_prediction_length points in time
# for each series
validation = TimeSeriesDataSet.from_dataset(training, train_df, predict=True, stop_randomization=True)

# create dataloaders for model
batch_size = 512  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

In [20]:
# max_prediction_length = 48
# max_encoder_length = 48*2
# # training_cutoff = train_df["time_idx"].max()# - max_prediction_length

# training = TimeSeriesDataSet(
#     train_df,
#     time_idx="time_idx",
#     target="energy_consumption",
#     group_ids=["LCLid"],
#     min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
#     max_encoder_length=max_encoder_length,
#     min_prediction_length=1,
#     max_prediction_length=max_prediction_length,
#     static_categoricals=['stdorToU', 'Acorn', 'Acorn_grouped', 'file'],
#     static_reals=[],
#     time_varying_known_categoricals=['holidays','precipType','summary','timestamp_Month',
#        'timestamp_Quarter', 'timestamp_Is_quarter_end',
#        'timestamp_Is_quarter_start', 'timestamp_Is_year_end',
#        'timestamp_Is_year_start', 'timestamp_Is_month_start','timestamp_Dayofweek', 'timestamp_Dayofyear',
#        'timestamp_Hour', 'timestamp_Minute'],
#     time_varying_known_reals=["time_idx", 'timestamp_Elapsed'],
#     time_varying_unknown_categoricals=[],
#     time_varying_unknown_reals=[
#         "energy_consumption",
#     ],
#     target_normalizer=GroupNormalizer(
#         groups=["LCLid"], transformation=None
#     ),  # use softplus and normalize by group
#     add_relative_time_idx=True,
#     add_target_scales=True,
#     add_encoder_length=True,
# )

# # create validation set (predict=True) which means to predict the last max_prediction_length points in time
# # for each series
# validation = TimeSeriesDataSet.from_dataset(training, train_df, predict=True, stop_randomization=True)

# # create dataloaders for model
# batch_size = 128  # set this between 32 to 128
# train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
# val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

In [21]:
# x, y = next(iter(train_dataloader))
# # print("x =", x)
# # print("\ny =", y)
# print("\nsizes of x =")
# for key, value in x.items():
#     print(f"\t{key} = {value.size()}")

First entry is x, a dictionary of tensors with the entries (and shapes in brackets)
* encoder_cat (batch_size x n_encoder_time_steps x n_features): long tensor of encoded categoricals for encoder - in the order - [*static_categoricals, *time_varying_known_categoricals]

* encoder_cont (batch_size x n_encoder_time_steps x n_features): float tensor of scaled continuous variables for encoder - in the order - [*static_reals, *time_varying_known_reals, *time_varying_unknown_reals]

* encoder_target (batch_size x n_encoder_time_steps or list thereof with each entry for a different target): float tensor with unscaled continous target or encoded categorical target, list of tensors for multiple targets

* encoder_lengths (batch_size): long tensor with lengths of the encoder time series. No entry will be greater than n_encoder_time_steps

* decoder_cat (batch_size x n_decoder_time_steps x n_features): long tensor of encoded categoricals for decoder [*static_categoricals, *time_varying_known_categoricals]

* decoder_cont (batch_size x n_decoder_time_steps x n_features): float tensor of scaled continuous variables for decoder [*static_reals, *time_varying_known_reals]

* decoder_target (batch_size x n_decoder_time_steps or list thereof with each entry for a different target): float tensor with unscaled continous target or encoded categorical target for decoder - this corresponds to first entry of y, list of tensors for multiple targets

* decoder_lengths (batch_size): long tensor with lengths of the decoder time series. No entry will be greater than n_decoder_time_steps

* group_ids (batch_size x number_of_ids): encoded group ids that identify a time series in the dataset

* target_scale (batch_size x scale_size or list thereof with each entry for a different target): parameters used to normalize the target. Typically these are mean and standard deviation. Is list of tensors for multiple targets.


Second entry is y, a tuple of the form (target, weight)

* target (batch_size x n_decoder_time_steps or list thereof with each entry for a different target): unscaled (continuous) or encoded (categories) targets, list of tensors for multiple targets

* weight (None or batch_size x n_decoder_time_steps): weight

In [40]:
x, y = next(iter(train_dataloader))
# print("x =", x)
# print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")


sizes of x =
	encoder_cat = torch.Size([512, 96, 0])
	encoder_cont = torch.Size([512, 96, 1])
	encoder_target = torch.Size([512, 96])
	encoder_lengths = torch.Size([512])
	decoder_cat = torch.Size([512, 1, 0])
	decoder_cont = torch.Size([512, 1, 1])
	decoder_target = torch.Size([512, 1])
	decoder_lengths = torch.Size([512])
	decoder_time_idx = torch.Size([512, 1])
	groups = torch.Size([512, 1])
	target_scale = torch.Size([512, 2])


In [23]:
from src.dl.ptf_models import SingleStepRNN, SingleStepRNNModel

In [24]:
from typing import Dict

class SimpleRNNModel(SingleStepRNN):
    def __init__(
        self,
        rnn_type: str,
        input_size: int,
        hidden_size: int,
        num_layers: int,
        bidirectional: bool,
    ):
        super().__init__(rnn_type, input_size, hidden_size, num_layers, bidirectional)

    def forward(self, x: Dict):
        x = x["encoder_cont"]
        # x --> (batch_size, seq_len, input_size), y--> (batch_size, seq_len, 1)
        x, _ = self.rnn(x)  # --> (batch_size, seq_len, hidden_size)
        x = self.fc(x)  # --> (batch_size, seq_len, 1)
        return x[:,-1,:]

```python
class SingleStepRNNModel(BaseModel):
    def __init__(self,
        network_callable: Callable,
        rnn_type: str,
        input_size: int,
        hidden_size: int,
        num_layers: int,
        bidirectional: bool, **kwargs):
        # saves arguments in signature to `.hparams` attribute, mandatory call - do not skip this
        self.save_hyperparameters()
        # pass additional arguments to BaseModel.__init__, mandatory call - do not skip this
        super().__init__(**kwargs)
        self.network = network_callable(
            rnn_type = self.hparams.rnn_type,
            input_size=self.hparams.input_size,
            num_layers=self.hparams.num_layers,
            hidden_size=self.hparams.hidden_size,
            bidirectional=self.hparams.bidirectional,
        )

    def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        prediction = self.network(x)
        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])
        # We need to return a dictionary that at least contains the prediction
        # The parameter can be directly forwarded from the input.
        # The conversion to a named tuple can be directly achieved with the `to_network_output` function.
        return self.to_network_output(prediction=prediction)
```

In [35]:
from pytorch_forecasting.metrics import RMSE, MAE

In [41]:
model = SingleStepRNNModel.from_dataset(
    training,
    network_callable=SimpleRNNModel,
    rnn_type="LSTM",
    input_size=len(training.reals),
    hidden_size=128,
    num_layers=1,
    bidirectional=False,
    learning_rate=1e-3,
    optimizer = "adam",
    loss = RMSE(),
    logging_metrics=[RMSE(), MAE()]
)
x, y = next(iter(train_dataloader))
model(x)

Output(prediction=tensor([[0.2732],
        [0.1073],
        [0.5715],
        [0.3891],
        [0.1279],
        [0.1306],
        [0.0830],
        [0.0419],
        [0.2558],
        [0.0694],
        [0.1950],
        [0.1162],
        [0.0692],
        [0.2356],
        [0.0795],
        [0.0711],
        [0.3561],
        [0.0850],
        [0.2493],
        [0.1544],
        [0.2826],
        [0.1428],
        [0.0271],
        [0.4057],
        [0.2490],
        [0.0954],
        [0.3489],
        [0.1654],
        [0.0705],
        [0.2125],
        [0.1254],
        [0.1481],
        [0.0901],
        [0.2695],
        [0.1449],
        [0.0602],
        [0.6247],
        [0.1390],
        [0.5679],
        [0.1434],
        [0.0831],
        [0.1274],
        [0.2820],
        [0.0137],
        [0.5784],
        [0.6617],
        [0.2601],
        [0.1005],
        [0.2955],
        [0.1941],
        [0.1672],
        [0.6145],
        [0.1513],
        [0.0652],
        [0

In [None]:
# # configure network and trainer
# early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
# # lr_logger = LearningRateMonitor()  # log the learning rate
# logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

# trainer = pl.Trainer(
#     max_epochs=30,
#     gpus=0,
#     weights_summary="top",
#     gradient_clip_val=0.1,
#     limit_train_batches=30,  # coment in for training, running valiation every 30 batches
#     # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
#     callbacks=[lr_logger, early_stop_callback],
#     logger=logger,
# )


In [45]:
trainer = pl.Trainer(
    auto_select_gpus=True,
    gpus=-1,
    min_epochs=1,
    max_epochs=10,
    callbacks=[pl.callbacks.EarlyStopping(monitor="val_loss", patience=3)],
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [46]:
trainer.fit(model, train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type           | Params
---------------------------------------------------
0 | loss            | RMSE           | 0     
1 | logging_metrics | ModuleList     | 0     
2 | network         | SimpleRNNModel | 67.2 K
---------------------------------------------------
67.2 K    Trainable params
0         Non-trainable params
67.2 K    Total params
0.269     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [197]:
best_model_path

'C:\\Users\\ManuJoseph\\OneDrive - Thoucentric\\Work\\Projects\\Playground\\AdvancedTimeSeriesForecastingBook\\Github\\Modern-Time-Series-Forecasting-with-Python-\\lightning_logs\\version_1\\checkpoints\\epoch=4-step=41789.ckpt'

In [47]:
best_model_path = trainer.checkpoint_callback.best_model_path
best_model = SingleStepRNNModel.load_from_checkpoint(best_model_path)

In [48]:
# calcualte mean absolute error on validation set
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions = best_model.predict(val_dataloader)
(actuals - predictions).abs().mean()

tensor(0.0736)

In [74]:
train_df["__start_idx__"] = train_df.groupby("LCLid")['time_idx'].transform("max")-48*2
hist_df = train_df.loc[train_df.time_idx>train_df["__start_idx__"]]

In [75]:
full_df = pd.concat([hist_df.drop(columns=["__start_idx__"]), test_df])

In [60]:
full_df

Unnamed: 0,timestamp,LCLid,energy_consumption,frequency,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,...,timestamp_Minute_sin_3,timestamp_Minute_sin_4,timestamp_Minute_sin_5,timestamp_Minute_cos_1,timestamp_Minute_cos_2,timestamp_Minute_cos_3,timestamp_Minute_cos_4,timestamp_Minute_cos_5,train,time_idx
34991,2013-12-29 23:30:00,MAC000061,0.187,30min,37872,Std,ACORN-Q,Adversity,block_96,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,34991
34992,2013-12-30 00:00:00,MAC000061,0.050,30min,37872,Std,ACORN-Q,Adversity,block_96,NO_HOLIDAY,...,0.000000e+00,0.000000e+00,0.000000e+00,1.0,1.0,1.0,1.0,1.0,True,34992
34993,2013-12-30 00:30:00,MAC000061,0.050,30min,37872,Std,ACORN-Q,Adversity,block_96,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,34993
34994,2013-12-30 01:00:00,MAC000061,0.049,30min,37872,Std,ACORN-Q,Adversity,block_96,NO_HOLIDAY,...,0.000000e+00,0.000000e+00,0.000000e+00,1.0,1.0,1.0,1.0,1.0,True,34994
34995,2013-12-30 01:30:00,MAC000061,0.046,30min,37872,Std,ACORN-Q,Adversity,block_96,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,True,34995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223195,2014-01-31 21:30:00,MAC005529,0.431,30min,32688,ToU,ACORN-L,Adversity,block_82,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,False,31387
223196,2014-01-31 22:00:00,MAC005529,0.407,30min,32688,ToU,ACORN-L,Adversity,block_82,NO_HOLIDAY,...,0.000000e+00,0.000000e+00,0.000000e+00,1.0,1.0,1.0,1.0,1.0,False,31388
223197,2014-01-31 22:30:00,MAC005529,0.395,30min,32688,ToU,ACORN-L,Adversity,block_82,NO_HOLIDAY,...,2.143751e-15,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,False,31389
223198,2014-01-31 23:00:00,MAC005529,0.398,30min,32688,ToU,ACORN-L,Adversity,block_82,NO_HOLIDAY,...,0.000000e+00,0.000000e+00,0.000000e+00,1.0,1.0,1.0,1.0,1.0,False,31390


In [107]:
test_ds = TimeSeriesDataSet.from_parameters(training.get_parameters(), full_df.reset_index(drop=True), predict=False, stop_randomization=True)

In [109]:
pred, index = model.predict(test_ds, return_index=True, show_progress_bar=True)

Predict:   0%|          | 0/3488 [00:00<?, ? batches/s]

In [112]:
pred.shape

torch.Size([223200, 1])

In [115]:
index['prediction'] = pred

In [140]:
pred_df = test_df[["LCLid", "timestamp",'energy_consumption','time_idx']].copy()
pred_df = pred_df.merge(index, on=["time_idx","LCLid"], how='left')

In [118]:
pred_df.head()

Unnamed: 0,timestamp,LCLid,energy_consumption,frequency,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,...,timestamp_Minute_sin_4,timestamp_Minute_sin_5,timestamp_Minute_cos_1,timestamp_Minute_cos_2,timestamp_Minute_cos_3,timestamp_Minute_cos_4,timestamp_Minute_cos_5,train,time_idx,prediction
0,2014-01-01 00:00:00,MAC000061,0.165,30min,37872,Std,ACORN-Q,Adversity,block_96,New Year?s Day,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,False,35088,0.109971
1,2014-01-01 00:30:00,MAC000061,0.167,30min,37872,Std,ACORN-Q,Adversity,block_96,New Year?s Day,...,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,False,35089,0.120308
2,2014-01-01 01:00:00,MAC000061,0.15,30min,37872,Std,ACORN-Q,Adversity,block_96,New Year?s Day,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,False,35090,0.120858
3,2014-01-01 01:30:00,MAC000061,0.091,30min,37872,Std,ACORN-Q,Adversity,block_96,New Year?s Day,...,-2.266215e-15,6.123234e-16,-1.0,1.0,-1.0,1.0,-1.0,False,35091,0.114554
4,2014-01-01 02:00:00,MAC000061,0.047,30min,37872,Std,ACORN-Q,Adversity,block_96,New Year?s Day,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,False,35092,0.074378


In [193]:
def evaluate_forecast(pred_df, train_data, fc_column, name, target_name="energy_consumption"):
    metric_l = []
    for _id in tqdm(pred_df.index.get_level_values(0).unique(), desc="Calculating metrics..."):
        target = pred_df.xs(_id)[[target_name]]
        _y_pred = pred_df.xs(_id)[[fc_column]]
        history = train_data.xs(_id)[[target_name]]
        # display(history.tail())
        # display(_y_pred.head())
        # display(target.head())
        metric_l.append(
            calculate_metrics(target, _y_pred, name=name, y_train=history)
        )
    eval_metrics_df = pd.DataFrame(metric_l)
    agg_metrics = {
            "Algorithm": name,
            "MAE": np.nanmean(np.abs(pred_df[fc_column]-pred_df[target_name])),
            "MSE": np.nanmean(np.power(pred_df[fc_column]-pred_df[target_name], 2)),
            "meanMASE": eval_metrics_df.loc[:, "MASE"].mean(),
            "Forecast Bias": 100*(np.nansum(pred_df[fc_column])-np.nansum(pred_df[target_name]))/np.nansum(pred_df[target_name])
    }
    return agg_metrics, eval_metrics_df

In [141]:
pred_df.set_index(['LCLid',"timestamp"], inplace=True)

In [136]:
pred_df[['energy_consumption']]

Unnamed: 0_level_0,Unnamed: 1_level_0,energy_consumption
LCLid,timestamp,Unnamed: 2_level_1
MAC000061,2014-01-01 00:00:00,0.165
MAC000061,2014-01-01 00:30:00,0.167
MAC000061,2014-01-01 01:00:00,0.150
MAC000061,2014-01-01 01:30:00,0.091
MAC000061,2014-01-01 02:00:00,0.047
...,...,...
MAC005529,2014-01-31 21:30:00,0.431
MAC005529,2014-01-31 22:00:00,0.407
MAC005529,2014-01-31 22:30:00,0.395
MAC005529,2014-01-31 23:00:00,0.398


In [134]:
pred_df[['prediction']]

Unnamed: 0_level_0,Unnamed: 1_level_0,prediction
LCLid,timestamp,Unnamed: 2_level_1
MAC000061,2014-01-01 00:00:00,0.109971
MAC000061,2014-01-01 00:30:00,0.120308
MAC000061,2014-01-01 01:00:00,0.120858
MAC000061,2014-01-01 01:30:00,0.114554
MAC000061,2014-01-01 02:00:00,0.074378
...,...,...
MAC005529,2014-01-31 21:30:00,0.378347
MAC005529,2014-01-31 22:00:00,0.386622
MAC005529,2014-01-31 22:30:00,0.355529
MAC005529,2014-01-31 23:00:00,0.318764


In [147]:
eval_metrics_df =  evaluate_forecast(pred_df[['prediction']], pred_df[['energy_consumption']], train_df[['LCLid','timestamp','energy_consumption']].set_index(['LCLid','timestamp']), "test")

Calculating metrics...:   0%|          | 0/150 [00:00<?, ?it/s]

In [152]:
np.nanmean(np.abs(pred_df['prediction']-pred_df['energy_consumption']))

0.079538785

In [157]:
ts_utils.forecast_bias_aggregate??

[1;31mSignature:[0m [0mts_utils[0m[1;33m.[0m[0mforecast_bias_aggregate[0m[1;33m([0m[0mactuals[0m[1;33m,[0m [0mpredictions[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m <no docstring>
[1;31mSource:[0m   
[1;32mdef[0m [0mforecast_bias_aggregate[0m[1;33m([0m[0mactuals[0m[1;33m,[0m [0mpredictions[0m[1;33m)[0m[1;33m:[0m[1;33m
[0m    [1;32mreturn[0m [1;36m100[0m[1;33m*[0m[1;33m([0m[0mnp[0m[1;33m.[0m[0mnansum[0m[1;33m([0m[0mpredictions[0m[1;33m)[0m[1;33m-[0m[0mnp[0m[1;33m.[0m[0mnansum[0m[1;33m([0m[0mactuals[0m[1;33m)[0m[1;33m)[0m[1;33m/[0m[0mnp[0m[1;33m.[0m[0mnansum[0m[1;33m([0m[0mactuals[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mFile:[0m      c:\users\manujoseph\onedrive - thoucentric\work\projects\playground\advancedtimeseriesforecastingbook\github\modern-time-series-forecasting-with-python-\src\utils\ts_utils.py
[1;31mType:[0m      function


In [156]:
np.nanmean(np.power(pred_df['prediction']-pred_df['energy_consumption'], 2))

0.029546931

In [171]:
d = train_df[['LCLid','timestamp','energy_consumption']].set_index(['LCLid','timestamp']).head()
d.xs("MAC000061")[["energy_consumption"]].head()

Unnamed: 0_level_0,energy_consumption
timestamp,Unnamed: 1_level_1
2012-01-01 00:00:00,0.114
2012-01-01 00:30:00,0.113
2012-01-01 01:00:00,0.113
2012-01-01 01:30:00,0.098
2012-01-01 02:00:00,0.06


In [194]:
agg_metrics, eval_metrics_df = evaluate_forecast(
    pred_df = pred_df,
    train_data = train_df[['LCLid','timestamp','energy_consumption']].set_index(['LCLid','timestamp']),
    fc_column="prediction",
    name="test",
)

Calculating metrics...:   0%|          | 0/150 [00:00<?, ?it/s]

In [195]:
agg_metrics

{'Algorithm': 'test',
 'MAE': 0.079538785,
 'MSE': 0.029546931,
 'meanMASE': 1.0054973,
 'Forecast Bias': -5.816829365986762}

In [124]:
test_data = pd.DataFrame(
    dict(
        value=np.tile(np.arange(100,110), 3),
        group=np.repeat(np.arange(3), 10),
        static_cat1=np.repeat(np.arange(4,7), 10),
        static_cat2=np.repeat(np.arange(10,13), 10),
        time_idx=np.tile(np.arange(10), 3),
        static_real1 = np.repeat(np.random.randn(3), 10),
        dynamic_real1 = np.random.randn(30),
        dynamic_cat1 = np.tile(np.arange(40,50), 3),
    )
)
test_data[["static_cat1","static_cat2","dynamic_cat1"]] = test_data[["static_cat1","static_cat2","dynamic_cat1"]].astype(str)
test_data.head()

Unnamed: 0,value,group,static_cat1,static_cat2,time_idx,static_real1,dynamic_real1,dynamic_cat1
0,100,0,4,10,0,-2.166303,1.814835,40
1,101,0,4,10,1,-2.166303,-0.474048,41
2,102,0,4,10,2,-2.166303,-0.053309,42
3,103,0,4,10,3,-2.166303,0.109954,43
4,104,0,4,10,4,-2.166303,0.717073,44


In [149]:
# create the dataset from the pandas dataframe
dataset = TimeSeriesDataSet(
    test_data,
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    static_categoricals=["static_cat1","static_cat2"],
    static_reals=['static_real1'],
    time_varying_known_categoricals=["dynamic_cat1"],
    time_varying_unknown_reals=['value'],
    time_varying_known_reals=['dynamic_real1'],
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
    target_normalizer = None,
    # categorical_encoders=None,
    # scalers = None
)

In [150]:
# convert the dataset to a dataloader
dataloader = dataset.to_dataloader(batch_size=4)

In [151]:
x, y = next(iter(dataloader))
# print("x =", x)
# print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")


sizes of x =
	encoder_cat = torch.Size([4, 5, 3])
	encoder_cont = torch.Size([4, 5, 3])
	encoder_target = torch.Size([4, 5])
	encoder_lengths = torch.Size([4])
	decoder_cat = torch.Size([4, 2, 3])
	decoder_cont = torch.Size([4, 2, 3])
	decoder_target = torch.Size([4, 2])
	decoder_lengths = torch.Size([4])
	decoder_time_idx = torch.Size([4, 2])
	groups = torch.Size([4, 1])
	target_scale = torch.Size([4, 2])


In [152]:
x['encoder_cont'][0], x['decoder_cont'][0]

(tensor([[  1.0283,   0.2793, 101.0000],
         [  1.0283,  -1.3530, 102.0000],
         [  1.0283,  -0.2948, 103.0000],
         [  1.0283,  -0.5262, 104.0000],
         [  1.0283,  -0.2384, 105.0000]]),
 tensor([[  1.0283,  -0.1680, 106.0000],
         [  1.0283,  -0.8070, 107.0000]]))

In [153]:
x["encoder_target"][0], x["decoder_target"][0]

(tensor([101., 102., 103., 104., 105.]), tensor([106., 107.]))

In [154]:
import os
import warnings
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [155]:
from pytorch_forecasting.data.examples import get_stallion_data

data = get_stallion_data()

# add time index
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
data["time_idx"] -= data["time_idx"].min()

# add additional features
data["month"] = data.date.dt.month.astype(str).astype("category")  # categories have be strings
data["log_volume"] = np.log(data.volume + 1e-8)
data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean")
data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean")

# we want to encode special days as one variable and thus need to first reverse one-hot encoding
special_days = [
    "easter_day",
    "good_friday",
    "new_year",
    "christmas",
    "labor_day",
    "independence_day",
    "revolution_day_memorial",
    "regional_games",
    "fifa_u_17_world_cup",
    "football_gold_cup",
    "beer_capital",
    "music_fest",
]
data[special_days] = data[special_days].apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
data.sample(10, random_state=521)

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,...,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries,time_idx,month,log_volume,avg_volume_by_sku,avg_volume_by_agency
291,Agency_25,SKU_03,0.5076,2013-01-01,492612703,718394219,25.845238,1264.162234,1152.473405,111.688829,...,-,-,-,8.835008,228,0,1,-0.678062,1225.306376,99.6504
871,Agency_29,SKU_02,8.748,2015-01-01,498567142,762225057,27.584615,1316.098485,1296.804924,19.293561,...,-,-,-,1.465966,177,24,1,2.168825,1634.434615,11.397086
19532,Agency_47,SKU_01,4.968,2013-09-01,454252482,789624076,30.665957,1269.25,1266.49049,2.75951,...,-,-,-,0.217413,322,8,9,1.603017,2625.472644,48.29565
2089,Agency_53,SKU_07,21.6825,2013-10-01,480693900,791658684,29.197727,1193.842373,1128.124395,65.717978,...,-,beer_capital,-,5.504745,240,9,10,3.076505,38.529107,2511.035175
9755,Agency_17,SKU_02,960.552,2015-03-01,515468092,871204688,23.60812,1338.334248,1232.128069,106.206179,...,-,-,music_fest,7.935699,259,26,3,6.867508,2143.677462,396.02214
7561,Agency_05,SKU_03,1184.6535,2014-02-01,425528909,734443953,28.668254,1369.556376,1161.135214,208.421162,...,-,-,-,15.218151,21,13,2,7.077206,1566.643589,1881.866367
19204,Agency_11,SKU_05,5.5593,2017-08-01,623319783,1049868815,31.915385,1922.486644,1651.307674,271.17897,...,-,-,-,14.105636,17,55,8,1.715472,1385.225478,109.6992
8781,Agency_48,SKU_04,4275.1605,2013-03-01,509281531,892192092,26.767857,1761.258209,1546.05967,215.198539,...,-,-,music_fest,12.218455,151,2,3,8.360577,1757.950603,1925.272108
2540,Agency_07,SKU_21,0.0,2015-10-01,544203593,761469815,28.987755,0.0,0.0,0.0,...,-,-,-,0.0,300,33,10,-18.420681,0.0,2418.71955
12084,Agency_21,SKU_03,46.3608,2017-04-01,589969396,940912941,32.47891,1675.922116,1413.571789,262.350327,...,-,-,-,15.654088,181,51,4,3.836454,2034.293024,109.3818


In [162]:
max_prediction_length = 3
max_encoder_length = 6
training_cutoff = data["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="volume",
    group_ids=["agency", "sku"],
    min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["agency", "sku"],
    static_reals=["avg_population_2017", "avg_yearly_household_income_2017"],
    time_varying_known_categoricals=["special_days", "month"],
    variable_groups={"special_days": special_days},  # group of categorical variables can be treated as one variable
    time_varying_known_reals=["time_idx", "price_regular", "discount_in_percent"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "volume",
        "log_volume",
        "industry_volume",
        "soda_volume",
        "avg_max_temp",
        "avg_volume_by_agency",
        "avg_volume_by_sku",
    ],
    target_normalizer=None,
    add_relative_time_idx=True,
    add_target_scales=False,
    add_encoder_length=False,
)

# create validation set (predict=True) which means to predict the last max_prediction_length points in time
# for each series
validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True)

# create dataloaders for model
batch_size = 128  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

In [163]:
x, y = next(iter(train_dataloader))
# print("x =", x)
# print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")


sizes of x =
	encoder_cat = torch.Size([128, 6, 15])
	encoder_cont = torch.Size([128, 6, 13])
	encoder_target = torch.Size([128, 6])
	encoder_lengths = torch.Size([128])
	decoder_cat = torch.Size([128, 3, 15])
	decoder_cont = torch.Size([128, 3, 13])
	decoder_target = torch.Size([128, 3])
	decoder_lengths = torch.Size([128])
	decoder_time_idx = torch.Size([128, 3])
	groups = torch.Size([128, 2])
	target_scale = torch.Size([128, 2])


In [164]:
x['encoder_cont'][0], x['decoder_cont'][0]

(tensor([[ 9.2149e-01,  1.1006e+00, -5.4705e-01,  9.7148e-01,  2.0805e+00,
          -1.0000e+00,  3.7041e+03,  7.0272e-01, -2.8383e-01,  2.9085e-01,
           1.4594e+00,  1.7968e+00, -3.1417e-01],
         [ 9.2149e-01,  1.1006e+00, -4.8626e-01,  9.7823e-01,  1.7988e+00,
          -8.3333e-01,  2.3438e+03,  6.4726e-01, -2.0356e+00, -1.0318e+00,
           2.6880e-01,  1.3209e+00, -3.8310e-01],
         [ 9.2149e-01,  1.1006e+00, -4.2548e-01,  9.7785e-01,  1.0943e+00,
          -6.6667e-01,  2.9568e+03,  6.7541e-01, -1.6238e+00, -7.2117e-01,
           1.5581e-01,  1.7151e+00, -4.1870e-01],
         [ 9.2149e-01,  1.1006e+00, -3.6470e-01,  9.6889e-01,  1.1417e+00,
          -5.0000e-01,  2.3109e+03,  6.4555e-01, -9.0026e-01, -1.7142e+00,
          -1.6613e+00,  1.5119e+00, -4.6163e-01],
         [ 9.2149e-01,  1.1006e+00, -3.0392e-01,  9.7604e-01,  2.2039e+00,
          -3.3333e-01,  2.7434e+03,  6.6634e-01, -2.8630e-01, -1.9705e+00,
          -1.9099e+00,  2.0650e+00,  8.6780e-02],


In [165]:
x["encoder_target"][0], x["decoder_target"][0]

(tensor([3704.0911, 2343.7883, 2956.7808, 2310.9436, 2743.4189, 1345.4784]),
 tensor([1929.4604, 2273.4346, 2341.2322]))

In [6]:
y[0]

NameError: name 'y' is not defined