In [1]:
import os
import warnings

#warnings.filterwarnings("ignore")  # avoid printing out absolute paths

#os.chdir("../../..")

In [2]:
import torch
from torch import nn
from typing import Dict

from pytorch_forecasting.models import BaseModel

import numpy as np
import pandas as pd

import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [3]:
# Get raw data
def get_raw_data(path):
    """Load the raw measurement workbook into a DataFrame.

    Parameters
    ----------
    path : str or path-like
        Location of the Excel file; forwarded unchanged to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_excel(path)`` with the reader's default options.
    """
    return pd.read_excel(path)

# Workbook location, resolved relative to the notebook's working directory
raw_data_path = '../hydro-ml/data/raw_data/cascaded_use_case_data.xlsx'
raw_data = get_raw_data(raw_data_path)

             Datetime  Vindhastighet Nilsebu  Lufttemp. Nilsebu  \
0 2015-01-01 00:00:00                    6.4                1.5   
1 2015-01-01 01:00:00                    5.8                1.9   
2 2015-01-01 02:00:00                    6.1                1.6   

   Vindretning Nilsebu  RelHum Nilsebu  Vannstand Lyngsåna  \
0                200.0            97.3               0.699   
1                197.5            95.0               0.700   
2                205.3            96.7               0.714   

   Vanntemp. Hiafossen  Vannstand Hiafossen  Lufttemp Fister  Nedbør Fister  \
0                  0.9                0.945              7.0            1.8   
1                  0.9                0.946              7.0            2.0   
2                  0.9                0.951              7.3            2.2   

   ...  SNOW_MELT_HBV  SNOW_SWE_HBV  Evap_HBV  SOIL_WAT_HBV  GR_WAT_HBV  \
0  ...            NaN           NaN       NaN           NaN         NaN   
1  ...         

In [4]:
# Single-series modelling frame: the `Q_Kalltveit` column cast to float32, plus the
# bookkeeping columns that the TimeSeriesDataSet cells refer to by name.
data = (
    raw_data[['Q_Kalltveit']]
    .astype(np.float32)
    .assign(
        time_idx=lambda frame: frame.index,  # index value serves as the integer time step
        Place="Kalltveit",                   # constant label, later used as the only group id
        Datetime=raw_data['Datetime'],       # timestamp kept alongside the target
    )
)
data

Unnamed: 0,Q_Kalltveit,time_idx,Place,Datetime
0,3.37060,0,Kalltveit,2015-01-01 00:00:00
1,3.45242,1,Kalltveit,2015-01-01 01:00:00
2,3.56260,2,Kalltveit,2015-01-01 02:00:00
3,3.74423,3,Kalltveit,2015-01-01 03:00:00
4,4.02901,4,Kalltveit,2015-01-01 04:00:00
...,...,...,...,...
63283,2.43457,63283,Kalltveit,2022-03-21 19:00:00
63284,2.42514,63284,Kalltveit,2022-03-21 20:00:00
63285,2.41576,63285,Kalltveit,2022-03-21 21:00:00
63286,2.40640,63286,Kalltveit,2022-03-21 22:00:00


In [5]:
from sklearn.model_selection import train_test_split

# Chronological split (shuffle=False keeps row order):
#   first 70 % of rows           -> train
#   next  24 % (80 % of the rest) -> val
#   last   6 % (20 % of the rest) -> test
train, temp = train_test_split(
    data,
    test_size=0.3,
    shuffle=False,
)
val, test = train_test_split(
    temp,
    test_size=0.2,
    shuffle=False,
)

# Show the resulting (rows, columns) of each partition
tuple(part.shape for part in (train, val, test))

((44301, 4), (15189, 4), (3798, 4))

In [20]:
from pytorch_forecasting import TimeSeriesDataSet

# Window geometry: every sample has 24 encoder steps and 12 steps to predict
max_encoder_length = 24
max_prediction_length = 12

# Build the training dataset from the `train` frame. The target is also the only
# model input, declared as a real-valued variable that is unknown in the future.
training = TimeSeriesDataSet(
    train,
    time_idx="time_idx",
    target="Q_Kalltveit",
    group_ids=["Place"],
    time_varying_unknown_reals=["Q_Kalltveit"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
)

# Display the fully resolved configuration, including defaults filled in by the library
training.get_parameters()


{'time_idx': 'time_idx',
 'target': 'Q_Kalltveit',
 'group_ids': ['Place'],
 'weight': None,
 'max_encoder_length': 24,
 'min_encoder_length': 24,
 'min_prediction_idx': 0,
 'min_prediction_length': 12,
 'max_prediction_length': 12,
 'static_categoricals': [],
 'static_reals': [],
 'time_varying_known_categoricals': [],
 'time_varying_known_reals': [],
 'time_varying_unknown_categoricals': [],
 'time_varying_unknown_reals': ['Q_Kalltveit'],
 'variable_groups': {},
 'constant_fill_strategy': {},
 'allow_missing_timesteps': False,
 'lags': {},
 'add_relative_time_idx': False,
 'add_target_scales': False,
 'add_encoder_length': False,
 'target_normalizer': EncoderNormalizer(),
 'categorical_encoders': {'__group_id__Place': NaNLabelEncoder()},
 'scalers': {},
 'randomize_length': None,
 'predict_mode': False}

In [21]:
# Validation and test datasets are derived from `training` so they reuse its
# configuration. Only `testing` is built with predict=True.
validation = TimeSeriesDataSet.from_dataset(training, val, stop_randomization=True)
testing = TimeSeriesDataSet.from_dataset(training, test, predict=True, stop_randomization=True)

# One set of loader options shared by all three splits
batch_size = 64
loader_options = dict(batch_size=batch_size, num_workers=12)

train_dataloader = training.to_dataloader(train=True, **loader_options)
val_dataloader = validation.to_dataloader(train=False, **loader_options)
test_dataloader = testing.to_dataloader(train=False, **loader_options)


In [22]:
# Reference score: mean absolute error of the Baseline model on the validation loader
actuals = torch.cat([target for _inputs, (target, _weight) in val_dataloader])
baseline_predictions = Baseline().predict(val_dataloader)

baseline_abs_error = (actuals - baseline_predictions).abs()
baseline_abs_error.mean().item()

  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."


0.9628713130950928

In [23]:
# Configure an initial trainer and network.
# NOTE(review): no learning-rate search is run in the visible notebook, and both
# `trainer` and `tft` are reassigned by a later cell before training starts.
pl.seed_everything(42)  # fix the global seed so runs are repeatable
trainer = pl.Trainer(
    # Run on CPU. Replaces `gpus=0`, which Lightning 1.7 reports as deprecated
    # and scheduled for removal.
    accelerator="cpu",
    # clipping gradients is a hyperparameter and important to prevent divergence
    # of the gradient for recurrent neural networks
    gradient_clip_val=0.1,
)


tft = TemporalFusionTransformer.from_dataset(
    training,
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.03,
    hidden_size=16,  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=1,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    hidden_continuous_size=8,  # set to <= hidden_size
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Global seed set to 42
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 14.8k




In [26]:
# Configure the trainer and network that are actually used for fitting
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min",
)
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # write results to TensorBoard

trainer = pl.Trainer(
    max_epochs=30,
    # Run on CPU. Replaces `gpus=0`, which Lightning 1.7 reports as deprecated
    # and scheduled for removal.
    accelerator="cpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=30,  # cap every training epoch at 30 batches
    # fast_dev_run=True,  # enable to check that the network or dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)


tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # log every 10 batches
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 14.8k


  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."


In [27]:
# Train the TFT on the training loader, validating against the validation loader.
# NOTE(review): the saved output of this cell ends with
# "IndexError: list index out of range" during the sanity check. The traceback is
# not preserved, so the cause still has to be diagnosed; the cells further down
# need a completed training run.
trainer.fit(
    model=tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

Missing logger folder: lightning_logs\lightning_logs

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 16    
4  | static_variable_selection          | VariableSelectionNetwork        | 0     
5  | encoder_variable_selection         | VariableSelectionNetwork        | 528   
6  | decoder_variable_selection         | VariableSelectionNetwork        | 0     
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial

Sanity Checking: 0it [00:00, ?it/s]

IndexError: list index out of range

In [None]:
import pickle

from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Search ranges handed to the tuner, one (low, high) pair per hyperparameter
search_ranges = dict(
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
)

# Run the hyperparameter study on the training and validation loaders
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_test",
    n_trials=200,
    max_epochs=50,
    trainer_kwargs={"limit_train_batches": 30},
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=True,
    **search_ranges,
)

# Persist the study so tuning can be inspected or resumed later
with open("test_study.pkl", "wb") as study_file:
    pickle.dump(study, study_file)

# Report the best hyperparameters found
print(study.best_trial.params)

In [None]:
# Restore the best checkpoint as judged by validation loss. With early stopping
# in place this need not be the checkpoint of the final epoch.
checkpoint_cb = trainer.checkpoint_callback
best_model_path = checkpoint_cb.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

In [None]:
# Mean absolute error of the best model on the validation set
actuals = torch.cat([targets[0] for _inputs, targets in val_dataloader])
predictions = best_tft.predict(val_dataloader)

abs_error = (actuals - predictions).abs()
abs_error.mean()

In [None]:
# In "raw" mode the prediction is a dictionary from which all kinds of information,
# including quantiles, can be extracted; return_x=True also returns the inputs.
raw_predictions, x = best_tft.predict(
    val_dataloader,
    return_x=True,
    mode="raw",
)

In [None]:
# Plot the forecasts for the first ten validation samples
for idx in range(10):
    best_tft.plot_prediction(
        x,
        raw_predictions,
        idx=idx,
        add_loss_to_title=True,
    )

In [None]:
# Calculate the metric used to pick which samples to display: mean SMAPE per sample
predictions = best_tft.predict(val_dataloader)
smape_unreduced = SMAPE(reduction="none")(predictions, actuals)
mean_losses = smape_unreduced.mean(1)
indices = mean_losses.argsort(descending=True)  # largest loss first

# Plot the ten samples with the highest mean SMAPE
for idx in range(10):
    worst_sample = indices[idx]
    best_tft.plot_prediction(
        x,
        raw_predictions,
        idx=worst_sample,
        add_loss_to_title=SMAPE(quantiles=best_tft.loss.quantiles),
    )