In [1]:
import os
import warnings

warnings.filterwarnings("ignore")  # avoid printing out absolute paths
import tensorflow as tf 
import tensorboard as tb 
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss,MAE,MAPE,RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [4]:
# Loading the dataset from an Excel file into a pandas DataFrame.
# The path provided should be updated to where the actual file is located.
import pandas as pd

data = pd.read_excel('Hawaii.xlsx')

# Converting the 'year' and 'day' columns to string data types for consistency
# and to facilitate any operations that require string manipulation.
data["year"] = data["year"].astype(str)
data["day"] = data["day"].astype(str)

# Ensuring that the 'Holiday' column is of type string. This is particularly useful
# if the column contains textual data that represents holiday names or boolean values ('True', 'False').
data['Holiday'] = data['Holiday'].astype(str)

# The 'tourist', 'Trend', 'Seasonal', and 'Resid' columns are converted to floating point numbers.
# This is essential for any subsequent mathematical operations and statistical analysis.
# The 'tourist' column represents the number of tourists,
# whereas 'Trend', 'Seasonal', and 'Resid' are components derived from a decomposition technique
# such as RobustSTL, which are used for time series forecasting.
data["tourist"] = data["tourist"].astype("float64")
data["Trend"] = data["Trend"].astype("float64")
data["Seasonal"] = data["Seasonal"].astype("float64")
data["Resid"] = data["Resid"].astype("float64")

# At this point, the 'data' DataFrame is ready for further analysis or preprocessing steps,
# such as normalization, scaling, or additional feature engineering.


In [5]:
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

# Define the maximum prediction length and encoder length
max_prediction_length = 3  # The number of time steps the model is predicting into the future
max_encoder_length = 30  # The number of past time steps the model is using to make predictions

# Initialize a TimeSeriesDataSet object, which structures the data for the Temporal Fusion Transformer model.
# It includes various parameters to configure the dataset for time series forecasting.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= 4481],  # Filter data up to a certain time index for training
    time_idx="time_idx",  # The name of the column that provides the time index
    target="Trend",  # The target column to predict (one of the components from the RobustSTL decomposition)
    min_encoder_length=max_encoder_length // 2,  # Minimum length of the encoder, half of the max_encoder_length
    max_encoder_length=max_encoder_length,  # Maximum length of historical data used for predictions
    min_prediction_length=1,  # Minimum prediction length
    max_prediction_length=max_prediction_length,  # Maximum prediction length
    time_varying_known_categoricals=["month", "day of the week", "day", "Holiday"],  # Known categorical features
    time_varying_known_reals=["time_idx"],  # Known real features, like time index
    time_varying_unknown_categoricals=[],  # Unknown categorical features, if any
    time_varying_unknown_reals=["Trend"],  # Unknown real features, which includes the target 'Trend' itself
    group_ids=['destination'],  # The column that identifies the time series group
    target_normalizer=GroupNormalizer(
        groups=['destination'], transformation="softplus"  # A normalization technique
    ),
    add_relative_time_idx=True,  # Adds a column for relative time index
    add_target_scales=True,  # Adds columns for target scales
    add_encoder_length=True,  # Adds a column for encoder length
    allow_missing_timesteps=True,  # Allows model to handle missing timesteps if there are any
)


In [6]:
# Using the previously created TimeSeriesDataSet for training, we create a validation dataset.
# The predict=True flag indicates that the validation dataset should be set up for prediction tasks,
# specifically, it should include the last 'max_prediction_length' points for each time series.

validation = TimeSeriesDataSet.from_dataset(
    training,  # Use the same configurations as the training dataset
    data,  # Source data
    predict=True,  # Indicates the dataset is for prediction
    stop_randomization=True  # Disables randomization when creating batches of data
)

# Create PyTorch DataLoaders for the model. These will be used to efficiently load data in batches during training and validation.
batch_size = 128  # The batch size defines how many samples per batch to load. It is set to 128 and should be adjusted according to the available memory.

# Check if a GPU is available and set PyTorch to use the GPU if possible. Otherwise, it will default to using the CPU.
if torch.cuda.is_available():
    device = "cuda"  # Use GPU
else:
    device = "cpu"  # Use CPU

# Convert the training and validation datasets to PyTorch DataLoaders.
# The DataLoaders are moved to the specified device (either GPU or CPU).
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)  # DataLoader for training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)  # DataLoader for validation, with a larger batch size to speed up evaluation


In [None]:
# This section is dedicated to hyperparameter optimization using the Optuna framework
# integrated within the pytorch-forecasting package. This step is optional due to its
# time-consuming nature, but it is crucial for fine-tuning the model to achieve better performance.


import pickle
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters


# Initialize the hyperparameter optimization study which will search for the best hyperparameters
# over a specified number of trials.
study = optimize_hyperparameters(
    train_dataloader,  # DataLoader containing the training data
    val_dataloader,  # DataLoader containing the validation data
    model_path="optuna_test",  # Directory where the models are saved during optimization
    n_trials=50,  # Number of trials to run
    max_epochs=50,  # Maximum number of epochs to train the model for each trial
    gradient_clip_val_range=(0.01, 1.0),  # Range for gradient clipping for avoiding exploding gradients
    hidden_size_range=(8, 128),  # Range for the size of hidden layers
    hidden_continuous_size_range=(8, 128),  # Range for the size of hidden continuous layers
    attention_head_size_range=(1, 4),  # Range for the number of attention heads
    learning_rate_range=(0.001, 0.1),  # Range for the learning rate
    dropout_range=(0.1, 0.3),  # Range for dropout rates to prevent overfitting
    trainer_kwargs=dict(limit_train_batches=30),  # Limit the number of batches for training to speed up epochs
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate when a plateau is reached
    use_learning_rate_finder=False  # Whether to use the learning rate finder (here it is turned off)
)

# Save the results of the study to a file so that we can resume the optimization later if needed.
# This is useful for long-running optimizations that may need to be stopped and restarted.
with open("test_study.pkl", "wb") as fout:
    pickle.dump(study, fout)

# After the optimization study is complete, print out the best hyperparameters found.
# These parameters can be used to configure the model for the final training.
print(study.best_trial.params)


## Trend

In [11]:
# This block configures the network and the training process. It is specifically set up to train a model
# to predict the 'Trend' component of the dataset. Subsequent models will be trained similarly to predict
# the 'Seasonal' and 'Resid' components.from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import ModelCheckpoint

# Setup checkpoints to save the model during training, specifically we save:
# - The last model weights
# - The model with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss for checkpointing
    mode="min",  # Mode 'min' saves the model when the monitored metric (val_loss) is minimized
    save_last=True,  # Save the last model state at the end of training
    save_top_k=1,  # Save only the top 1 model with the lowest val_loss
    filename="best_model_{epoch}",  # Custom filename for the checkpoints
    dirpath="saved_models"  # Directory to save model checkpoints
)

# Early stopping callback to stop training early if validation loss has not improved
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,  # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=False,
    mode="min"  # Mode 'min' will stop when the quantity monitored has stopped decreasing
)

# Learning rate monitor to log the learning rate
lr_logger = LearningRateMonitor()

# TensorBoard logger for visualization
logger = TensorBoardLogger("lightning_logs")

# Initialize the Trainer with configurations like max epochs, GPU usage, gradient clipping
trainer = pl.Trainer(
    max_epochs=50,  # Number of maximum epochs to train the model
    gpus=1,  # Number of GPUs to use for training
    enable_model_summary=True,  # Enables the printing of a model summary before training
    gradient_clip_val=0.03911626926390909,  # Gradient clipping value for avoiding exploding gradients
    limit_train_batches=30,  # Limiting the number of batches per training epoch for faster training
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],  # List of callbacks to be used during training
    logger=logger,  # Logger to be used for training process
)

# Initialize the Temporal Fusion Transformer model with specific hyperparameters
tft = TemporalFusionTransformer.from_dataset(
    training,  # TimeSeriesDataSet created previously
    learning_rate=0.00842077448532244,  # Learning rate of the model
    hidden_size=125,  # Size of the hidden state in the model
    attention_head_size=1,  # Number of attention heads
    dropout=0.15160823136480017,  # Dropout rate for regularization
    hidden_continuous_size=17,  # Size of the continuous hidden state
    output_size=7,  # Number of outputs of the model (for quantile loss)
    loss=QuantileLoss(),  # Type of loss function to use (quantile loss for probabilistic forecasting)
    log_interval=10,  # Interval for logging the learning rate
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate on plateau
)

# Print the number of parameters in the network to ensure model complexity is manageable
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")





  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 858.9k


In [12]:
# Now that the Temporal Fusion Transformer model and the PyTorch Lightning trainer are configured,
# we can start the training process. The 'fit' method will train the model on the data provided by
# the training DataLoader and evaluate it on the validation DataLoader.

trainer.fit(
    tft,  # The initialized Temporal Fusion Transformer model
    train_dataloaders=train_dataloader,  # DataLoader providing the training data batch by batch
    val_dataloaders=val_dataloader,   # DataLoader providing the validation data
)

# During the training process, the model's performance is evaluated on the validation set at the end
# of each epoch. The best model according to the validation loss will be saved due to the ModelCheckpoint
# callback configured earlier. Early stopping is also in place to prevent overfitting if the validation loss
# doesn't improve for a set number of epochs.

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 539   
3  | prescalers                         | ModuleDict                      | 204   
4  | static_variable_selection          | VariableSelectionNetwork        | 17

Epoch 0:  97%|███████████████████████▏| 30/31 [00:10<00:00,  2.98it/s, loss=1.52e+03, v_num=4, train_loss_step=1.16e+3]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|████████| 31/31 [00:10<00:00,  2.84it/s, loss=1.52e+03, v_num=4, train_loss_step=1.16e+3, val_loss=871.0][A
Epoch 1:  97%|▉| 30/31 [00:10<00:00,  2.80it/s, loss=927, v_num=4, train_loss_step=578.0, val_loss=871.0, train_loss_ep[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|█| 31/31 [00:11<00:00,  2.66it/s, loss=927, v_num=4, train_l

Epoch 31:  97%|▉| 30/31 [00:10<00:00,  2.88it/s, loss=355, v_num=4, train_loss_step=314.0, val_loss=136.0, train_loss_e[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 31: 100%|█| 31/31 [00:11<00:00,  2.73it/s, loss=355, v_num=4, train_loss_step=314.0, val_loss=117.0, train_loss_e[A
Epoch 32:  97%|▉| 30/31 [00:10<00:00,  2.88it/s, loss=367, v_num=4, train_loss_step=313.0, val_loss=117.0, train_loss_e[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 32: 100%|█| 31/31 [00:12<00:00,  2.49it/s, loss=367, v_num=4, tra

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|█| 31/31 [00:18<00:00,  1.69it/s, loss=333, v_num=4, train_loss_step=327.0, val_loss=71.30, train_loss_e


In [7]:
# After the training process is complete, we can retrieve the path to the best model's checkpoint.
# This model checkpoint will have the lowest validation loss observed during training due to the
# configuration of the ModelCheckpoint callback.

best_model_path = trainer.checkpoint_callback.best_model_path  # Path to the best model's checkpoint

# Using the best model's checkpoint, we load the trained Temporal Fusion Transformer model.
# This model is ready for making predictions or can be used for further analysis.

best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# The 'best_tft' object now contains the best performing model as per the validation set,
# and it can be used to make predictions on new data or evaluate its performance on a test set.



In [9]:
raw_predictions, x = best_tft.predict(val_dataloader,mode="raw", return_x=True)

raw_predictions[0][:, :, 3]

tensor([[19757.6543, 19776.3711, 19728.0664]])

In [30]:
Trend_forecasting=[19757.6543, 19776.3711, 19728.0664]
Trend_true=[19879.3770, 19879.0254, 19877.6895]  

## Seasonality
#### The following code is a repetition of the 3rd to 10th code blocks from the same notebook, adjusted to predict the 'Seasonal' component instead of 'Trend'. The 'target' and 'time_varying_unknown_reals' parameters are modified accordingly.

In [11]:
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

# Define the maximum prediction length and encoder length
max_prediction_length = 3  # The number of time steps the model is predicting into the future
max_encoder_length = 30  # The number of past time steps the model is using to make predictions

# Initialize a TimeSeriesDataSet object, which structures the data for the Temporal Fusion Transformer model.
# It includes various parameters to configure the dataset for time series forecasting.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= 4481],  # Filter data up to a certain time index for training
    time_idx="time_idx",  # The name of the column that provides the time index
    target="Seasonal",  # Set 'Seasonal' as the target variable for prediction.
    min_encoder_length=max_encoder_length // 2,  # Minimum length of the encoder, half of the max_encoder_length
    max_encoder_length=max_encoder_length,  # Maximum length of historical data used for predictions
    min_prediction_length=1,  # Minimum prediction length
    max_prediction_length=max_prediction_length,  # Maximum prediction length
    time_varying_known_categoricals=["month", "day of the week", "day", "Holiday"],  # Known categorical features
    time_varying_known_reals=["time_idx"],  # Known real features, like time index
    time_varying_unknown_categoricals=[],  # Unknown categorical features, if any
    time_varying_unknown_reals=["Seasonal"],  # Set 'Seasonal' as the unknown real feature.
    group_ids=['destination'],  # The column that identifies the time series group
    target_normalizer=GroupNormalizer(
        groups=['destination'], transformation="softplus"  # A normalization technique
    ),
    add_relative_time_idx=True,  # Adds a column for relative time index
    add_target_scales=True,  # Adds columns for target scales
    add_encoder_length=True,  # Adds a column for encoder length
    allow_missing_timesteps=True,  # Allows model to handle missing timesteps if there are any
)


In [12]:
# Using the previously created TimeSeriesDataSet for training, we create a validation dataset.
# The predict=True flag indicates that the validation dataset should be set up for prediction tasks,
# specifically, it should include the last 'max_prediction_length' points for each time series.

validation = TimeSeriesDataSet.from_dataset(
    training,  # Use the same configurations as the training dataset
    data,  # Source data
    predict=True,  # Indicates the dataset is for prediction
    stop_randomization=True  # Disables randomization when creating batches of data
)

# Create PyTorch DataLoaders for the model. These will be used to efficiently load data in batches during training and validation.
batch_size = 128  # The batch size defines how many samples per batch to load. It is set to 128 and should be adjusted according to the available memory.

# Check if a GPU is available and set PyTorch to use the GPU if possible. Otherwise, it will default to using the CPU.
if torch.cuda.is_available():
    device = "cuda"  # Use GPU
else:
    device = "cpu"  # Use CPU

# Convert the training and validation datasets to PyTorch DataLoaders.
# The DataLoaders are moved to the specified device (either GPU or CPU).
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)  # DataLoader for training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)  # DataLoader for validation, with a larger batch size to speed up evaluation


In [13]:
# This block configures the network and the training process. It is specifically set up to train a model
# to predict the 'Trend' component of the dataset. Subsequent models will be trained similarly to predict
# the 'Seasonal' and 'Resid' components.from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import ModelCheckpoint

# Setup checkpoints to save the model during training, specifically we save:
# - The last model weights
# - The model with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss for checkpointing
    mode="min",  # Mode 'min' saves the model when the monitored metric (val_loss) is minimized
    save_last=True,  # Save the last model state at the end of training
    save_top_k=1,  # Save only the top 1 model with the lowest val_loss
    filename="best_model_{epoch}",  # Custom filename for the checkpoints
    dirpath="saved_models"  # Directory to save model checkpoints
)

# Early stopping callback to stop training early if validation loss has not improved
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,  # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=False,
    mode="min"  # Mode 'min' will stop when the quantity monitored has stopped decreasing
)

# Learning rate monitor to log the learning rate
lr_logger = LearningRateMonitor()

# TensorBoard logger for visualization
logger = TensorBoardLogger("lightning_logs")

# Initialize the Trainer with configurations like max epochs, GPU usage, gradient clipping
trainer = pl.Trainer(
    max_epochs=50,  # Number of maximum epochs to train the model
    gpus=1,  # Number of GPUs to use for training
    enable_model_summary=True,  # Enables the printing of a model summary before training
    gradient_clip_val=0.03911626926390909,  # Gradient clipping value for avoiding exploding gradients
    limit_train_batches=30,  # Limiting the number of batches per training epoch for faster training
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],  # List of callbacks to be used during training
    logger=logger,  # Logger to be used for training process
)

# Initialize the Temporal Fusion Transformer model with specific hyperparameters
tft = TemporalFusionTransformer.from_dataset(
    training,  # TimeSeriesDataSet created previously
    learning_rate=0.00842077448532244,  # Learning rate of the model
    hidden_size=125,  # Size of the hidden state in the model
    attention_head_size=1,  # Number of attention heads
    dropout=0.15160823136480017,  # Dropout rate for regularization
    hidden_continuous_size=17,  # Size of the continuous hidden state
    output_size=7,  # Number of outputs of the model (for quantile loss)
    loss=QuantileLoss(),  # Type of loss function to use (quantile loss for probabilistic forecasting)
    log_interval=10,  # Interval for logging the learning rate
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate on plateau
)

# Print the number of parameters in the network to ensure model complexity is manageable
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")






  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 858.9k


In [14]:
# Now that the Temporal Fusion Transformer model and the PyTorch Lightning trainer are configured,
# we can start the training process. The 'fit' method will train the model on the data provided by
# the training DataLoader and evaluate it on the validation DataLoader.

trainer.fit(
    tft,  # The initialized Temporal Fusion Transformer model
    train_dataloaders=train_dataloader,  # DataLoader providing the training data batch by batch
    val_dataloaders=val_dataloader,   # DataLoader providing the validation data
)

# During the training process, the model's performance is evaluated on the validation set at the end
# of each epoch. The best model according to the validation loss will be saved due to the ModelCheckpoint
# callback configured earlier. Early stopping is also in place to prevent overfitting if the validation loss
# doesn't improve for a set number of epochs.

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 539   
3  | prescalers                         | ModuleDict                      | 204   
4  | static_variable_selection          | VariableSelectionNetwork        | 17

Epoch 0:  97%|██████████████████████████████ | 30/31 [00:10<00:00,  2.77it/s, loss=283, v_num=5, train_loss_step=236.0]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|███████████████| 31/31 [00:11<00:00,  2.63it/s, loss=283, v_num=5, train_loss_step=236.0, val_loss=80.20][A
Epoch 1:  97%|▉| 30/31 [00:11<00:00,  2.72it/s, loss=199, v_num=5, train_loss_step=181.0, val_loss=80.20, train_loss_ep[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|█| 31/31 [00:12<00:00,  2.58it/s, loss=199, v_num=5, train_l

In [18]:
# After the training process is complete, we can retrieve the path to the best model's checkpoint.
# This model checkpoint will have the lowest validation loss observed during training due to the
# configuration of the ModelCheckpoint callback.

best_model_path = trainer.checkpoint_callback.best_model_path  # Path to the best model's checkpoint

# Using the best model's checkpoint, we load the trained Temporal Fusion Transformer model.
# This model is ready for making predictions or can be used for further analysis.

best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# The 'best_tft' object now contains the best performing model as per the validation set,
# and it can be used to make predictions on new data or evaluate its performance on a test set.


'D:\\Jupyter notebook\\saved_models\\best_model_epoch=3.ckpt'

In [16]:
raw_predictions, x = best_tft.predict(val_dataloader,mode="raw", return_x=True)

raw_predictions[0][:, :, 3]

tensor([[7085.0757, 6965.2119, 7035.4448]])

In [29]:
Seasonal_forecasting=[7085.0757, 6965.2119, 7035.4448]
Seasonal_true=[7073.4546, 6815.4194, 6926.9561]

# Resid

In [19]:
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

# Define the maximum prediction length and encoder length
max_prediction_length = 3  # The number of time steps the model is predicting into the future
max_encoder_length = 30  # The number of past time steps the model is using to make predictions

# Initialize a TimeSeriesDataSet object, which structures the data for the Temporal Fusion Transformer model.
# It includes various parameters to configure the dataset for time series forecasting.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= 4481],  # Filter data up to a certain time index for training
    time_idx="time_idx",  # The name of the column that provides the time index
    target="Resid",  # The target column to predict (one of the components from the RobustSTL decomposition)
    min_encoder_length=max_encoder_length // 2,  # Minimum length of the encoder, half of the max_encoder_length
    max_encoder_length=max_encoder_length,  # Maximum length of historical data used for predictions
    min_prediction_length=1,  # Minimum prediction length
    max_prediction_length=max_prediction_length,  # Maximum prediction length
    time_varying_known_categoricals=["month", "day of the week", "day", "Holiday"],  # Known categorical features
    time_varying_known_reals=["time_idx"],  # Known real features, like time index
    time_varying_unknown_categoricals=[],  # Unknown categorical features, if any
    time_varying_unknown_reals=["Resid"],  # Unknown real features, which includes the target 'Trend' itself
    group_ids=['destination'],  # The column that identifies the time series group
    target_normalizer=GroupNormalizer(
        groups=['destination'], transformation="softplus"  # A normalization technique
    ),
    add_relative_time_idx=True,  # Adds a column for relative time index
    add_target_scales=True,  # Adds columns for target scales
    add_encoder_length=True,  # Adds a column for encoder length
    allow_missing_timesteps=True,  # Allows model to handle missing timesteps if there are any
)


In [20]:
# Using the previously created TimeSeriesDataSet for training, we create a validation dataset.
# The predict=True flag indicates that the validation dataset should be set up for prediction tasks,
# specifically, it should include the last 'max_prediction_length' points for each time series.

validation = TimeSeriesDataSet.from_dataset(
    training,  # Use the same configurations as the training dataset
    data,  # Source data
    predict=True,  # Indicates the dataset is for prediction
    stop_randomization=True  # Disables randomization when creating batches of data
)

# Create PyTorch DataLoaders for the model. These will be used to efficiently load data in batches during training and validation.
batch_size = 128  # The batch size defines how many samples per batch to load. It is set to 128 and should be adjusted according to the available memory.

# Check if a GPU is available and set PyTorch to use the GPU if possible. Otherwise, it will default to using the CPU.
if torch.cuda.is_available():
    device = "cuda"  # Use GPU
else:
    device = "cpu"  # Use CPU

# Convert the training and validation datasets to PyTorch DataLoaders.
# The DataLoaders are moved to the specified device (either GPU or CPU).
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)  # DataLoader for training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)  # DataLoader for validation, with a larger batch size to speed up evaluation


In [21]:
# This block configures the network and the training process. It is specifically set up to train a model
# to predict the 'Trend' component of the dataset. Subsequent models will be trained similarly to predict
# the 'Seasonal' and 'Resid' components.from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import ModelCheckpoint

# Setup checkpoints to save the model during training, specifically we save:
# - The last model weights
# - The model with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss for checkpointing
    mode="min",  # Mode 'min' saves the model when the monitored metric (val_loss) is minimized
    save_last=True,  # Save the last model state at the end of training
    save_top_k=1,  # Save only the top 1 model with the lowest val_loss
    filename="best_model_{epoch}",  # Custom filename for the checkpoints
    dirpath="saved_models"  # Directory to save model checkpoints
)

# Early stopping callback to stop training early if validation loss has not improved
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,  # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=False,
    mode="min"  # Mode 'min' will stop when the quantity monitored has stopped decreasing
)

# Learning rate monitor to log the learning rate
lr_logger = LearningRateMonitor()

# TensorBoard logger for visualization
logger = TensorBoardLogger("lightning_logs")

# Initialize the Trainer with configurations like max epochs, GPU usage, gradient clipping
trainer = pl.Trainer(
    max_epochs=50,  # Number of maximum epochs to train the model
    gpus=1,  # Number of GPUs to use for training
    enable_model_summary=True,  # Enables the printing of a model summary before training
    gradient_clip_val=0.03911626926390909,  # Gradient clipping value for avoiding exploding gradients
    limit_train_batches=30,  # Limiting the number of batches per training epoch for faster training
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],  # List of callbacks to be used during training
    logger=logger,  # Logger to be used for training process
)

# Initialize the Temporal Fusion Transformer model with specific hyperparameters
tft = TemporalFusionTransformer.from_dataset(
    training,  # TimeSeriesDataSet created previously
    learning_rate=0.00842077448532244,  # Learning rate of the model
    hidden_size=125,  # Size of the hidden state in the model
    attention_head_size=1,  # Number of attention heads
    dropout=0.15160823136480017,  # Dropout rate for regularization
    hidden_continuous_size=17,  # Size of the continuous hidden state
    output_size=7,  # Number of outputs of the model (for quantile loss)
    loss=QuantileLoss(),  # Type of loss function to use (quantile loss for probabilistic forecasting)
    log_interval=10,  # Interval for logging the learning rate
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate on plateau
)

# Print the number of parameters in the network to ensure model complexity is manageable
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")






  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 858.9k


In [22]:
# Now that the Temporal Fusion Transformer model and the PyTorch Lightning trainer are configured,
# we can start the training process. The 'fit' method will train the model on the data provided by
# the training DataLoader and evaluate it on the validation DataLoader.

trainer.fit(
    tft,  # The initialized Temporal Fusion Transformer model
    train_dataloaders=train_dataloader,  # DataLoader providing the training data batch by batch
    val_dataloaders=val_dataloader,   # DataLoader providing the validation data
)

# During the training process, the model's performance is evaluated on the validation set at the end
# of each epoch. The best model according to the validation loss will be saved due to the ModelCheckpoint
# callback configured earlier. Early stopping is also in place to prevent overfitting if the validation loss
# doesn't improve for a set number of epochs.

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 539   
3  | prescalers                         | ModuleDict                      | 204   
4  | static_variable_selection          | VariableSelectionNetwork        | 17

Epoch 0:  97%|██████████████████████████████ | 30/31 [00:11<00:00,  2.69it/s, loss=367, v_num=6, train_loss_step=309.0]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|███████████████| 31/31 [00:12<00:00,  2.56it/s, loss=367, v_num=6, train_loss_step=309.0, val_loss=296.0][A
Epoch 1:  97%|▉| 30/31 [00:11<00:00,  2.72it/s, loss=268, v_num=6, train_loss_step=241.0, val_loss=296.0, train_loss_ep[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                                | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                   | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|█| 31/31 [00:12<00:00,  2.57it/s, loss=268, v_num=6, train_l

In [35]:
# After the training process is complete, we can retrieve the path to the best model's checkpoint.
# This model checkpoint will have the lowest validation loss observed during training due to the
# configuration of the ModelCheckpoint callback.

best_model_path = trainer.checkpoint_callback.best_model_path  # Path to the best model's checkpoint

# Using the best model's checkpoint, we load the trained Temporal Fusion Transformer model.
# This model is ready for making predictions or can be used for further analysis.

best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# The 'best_tft' object now contains the best performing model as per the validation set,
# and it can be used to make predictions on new data or evaluate its performance on a test set.




In [36]:
raw_predictions, x = best_tft.predict(val_dataloader,mode="raw", return_x=True)

raw_predictions[0][:, :, 3]

tensor([[3611.5913, 2123.0720,  312.2786]])

In [39]:
Resid_forecasting=[3611.5913, 2123.0720,  312.2786]
Resid_true=[3735.1682, 2277.5544,  299.3539]

# Output

In [40]:
# Combine the forecasted 'Trend', 'Seasonal', and 'Resid' components to obtain the forecasted 'tourist' numbers.
# The comprehension list sums the corresponding values from the three forecasts for each point in time.

import numpy as np
tourist = [sum(x) for x in zip(Trend_forecasting, Seasonal_forecasting, Resid_forecasting)]

# Similarly, combine the true 'Trend', 'Seasonal', and 'Resid' components to obtain the true 'tourist' numbers.
# This is typically used for validation or testing purposes to compare against the forecasted values.
tourist_t = [sum(x) for x in zip(Trend_true, Seasonal_true, Resid_true)]


# Calculate Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and Mean Absolute Percentage Error (MAPE) to evaluate the accuracy of the forecasts.
# These metrics provide a quantitative measure of the model's predictive performance.
mae = np.mean(np.abs(np.array(tourist_t) - np.array(tourist)))
rmse = np.sqrt(np.mean(np.square(np.array(tourist_t) - np.array(tourist))))
mape = np.mean(np.abs(np.array(tourist_t) - np.array(tourist)) / np.array(tourist_t)) * 100

# Output the forecasted and true 'tourist' numbers, as well as the calculated error metrics.
print("tourist:", tourist)
print("tourist_t:", tourist_t)
print("MAE:", mae)
print("RMSE:", rmse)
print("MAPE:", mape)

tourist: [30454.3213, 28864.655, 27075.789800000002]
tourist_t: [30687.9998, 28971.9992, 27103.999499999998]
MAE: 123.07746666666571
RMSE: 149.35885846954923
MAPE: 0.41201834338207105
