In [11]:
import os
import warnings

warnings.filterwarnings("ignore")  # avoid printing out absolute paths
import tensorflow as tf 
import tensorboard as tb 
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss,MAE,MAPE,RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [12]:
# Loading the dataset from an Excel file into a pandas DataFrame.
# The path provided should be updated to where the actual file is located.
import pandas as pd

data = pd.read_excel('.\dataset\Siguniang.xlsx')

data["year"] = data["year"].astype(str)
data["day"] = data["day"].astype(str)
data["tourist"] = data["tourist"].astype("float64")
data["pc_Siguniang"] = data["pc_Siguniang"].astype("float64")
data["mob_Siguniang"] = data["mob_Siguniang"].astype("float64")
data["pc_SichuanEpidemic"] = data["pc_SichuanEpidemic"].astype("float64")
data["mob_SichuanEpidemic"] = data["mob_SichuanEpidemic"].astype("float64")
data["month"] = data["month"].astype(str)
data

Unnamed: 0,date,tourist,pc_Siguniang,mob_Siguniang,pc_SichuanEpidemic,mob_SichuanEpidemic,time_idx,weekday,year,month,day,destination,Trend,Seasonal,Resid
0,2020-04-01,101.0,388.0,856.0,271.0,959.0,1,Tuesday,2020,4,1,SiGuniang,149.586843,969.914316,2481.498840
1,2020-04-02,122.0,445.0,873.0,243.0,933.0,2,Wednesday,2020,4,2,SiGuniang,166.207496,957.303377,2498.489127
2,2020-04-03,149.0,333.0,877.0,201.0,841.0,3,Thursday,2020,4,3,SiGuniang,181.796675,990.298899,2476.904426
3,2020-04-04,850.0,218.0,945.0,116.0,886.0,4,Friday,2020,4,4,SiGuniang,196.325958,955.000971,3198.673070
4,2020-04-05,1499.0,180.0,912.0,106.0,794.0,5,Saturday,2020,4,5,SiGuniang,210.204584,1230.235815,3558.559601
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,2021-09-08,1345.0,454.0,965.0,137.0,634.0,526,Tuesday,2021,9,8,SiGuniang,1654.920633,702.491817,2487.587550
526,2021-09-09,1552.0,439.0,986.0,146.0,652.0,527,Wednesday,2021,9,9,SiGuniang,1761.435876,780.546667,2510.017457
527,2021-09-10,1845.0,426.0,1077.0,137.0,586.0,528,Thursday,2021,9,10,SiGuniang,1869.009378,956.961883,2519.028739
528,2021-09-11,3795.0,265.0,1206.0,146.0,889.0,529,Friday,2021,9,11,SiGuniang,1977.625607,1672.263550,3645.110844


In [13]:
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

# Define the maximum prediction length and encoder length
max_prediction_length = 3  # The number of time steps the model is predicting into the future
max_encoder_length = 30  # The number of past time steps the model is using to make predictions

# Initialize a TimeSeriesDataSet object, which structures the data for the Temporal Fusion Transformer model.
# It includes various parameters to configure the dataset for time series forecasting.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= 440],

    time_idx="time_idx",
    target="Trend",
    min_encoder_length=max_encoder_length // 2, 
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    time_varying_known_categoricals=["month","weekday","day"],
    time_varying_known_reals=["time_idx","pc_Siguniang","mob_Siguniang","pc_SichuanEpidemic","mob_SichuanEpidemic"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "Trend",
        
    ],
    group_ids=['destination'],
    target_normalizer=GroupNormalizer(
        groups=['destination'], transformation="softplus"),

    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,

   
)

In [14]:
# Using the previously created TimeSeriesDataSet for training, we create a validation dataset.
# The predict=True flag indicates that the validation dataset should be set up for prediction tasks,
# specifically, it should include the last 'max_prediction_length' points for each time series.

validation = TimeSeriesDataSet.from_dataset(
    training,  # Use the same configurations as the training dataset
    data,  # Source data
    predict=True,  # Indicates the dataset is for prediction
    stop_randomization=True  # Disables randomization when creating batches of data
)

# Create PyTorch DataLoaders for the model. These will be used to efficiently load data in batches during training and validation.
batch_size = 128  # The batch size defines how many samples per batch to load. It is set to 128 and should be adjusted according to the available memory.

# Check if a GPU is available and set PyTorch to use the GPU if possible. Otherwise, it will default to using the CPU.
if torch.cuda.is_available():
    device = "cuda"  # Use GPU
else:
    device = "cpu"  # Use CPU

# Convert the training and validation datasets to PyTorch DataLoaders.
# The DataLoaders are moved to the specified device (either GPU or CPU).
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)  # DataLoader for training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)  # DataLoader for validation, with a larger batch size to speed up evaluation


In [None]:
# This section is dedicated to hyperparameter optimization using the Optuna framework
# integrated within the pytorch-forecasting package. This step is optional due to its
# time-consuming nature, but it is crucial for fine-tuning the model to achieve better performance.


import pickle
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters


# Initialize the hyperparameter optimization study which will search for the best hyperparameters
# over a specified number of trials.
study = optimize_hyperparameters(
    train_dataloader,  # DataLoader containing the training data
    val_dataloader,  # DataLoader containing the validation data
    model_path="optuna_test",  # Directory where the models are saved during optimization
    n_trials=50,  # Number of trials to run
    max_epochs=50,  # Maximum number of epochs to train the model for each trial
    gradient_clip_val_range=(0.01, 1.0),  # Range for gradient clipping for avoiding exploding gradients
    hidden_size_range=(8, 128),  # Range for the size of hidden layers
    hidden_continuous_size_range=(8, 128),  # Range for the size of hidden continuous layers
    attention_head_size_range=(1, 4),  # Range for the number of attention heads
    learning_rate_range=(0.001, 0.1),  # Range for the learning rate
    dropout_range=(0.1, 0.3),  # Range for dropout rates to prevent overfitting
    trainer_kwargs=dict(limit_train_batches=30),  # Limit the number of batches for training to speed up epochs
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate when a plateau is reached
    use_learning_rate_finder=False  # Whether to use the learning rate finder (here it is turned off)
)

# Save the results of the study to a file so that we can resume the optimization later if needed.
# This is useful for long-running optimizations that may need to be stopped and restarted.
with open("test_study.pkl", "wb") as fout:
    pickle.dump(study, fout)

# After the optimization study is complete, print out the best hyperparameters found.
# These parameters can be used to configure the model for the final training.
print(study.best_trial.params)


## Trend

In [8]:
# This block configures the network and the training process. It is specifically set up to train a model
# to predict the 'Trend' component of the dataset. Subsequent models will be trained similarly to predict
# the 'Seasonal' and 'Resid' components.from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import ModelCheckpoint

# Setup checkpoints to save the model during training, specifically we save:
# - The last model weights
# - The model with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss for checkpointing
    mode="min",  # Mode 'min' saves the model when the monitored metric (val_loss) is minimized
    save_last=True,  # Save the last model state at the end of training
    save_top_k=1,  # Save only the top 1 model with the lowest val_loss
    filename="best_model_{epoch}",  # Custom filename for the checkpoints
    dirpath="saved_models"  # Directory to save model checkpoints
)

# Early stopping callback to stop training early if validation loss has not improved
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,  # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=False,
    mode="min"  # Mode 'min' will stop when the quantity monitored has stopped decreasing
)

# Learning rate monitor to log the learning rate
lr_logger = LearningRateMonitor()

# TensorBoard logger for visualization
logger = TensorBoardLogger("lightning_logs")

# Initialize the Trainer with configurations like max epochs, GPU usage, gradient clipping
trainer = pl.Trainer(
    max_epochs=50,  # Number of maximum epochs to train the model
    gpus=1,  # Number of GPUs to use for training
    enable_model_summary=True,  # Enables the printing of a model summary before training
    gradient_clip_val=0.0894,  # Gradient clipping value for avoiding exploding gradients
    limit_train_batches=30,  # Limiting the number of batches per training epoch for faster training
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],  # List of callbacks to be used during training
    logger=logger,  # Logger to be used for training process
)

# Initialize the Temporal Fusion Transformer model with specific hyperparameters
tft = TemporalFusionTransformer.from_dataset(
    training,  # TimeSeriesDataSet created previously
    learning_rate=0.0294,  # Learning rate of the model
    hidden_size=82,  # Size of the hidden state in the model
    attention_head_size=1,  # Number of attention heads
    dropout=0.1962,  # Dropout rate for regularization
    hidden_continuous_size=39,  # Size of the continuous hidden state
    output_size=7,  # Number of outputs of the model (for quantile loss)
    loss=QuantileLoss(),  # Type of loss function to use (quantile loss for probabilistic forecasting)
    log_interval=10,  # Interval for logging the learning rate
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate on plateau
)

# Print the number of parameters in the network to ensure model complexity is manageable
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 521.1k


In [None]:
# Now that the Temporal Fusion Transformer model and the PyTorch Lightning trainer are configured,
# we can start the training process. The 'fit' method will train the model on the data provided by
# the training DataLoader and evaluate it on the validation DataLoader.

trainer.fit(
    tft,  # The initialized Temporal Fusion Transformer model
    train_dataloaders=train_dataloader,  # DataLoader providing the training data batch by batch
    val_dataloaders=val_dataloader,   # DataLoader providing the validation data
)

# During the training process, the model's performance is evaluated on the validation set at the end
# of each epoch. The best model according to the validation loss will be saved due to the ModelCheckpoint
# callback configured earlier. Early stopping is also in place to prevent overfitting if the validation loss
# doesn't improve for a set number of epochs.

In [15]:
# After the training process is complete, we can retrieve the path to the best model's checkpoint.
# This model checkpoint will have the lowest validation loss observed during training due to the
# configuration of the ModelCheckpoint callback.

# Modify this section to choose between utilizing a pre-trained model or opting for the best model from the current training session.
# best_model_path = trainer.checkpoint_callback.best_model_path  # Path to the best model's checkpoint
best_model_path = 'trend_siguniang.ckpt'
 

# Using the best model's checkpoint, we load the trained Temporal Fusion Transformer model.
# This model is ready for making predictions or can be used for further analysis.

best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# The 'best_tft' object now contains the best performing model as per the validation set,
# and it can be used to make predictions on new data or evaluate its performance on a test set.



In [16]:
raw_predictions, x = best_tft.predict(val_dataloader,mode="raw", return_x=True)

raw_predictions[0][:, :, 3]

tensor([[1810.7145, 1920.9052, 2064.9780]])

In [17]:
Trend_forecasting=[1810.7145, 1920.9052, 2064.9780]
Trend_true=[1869.0094, 1977.6256, 2087.0510]

## Seasonality
#### The following code is a repetition of the 3rd to 10th code blocks from the same notebook, adjusted to predict the 'Seasonal' component instead of 'Trend'. The 'target' and 'time_varying_unknown_reals' parameters are modified accordingly.

In [18]:
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

# Define the maximum prediction length and encoder length
max_prediction_length = 3  # The number of time steps the model is predicting into the future
max_encoder_length = 30  # The number of past time steps the model is using to make predictions

# Initialize a TimeSeriesDataSet object, which structures the data for the Temporal Fusion Transformer model.
# It includes various parameters to configure the dataset for time series forecasting.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= 440],

    time_idx="time_idx",
    target="Seasonal",
    min_encoder_length=max_encoder_length // 2, 
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    time_varying_known_categoricals=["month","weekday","day"],
    time_varying_known_reals=["time_idx","pc_Siguniang","mob_Siguniang","pc_SichuanEpidemic","mob_SichuanEpidemic"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "Seasonal",
        
    ],
    group_ids=['destination'],
    target_normalizer=GroupNormalizer(
        groups=['destination'], transformation="softplus"),

    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,

   
)

In [19]:
# Using the previously created TimeSeriesDataSet for training, we create a validation dataset.
# The predict=True flag indicates that the validation dataset should be set up for prediction tasks,
# specifically, it should include the last 'max_prediction_length' points for each time series.

validation = TimeSeriesDataSet.from_dataset(
    training,  # Use the same configurations as the training dataset
    data,  # Source data
    predict=True,  # Indicates the dataset is for prediction
    stop_randomization=True  # Disables randomization when creating batches of data
)

# Create PyTorch DataLoaders for the model. These will be used to efficiently load data in batches during training and validation.
batch_size = 128  # The batch size defines how many samples per batch to load. It is set to 128 and should be adjusted according to the available memory.

# Check if a GPU is available and set PyTorch to use the GPU if possible. Otherwise, it will default to using the CPU.
if torch.cuda.is_available():
    device = "cuda"  # Use GPU
else:
    device = "cpu"  # Use CPU

# Convert the training and validation datasets to PyTorch DataLoaders.
# The DataLoaders are moved to the specified device (either GPU or CPU).
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)  # DataLoader for training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)  # DataLoader for validation, with a larger batch size to speed up evaluation


In [20]:
# This block configures the network and the training process. It is specifically set up to train a model
# to predict the 'Trend' component of the dataset. Subsequent models will be trained similarly to predict
# the 'Seasonal' and 'Resid' components.from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import ModelCheckpoint

# Setup checkpoints to save the model during training, specifically we save:
# - The last model weights
# - The model with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss for checkpointing
    mode="min",  # Mode 'min' saves the model when the monitored metric (val_loss) is minimized
    save_last=True,  # Save the last model state at the end of training
    save_top_k=1,  # Save only the top 1 model with the lowest val_loss
    filename="best_model_{epoch}",  # Custom filename for the checkpoints
    dirpath="saved_models"  # Directory to save model checkpoints
)

# Early stopping callback to stop training early if validation loss has not improved
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,  # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=False,
    mode="min"  # Mode 'min' will stop when the quantity monitored has stopped decreasing
)

# Learning rate monitor to log the learning rate
lr_logger = LearningRateMonitor()

# TensorBoard logger for visualization
logger = TensorBoardLogger("lightning_logs")

# Initialize the Trainer with configurations like max epochs, GPU usage, gradient clipping
trainer = pl.Trainer(
    max_epochs=50,  # Number of maximum epochs to train the model
    gpus=1,  # Number of GPUs to use for training
    enable_model_summary=True,  # Enables the printing of a model summary before training
    gradient_clip_val=0.0894,  # Gradient clipping value for avoiding exploding gradients
    limit_train_batches=30,  # Limiting the number of batches per training epoch for faster training
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],  # List of callbacks to be used during training
    logger=logger,  # Logger to be used for training process
)

# Initialize the Temporal Fusion Transformer model with specific hyperparameters
tft = TemporalFusionTransformer.from_dataset(
    training,  # TimeSeriesDataSet created previously
    learning_rate=0.0294,  # Learning rate of the model
    hidden_size=82,  # Size of the hidden state in the model
    attention_head_size=1,  # Number of attention heads
    dropout=0.1962,  # Dropout rate for regularization
    hidden_continuous_size=39,  # Size of the continuous hidden state
    output_size=7,  # Number of outputs of the model (for quantile loss)
    loss=QuantileLoss(),  # Type of loss function to use (quantile loss for probabilistic forecasting)
    log_interval=10,  # Interval for logging the learning rate
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate on plateau
)

# Print the number of parameters in the network to ensure model complexity is manageable
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 521.1k


In [None]:
# Now that the Temporal Fusion Transformer model and the PyTorch Lightning trainer are configured,
# we can start the training process. The 'fit' method will train the model on the data provided by
# the training DataLoader and evaluate it on the validation DataLoader.

trainer.fit(
    tft,  # The initialized Temporal Fusion Transformer model
    train_dataloaders=train_dataloader,  # DataLoader providing the training data batch by batch
    val_dataloaders=val_dataloader,   # DataLoader providing the validation data
)

# During the training process, the model's performance is evaluated on the validation set at the end
# of each epoch. The best model according to the validation loss will be saved due to the ModelCheckpoint
# callback configured earlier. Early stopping is also in place to prevent overfitting if the validation loss
# doesn't improve for a set number of epochs.

In [21]:
# After the training process is complete, we can retrieve the path to the best model's checkpoint.
# This model checkpoint will have the lowest validation loss observed during training due to the
# configuration of the ModelCheckpoint callback.

# best_model_path = trainer.checkpoint_callback.best_model_path  # Path to the best model's checkpoint
best_model_path = 'seasonal_siguniang.ckpt'


# Using the best model's checkpoint, we load the trained Temporal Fusion Transformer model.
# This model is ready for making predictions or can be used for further analysis.

best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# The 'best_tft' object now contains the best performing model as per the validation set,
# and it can be used to make predictions on new data or evaluate its performance on a test set.


In [22]:
raw_predictions, x = best_tft.predict(val_dataloader,mode="raw", return_x=True)

raw_predictions[0][:, :, 3]

tensor([[ 949.7791, 1637.7009, 1487.3295]])

In [23]:
Seasonal_forecasting=[ 949.7791, 1637.7009, 1487.3295]
Seasonal_true=[ 956.9619, 1672.2635, 1484.6394]

# Resid

In [24]:
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

# Define the maximum prediction length and encoder length
max_prediction_length = 3  # The number of time steps the model is predicting into the future
max_encoder_length = 30  # The number of past time steps the model is using to make predictions

# Initialize a TimeSeriesDataSet object, which structures the data for the Temporal Fusion Transformer model.
# It includes various parameters to configure the dataset for time series forecasting.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= 440],

    time_idx="time_idx",
    target="Resid",
    min_encoder_length=max_encoder_length // 2, 
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    time_varying_known_categoricals=["month","weekday","day"],
    time_varying_known_reals=["time_idx","pc_Siguniang","mob_Siguniang","pc_SichuanEpidemic","mob_SichuanEpidemic"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "Resid",
        
    ],
    group_ids=['destination'],
    target_normalizer=GroupNormalizer(
        groups=['destination'], transformation="softplus"),

    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,

   
)

In [25]:
# Using the previously created TimeSeriesDataSet for training, we create a validation dataset.
# The predict=True flag indicates that the validation dataset should be set up for prediction tasks,
# specifically, it should include the last 'max_prediction_length' points for each time series.

validation = TimeSeriesDataSet.from_dataset(
    training,  # Use the same configurations as the training dataset
    data,  # Source data
    predict=True,  # Indicates the dataset is for prediction
    stop_randomization=True  # Disables randomization when creating batches of data
)

# Create PyTorch DataLoaders for the model. These will be used to efficiently load data in batches during training and validation.
batch_size = 128  # The batch size defines how many samples per batch to load. It is set to 128 and should be adjusted according to the available memory.

# Check if a GPU is available and set PyTorch to use the GPU if possible. Otherwise, it will default to using the CPU.
if torch.cuda.is_available():
    device = "cuda"  # Use GPU
else:
    device = "cpu"  # Use CPU

# Convert the training and validation datasets to PyTorch DataLoaders.
# The DataLoaders are moved to the specified device (either GPU or CPU).
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)  # DataLoader for training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)  # DataLoader for validation, with a larger batch size to speed up evaluation


In [26]:
# This block configures the network and the training process. It is specifically set up to train a model
# to predict the 'Trend' component of the dataset. Subsequent models will be trained similarly to predict
# the 'Seasonal' and 'Resid' components.from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import ModelCheckpoint

# Setup checkpoints to save the model during training, specifically we save:
# - The last model weights
# - The model with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss for checkpointing
    mode="min",  # Mode 'min' saves the model when the monitored metric (val_loss) is minimized
    save_last=True,  # Save the last model state at the end of training
    save_top_k=1,  # Save only the top 1 model with the lowest val_loss
    filename="best_model_{epoch}",  # Custom filename for the checkpoints
    dirpath="saved_models"  # Directory to save model checkpoints
)

# Early stopping callback to stop training early if validation loss has not improved
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,  # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=False,
    mode="min"  # Mode 'min' will stop when the quantity monitored has stopped decreasing
)

# Learning rate monitor to log the learning rate
lr_logger = LearningRateMonitor()

# TensorBoard logger for visualization
logger = TensorBoardLogger("lightning_logs")

# Initialize the Trainer with configurations like max epochs, GPU usage, gradient clipping
trainer = pl.Trainer(
    max_epochs=50,  # Number of maximum epochs to train the model
    gpus=1,  # Number of GPUs to use for training
    enable_model_summary=True,  # Enables the printing of a model summary before training
    gradient_clip_val=0.0894,  # Gradient clipping value for avoiding exploding gradients
    limit_train_batches=30,  # Limiting the number of batches per training epoch for faster training
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],  # List of callbacks to be used during training
    logger=logger,  # Logger to be used for training process
)

# Initialize the Temporal Fusion Transformer model with specific hyperparameters
tft = TemporalFusionTransformer.from_dataset(
    training,  # TimeSeriesDataSet created previously
    learning_rate=0.0294,  # Learning rate of the model
    hidden_size=82,  # Size of the hidden state in the model
    attention_head_size=1,  # Number of attention heads
    dropout=0.1962,  # Dropout rate for regularization
    hidden_continuous_size=39,  # Size of the continuous hidden state
    output_size=7,  # Number of outputs of the model (for quantile loss)
    loss=QuantileLoss(),  # Type of loss function to use (quantile loss for probabilistic forecasting)
    log_interval=10,  # Interval for logging the learning rate
    reduce_on_plateau_patience=4,  # Patience for reducing the learning rate on plateau
)

# Print the number of parameters in the network to ensure model complexity is manageable
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 521.1k


In [None]:
# Now that the Temporal Fusion Transformer model and the PyTorch Lightning trainer are configured,
# we can start the training process. The 'fit' method will train the model on the data provided by
# the training DataLoader and evaluate it on the validation DataLoader.

trainer.fit(
    tft,  # The initialized Temporal Fusion Transformer model
    train_dataloaders=train_dataloader,  # DataLoader providing the training data batch by batch
    val_dataloaders=val_dataloader,   # DataLoader providing the validation data
)

# During the training process, the model's performance is evaluated on the validation set at the end
# of each epoch. The best model according to the validation loss will be saved due to the ModelCheckpoint
# callback configured earlier. Early stopping is also in place to prevent overfitting if the validation loss
# doesn't improve for a set number of epochs.

In [27]:
# After the training process is complete, we can retrieve the path to the best model's checkpoint.
# This model checkpoint will have the lowest validation loss observed during training due to the
# configuration of the ModelCheckpoint callback.

# best_model_path = trainer.checkpoint_callback.best_model_path  # Path to the best model's checkpoint
best_model_path = 'resid_siguniang.ckpt'

# Using the best model's checkpoint, we load the trained Temporal Fusion Transformer model.
# This model is ready for making predictions or can be used for further analysis.

best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# The 'best_tft' object now contains the best performing model as per the validation set,
# and it can be used to make predictions on new data or evaluate its performance on a test set.




In [29]:
raw_predictions, x = best_tft.predict(val_dataloader,mode="raw", return_x=True)

raw_predictions[0][:, :, 3]

tensor([[2514.8813, 2914.3496, 3032.1069]])

In [30]:
Resid_forecasting=[2514.8813, 2914.3496, 3032.1069]
Resid_true=[2519.0288, 3645.1108, 3093.3096]

# Output

In [31]:
# Combine the forecasted 'Trend', 'Seasonal', and 'Resid' components to obtain the forecasted 'tourist' numbers.
# The comprehension list sums the corresponding values from the three forecasts for each point in time.

import numpy as np
tourist = [sum(x) for x in zip(Trend_forecasting, Seasonal_forecasting, Resid_forecasting)]

# Similarly, combine the true 'Trend', 'Seasonal', and 'Resid' components to obtain the true 'tourist' numbers.
# This is typically used for validation or testing purposes to compare against the forecasted values.
tourist_t = [sum(x) for x in zip(Trend_true, Seasonal_true, Resid_true)]

# "After decomposing the dataset using RobustSTL, it is possible for some sequences to have negative values.
# To facilitate the training of the model, we adjusted the decomposed columns to ensure that most of the sequences are greater than zero.
# This adjustment involved increasing the sum of the decomposed sequences by 3500
# Therefore, to accurately reflect the original data scale in the final prediction results, we subtract 3500 from the total forecasted values.
tourist = [x - 3500 for x in tourist]
tourist_t = [x - 3500 for x in tourist_t]

# Calculate Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and Mean Absolute Percentage Error (MAPE) to evaluate the accuracy of the forecasts.
# These metrics provide a quantitative measure of the model's predictive performance.
mae = np.mean(np.abs(np.array(tourist_t) - np.array(tourist)))
rmse = np.sqrt(np.mean(np.square(np.array(tourist_t) - np.array(tourist))))
mape = np.mean(np.abs(np.array(tourist_t) - np.array(tourist)) / np.array(tourist_t)) * 100

# Output the forecasted and true 'tourist' numbers, as well as the calculated error metrics.
print("tourist:", tourist)
print("tourist_t:", tourist_t)
print("MAE:", mae)
print("RMSE:", rmse)
print("MAPE:", mape)

tourist: [1775.3748999999998, 2972.9557000000004, 3084.4143999999997]
tourist_t: [1845.0001000000002, 3794.9999000000007, 3165.0]
MAE: 324.0850000000003
RMSE: 478.57370527991486
MAPE: 9.327038728152441
