In [1]:
import os
import warnings

warnings.filterwarnings("ignore")  # avoid printing out absolute paths

import tensorflow as tf 
import tensorboard as tb 
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss,MAE,MAPE,RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_lightning.callbacks import ModelCheckpoint

In [2]:
import pandas as pd

# Read the Siguniang workbook and cast all columns in a single pass:
# year/month/day become strings, while the tourist count and the
# pc_*/mob_* columns become float64.
data = pd.read_excel('./dataset/Siguniang.xlsx').astype(
    {
        "year": str,
        "day": str,
        "tourist": "float64",
        "pc_Siguniang": "float64",
        "mob_Siguniang": "float64",
        "pc_SichuanEpidemic": "float64",
        "mob_SichuanEpidemic": "float64",
        "month": str,
    }
)
data

Unnamed: 0,date,tourist,pc_Siguniang,mob_Siguniang,pc_SichuanEpidemic,mob_SichuanEpidemic,time_idx,weekday,year,month,day,destination,Trend,Seasonal,Resid
0,2020-04-01,101.0,388.0,856.0,271.0,959.0,1,Tuesday,2020,4,1,SiGuniang,149.586843,969.914316,2481.498840
1,2020-04-02,122.0,445.0,873.0,243.0,933.0,2,Wednesday,2020,4,2,SiGuniang,166.207496,957.303377,2498.489127
2,2020-04-03,149.0,333.0,877.0,201.0,841.0,3,Thursday,2020,4,3,SiGuniang,181.796675,990.298899,2476.904426
3,2020-04-04,850.0,218.0,945.0,116.0,886.0,4,Friday,2020,4,4,SiGuniang,196.325958,955.000971,3198.673070
4,2020-04-05,1499.0,180.0,912.0,106.0,794.0,5,Saturday,2020,4,5,SiGuniang,210.204584,1230.235815,3558.559601
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,2021-09-08,1345.0,454.0,965.0,137.0,634.0,526,Tuesday,2021,9,8,SiGuniang,1654.920633,702.491817,2487.587550
526,2021-09-09,1552.0,439.0,986.0,146.0,652.0,527,Wednesday,2021,9,9,SiGuniang,1761.435876,780.546667,2510.017457
527,2021-09-10,1845.0,426.0,1077.0,137.0,586.0,528,Thursday,2021,9,10,SiGuniang,1869.009378,956.961883,2519.028739
528,2021-09-11,3795.0,265.0,1206.0,146.0,889.0,529,Friday,2021,9,11,SiGuniang,1977.625607,1672.263550,3645.110844


In [4]:
# Batch size used when the dataloaders are built further down (keep it within 32-128).
batch_size = 128

# Pick the GPU when CUDA is usable, otherwise the CPU.
# NOTE(review): `device` is not referenced again in the visible cells; nothing is
# actually moved onto it here.
device = "cuda" if torch.cuda.is_available() else "cpu"



In [None]:
# Part 1: Expanding-window forecasting evaluation (loads previously saved checkpoints; see Part 2 for the training loops)

In [5]:


# Evaluation range on `time_idx` (both ends inclusive): 441..530 is 90 time points,
# i.e. 30 windows of `prediction_length` steps.
start_prediction_idx, end_prediction_idx = 441, 530
prediction_length = 3      # steps forecast per window
max_encoder_length = 30    # history length handed to the encoder

# Collects the forecasts of every window.
all_predictions = list()



In [6]:
# Trend

In [7]:
# Checkpoints used for the Trend component, one entry per forecast window in window
# order (entry i is applied to the i-th window by the prediction loop).
# The paths are built with os.path.join so they resolve on any OS, not only with
# Windows backslash separators.
# NOTE(review): several file names repeat, i.e. the same checkpoint is reused for more
# than one window — confirm this selection is intended.
pretrained_model_paths = [
    os.path.join("saved_models", checkpoint_name)
    for checkpoint_name in [
        "best_model_epoch=31-v16.ckpt",
        "best_model_epoch=14-v43.ckpt",
        "best_model_epoch=19-v17.ckpt",
        "best_model_epoch=31-v17.ckpt",
        "best_model_epoch=20-v26.ckpt",
        "best_model_epoch=16-v30.ckpt",
        "best_model_epoch=26-v36.ckpt",
        "best_model_epoch=19-v17.ckpt",
        "best_model_epoch=30-v19.ckpt",
        "best_model_epoch=19-v17.ckpt",
        "best_model_epoch=27-v16.ckpt",
        "best_model_epoch=20-v26.ckpt",
        "best_model_epoch=30-v19.ckpt",
        "best_model_epoch=15-v16.ckpt",
        "best_model_epoch=13-v21.ckpt",
        "best_model_epoch=13-v19.ckpt",
        "best_model_epoch=31-v16.ckpt",
        "best_model_epoch=30-v20.ckpt",
        "best_model_epoch=19-v17.ckpt",
        "best_model_epoch=30-v19.ckpt",
        "best_model_epoch=8-v57.ckpt",
        "best_model_epoch=27-v16.ckpt",
        "best_model_epoch=27-v16.ckpt",
        "best_model_epoch=27-v16.ckpt",
        "best_model_epoch=21-v11.ckpt",
        "best_model_epoch=19-v17.ckpt",
        "best_model_epoch=19-v17.ckpt",
        "best_model_epoch=14-v43.ckpt",
        "best_model_epoch=12-v42.ckpt",
        "best_model_epoch=12-v42.ckpt",
    ]
]


In [8]:
# Template dataset for the Trend component. The prediction loop passes it to
# `TimeSeriesDataSet.from_dataset(training, ...)` to build each validation window.
# The window/encoder sizes come from the evaluation config cell instead of being
# repeated as literals (same values: 440, 30, 3).
#
# NOTE(review): the Part 2 training loop builds its dataset from
# `time_idx <= prediction_idx - 1` for every window, while this template always stops
# right before the first window. Anything the dataset fits on its rows (presumably
# scalers/normalizers — confirm) can therefore differ from what later checkpoints
# saw during training.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= start_prediction_idx - 1],  # rows before the first forecast window
    time_idx="time_idx",
    target="Trend",
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=prediction_length,
    time_varying_known_categoricals=["month", "weekday", "day"],
    time_varying_known_reals=[
        "time_idx",
        "pc_Siguniang",
        "mob_Siguniang",
        "pc_SichuanEpidemic",
        "mob_SichuanEpidemic",
    ],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Trend"],
    group_ids=['destination'],
    # NOTE: the target uses a softplus transformation; the earlier comment here
    # ("took the logarithm") did not match this setting.
    target_normalizer=GroupNormalizer(groups=['destination'], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

In [9]:
# Rolling evaluation of the Trend checkpoints: checkpoint i forecasts the i-th
# window of `prediction_length` steps, starting at `start_prediction_idx`.

# Start from an empty list on every run of this cell. Without the reset, re-running
# the cell appends a second copy of the forecasts to the list created in the config
# cell (the Seasonal and Resid loops already reset it the same way).
all_predictions = []

for i, model_path in enumerate(pretrained_model_paths):
    start_idx = start_prediction_idx + i * prediction_length
    # Exclusive upper bound of the window, capped just past the last evaluation index.
    end_idx = min(start_idx + prediction_length, end_prediction_idx + 1)

    # Create a new validation dataset from all rows up to the end of this window.
    # With predict=True the dataset targets the last entries of the time index,
    # i.e. the window itself.
    validation_data = data[data['time_idx'] < end_idx].copy()
    validation = TimeSeriesDataSet.from_dataset(training, validation_data, predict=True, stop_randomization=True)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Load the pretrained model and make predictions
    best_tft = TemporalFusionTransformer.load_from_checkpoint(model_path)
    raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
    # Index 3 is the middle one of the 7 quantile outputs the models are trained with
    # (the median for the default QuantileLoss quantiles).
    current_prediction = raw_predictions[0][:, :, 3]
    current_prediction_numpy = current_prediction.numpy().flatten()
    all_predictions.extend(current_prediction_numpy.tolist())

# Print all predictions
print(all_predictions)

[1731.3883056640625, 1714.385009765625, 1687.429443359375, 1978.5191650390625, 1966.654052734375, 1917.821533203125, 1499.083984375, 1543.3009033203125, 1543.3428955078125, 1566.6710205078125, 1527.0262451171875, 1586.82177734375, 1690.5296630859375, 1508.6417236328125, 1467.68359375, 1420.192138671875, 1353.586181640625, 1464.023681640625, 2257.87109375, 2280.037109375, 2193.5380859375, 2270.1005859375, 2358.6630859375, 2267.876953125, 2749.620849609375, 2749.26025390625, 2795.364013671875, 2689.845458984375, 2691.594970703125, 2713.60400390625, 2740.8173828125, 2459.05322265625, 2559.2451171875, 2800.362060546875, 2811.26416015625, 2784.82421875, 2849.9619140625, 2865.648681640625, 2844.1689453125, 2818.42138671875, 2889.4775390625, 2883.34765625, 1738.5400390625, 1965.8919677734375, 1271.64794921875, 958.9987182617188, 747.0792236328125, 357.2618103027344, 934.1387939453125, 842.2722778320312, 759.3531494140625, 955.425048828125, 928.4222412109375, 906.4942016601562, 1216.5461425781

In [10]:
# Trend forecasts for the 90 evaluation steps, hard-coded from the printed output of
# the Trend prediction loop so the final metrics can be computed without reloading
# the checkpoints.
Trend_fore=[1731.3883056640625, 1714.385009765625, 1687.429443359375, 1978.5191650390625, 1966.654052734375, 1917.821533203125, 1499.083984375, 1543.3009033203125, 1543.3428955078125, 1566.6710205078125, 1527.0262451171875, 1586.82177734375, 1690.5296630859375, 1508.6417236328125, 1467.68359375, 1420.192138671875, 1353.586181640625, 1464.023681640625, 2257.87109375, 2280.037109375, 2193.5380859375, 2270.1005859375, 2358.6630859375, 2267.876953125, 2749.620849609375, 2749.26025390625, 2795.364013671875, 2689.845458984375, 2691.594970703125, 2713.60400390625, 2740.8173828125, 2459.05322265625, 2559.2451171875, 2800.362060546875, 2811.26416015625, 2784.82421875, 2849.9619140625, 2865.648681640625, 2844.1689453125, 2818.42138671875, 2889.4775390625, 2883.34765625, 1738.5400390625, 1965.8919677734375, 1271.64794921875, 958.9987182617188, 747.0792236328125, 357.2618103027344, 934.1387939453125, 842.2722778320312, 759.3531494140625, 955.425048828125, 928.4222412109375, 906.4942016601562, 1216.546142578125, 1624.296142578125, 1656.7322998046875, 915.2662353515625, 928.4640502929688, 888.5672607421875, 1327.02490234375, 1398.9154052734375, 1192.9051513671875, 1468.441650390625, 1416.564453125, 1372.365234375, 1486.193359375, 1469.1005859375, 1449.427490234375, 1169.113525390625, 1071.313232421875, 1069.7135009765625, 864.85302734375, 969.8883666992188, 854.2677001953125, 976.2835693359375, 935.8225708007812, 913.2532958984375, 768.7459716796875, 1068.1837158203125, 1092.1436767578125, 1383.463134765625, 1387.175537109375, 1379.53662109375, 1727.316650390625, 1677.2274169921875, 1797.581787109375, 2107.3603515625, 2375.767578125, 2460.5439453125]

In [11]:
# Seasonal

In [12]:
# Checkpoints used for the Seasonal component, one entry per forecast window in
# window order (entry i is applied to the i-th window by the prediction loop).
# The paths are built with os.path.join so they resolve on any OS, not only with
# Windows backslash separators.
# NOTE(review): several file names repeat, i.e. the same checkpoint is reused for more
# than one window — confirm this selection is intended.
pretrained_model_paths = [
    os.path.join("saved_models", checkpoint_name)
    for checkpoint_name in [
        "best_model_epoch=31-v18.ckpt",
        "best_model_epoch=8-v59.ckpt",
        "best_model_epoch=31-v18.ckpt",
        "best_model_epoch=36-v78.ckpt",
        "best_model_epoch=36-v78.ckpt",
        "best_model_epoch=32-v28.ckpt",
        "best_model_epoch=22-v27.ckpt",
        "best_model_epoch=19-v18.ckpt",
        "best_model_epoch=30-v21.ckpt",
        "best_model_epoch=22-v28.ckpt",
        "best_model_epoch=31-v18.ckpt",
        "best_model_epoch=31-v18.ckpt",
        "best_model_epoch=38-v94.ckpt",
        "best_model_epoch=38-v94.ckpt",
        "best_model_epoch=38-v94.ckpt",
        "best_model_epoch=19-v18.ckpt",
        "best_model_epoch=22-v28.ckpt",
        "best_model_epoch=19-v18.ckpt",
        "best_model_epoch=19-v18.ckpt",
        "best_model_epoch=39-v78.ckpt",
        "best_model_epoch=22-v27.ckpt",
        "best_model_epoch=8-v59.ckpt",
        "best_model_epoch=22-v28.ckpt",
        "best_model_epoch=31-v18.ckpt",
        "best_model_epoch=36-v81.ckpt",
        "best_model_epoch=8-v59.ckpt",
        "best_model_epoch=22-v27.ckpt",
        "best_model_epoch=36-v81.ckpt",
        "best_model_epoch=31-v18.ckpt",
        "best_model_epoch=31-v18.ckpt",
    ]
]


In [13]:
# Template dataset for the Seasonal component. The prediction loop passes it to
# `TimeSeriesDataSet.from_dataset(training, ...)` to build each validation window.
# The window/encoder sizes come from the evaluation config cell instead of being
# repeated as literals (same values: 440, 30, 3).
#
# NOTE(review): the Part 2 training loop builds its dataset from
# `time_idx <= prediction_idx - 1` for every window, while this template always stops
# right before the first window. Anything the dataset fits on its rows (presumably
# scalers/normalizers — confirm) can therefore differ from what later checkpoints
# saw during training.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= start_prediction_idx - 1],  # rows before the first forecast window
    time_idx="time_idx",
    target="Seasonal",
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=prediction_length,
    time_varying_known_categoricals=["month", "weekday", "day"],
    time_varying_known_reals=[
        "time_idx",
        "pc_Siguniang",
        "mob_Siguniang",
        "pc_SichuanEpidemic",
        "mob_SichuanEpidemic",
    ],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Seasonal"],
    group_ids=['destination'],
    # NOTE: the target uses a softplus transformation; the earlier comment here
    # ("took the logarithm") did not match this setting.
    target_normalizer=GroupNormalizer(groups=['destination'], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

In [14]:
# Rolling evaluation of the Seasonal checkpoints: checkpoint i forecasts the i-th
# window of `prediction_length` steps; the collected list is printed at the end.
all_predictions = []
for i, model_path in enumerate(pretrained_model_paths):
    start_idx = start_prediction_idx + i * prediction_length
    # Exclusive upper bound of the window, capped just past the last evaluation index.
    end_idx = min(start_idx + prediction_length, end_prediction_idx + 1)

    # Validation set: every row before the end of the current window.
    validation_data = data.loc[data['time_idx'] < end_idx].copy()
    validation = TimeSeriesDataSet.from_dataset(
        training,
        validation_data,
        predict=True,
        stop_randomization=True,
    )
    val_dataloader = validation.to_dataloader(
        train=False,
        batch_size=batch_size,
        num_workers=0,
    )

    # Forecast the window with its checkpoint and keep output index 3 of the last axis.
    best_tft = TemporalFusionTransformer.load_from_checkpoint(model_path)
    raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
    current_prediction = raw_predictions[0][:, :, 3]
    current_prediction_numpy = current_prediction.numpy().flatten()
    all_predictions += current_prediction_numpy.tolist()

print(all_predictions)

[686.5208740234375, 774.5216064453125, 697.252197265625, 851.274169921875, 1414.61474609375, 1405.7696533203125, 735.7564697265625, 873.7840576171875, 719.978759765625, 753.2317504882812, 805.974853515625, 1744.396728515625, 1369.681640625, 697.9782104492188, 748.2681884765625, 686.7267456054688, 776.7510986328125, 864.9180908203125, 1712.051025390625, 1535.4305419921875, 671.1087646484375, 739.1500244140625, 763.4954223632812, 768.4699096679688, 736.6378784179688, 1776.34716796875, 1376.7940673828125, 701.6195068359375, 791.264892578125, 688.1738891601562, 696.1347045898438, 747.6446533203125, 1707.837158203125, 1448.4453125, 719.15576171875, 751.6934814453125, 636.50634765625, 784.8070068359375, 831.2423706054688, 1668.440185546875, 1516.5810546875, 784.4439697265625, 843.336181640625, 838.0128784179688, 818.8256225585938, 988.447265625, 946.5116577148438, 956.6556396484375, 717.60205078125, 803.4473876953125, 695.1112060546875, 926.8746337890625, 928.2173461914062, 1014.705139160156

In [15]:
# Seasonal forecasts for the 90 evaluation steps, hard-coded from the printed output
# of the Seasonal prediction loop so the final metrics can be computed without
# reloading the checkpoints.
Seasonal_fore=[686.5208740234375, 774.5216064453125, 697.252197265625, 851.274169921875, 1414.61474609375, 1405.7696533203125, 735.7564697265625, 873.7840576171875, 719.978759765625, 753.2317504882812, 805.974853515625, 1744.396728515625, 1369.681640625, 697.9782104492188, 748.2681884765625, 686.7267456054688, 776.7510986328125, 864.9180908203125, 1712.051025390625, 1535.4305419921875, 671.1087646484375, 739.1500244140625, 763.4954223632812, 768.4699096679688, 736.6378784179688, 1776.34716796875, 1376.7940673828125, 701.6195068359375, 791.264892578125, 688.1738891601562, 696.1347045898438, 747.6446533203125, 1707.837158203125, 1448.4453125, 719.15576171875, 751.6934814453125, 636.50634765625, 784.8070068359375, 831.2423706054688, 1668.440185546875, 1516.5810546875, 784.4439697265625, 843.336181640625, 838.0128784179688, 818.8256225585938, 988.447265625, 946.5116577148438, 956.6556396484375, 717.60205078125, 803.4473876953125, 695.1112060546875, 926.8746337890625, 928.2173461914062, 1014.7051391601562, 1084.1063232421875, 884.66748046875, 884.2811279296875, 722.271728515625, 781.793701171875, 832.777587890625, 1617.489501953125, 1516.982177734375, 858.547607421875, 805.388916015625, 787.2996215820312, 798.02587890625, 814.6080932617188, 1631.4765625, 1458.4307861328125, 699.8377685546875, 812.302490234375, 685.079345703125, 790.8538208007812, 898.1590576171875, 1714.5390625, 1383.02734375, 823.8980712890625, 799.6177978515625, 865.6594848632812, 891.4102783203125, 944.0714111328125, 1660.93994140625, 1380.4295654296875, 718.6041259765625, 681.352294921875, 674.2620849609375, 713.3812255859375, 722.8145751953125, 1709.02783203125, 1390.280029296875]

In [16]:
# Resid

In [17]:
# Checkpoints used for the Resid component, one entry per forecast window in window
# order (entry i is applied to the i-th window by the prediction loop).
# The paths are built with os.path.join so they resolve on any OS, not only with
# Windows backslash separators.
# NOTE(review): several file names repeat, i.e. the same checkpoint is reused for more
# than one window — confirm this selection is intended.
pretrained_model_paths = [
    os.path.join("saved_models", checkpoint_name)
    for checkpoint_name in [
        "best_model_epoch=3-v68.ckpt",
        "best_model_epoch=4-v74.ckpt",
        "best_model_epoch=35-v18.ckpt",
        "best_model_epoch=35-v18.ckpt",
        "best_model_epoch=8-v62.ckpt",
        "best_model_epoch=4-v75.ckpt",
        "best_model_epoch=34-v56.ckpt",
        "best_model_epoch=8-v62.ckpt",
        "best_model_epoch=4-v74.ckpt",
        "best_model_epoch=16-v32.ckpt",
        "best_model_epoch=3-v66.ckpt",
        "best_model_epoch=35-v17.ckpt",
        "best_model_epoch=3-v68.ckpt",
        "best_model_epoch=35-v17.ckpt",
        "best_model_epoch=34-v54.ckpt",
        "best_model_epoch=35-v17.ckpt",
        "best_model_epoch=3-v68.ckpt",
        "best_model_epoch=5-v88.ckpt",
        "best_model_epoch=38-v96.ckpt",
        "best_model_epoch=35-v17.ckpt",
        "best_model_epoch=8-v62.ckpt",
        "best_model_epoch=8-v62.ckpt",
        "best_model_epoch=5-v88.ckpt",
        "best_model_epoch=8-v62.ckpt",
        "best_model_epoch=5-v88.ckpt",
        "best_model_epoch=5-v88.ckpt",
        "best_model_epoch=34-v54.ckpt",
        "best_model_epoch=34-v54.ckpt",
        "best_model_epoch=8-v62.ckpt",
        "best_model_epoch=24-v41.ckpt",
    ]
]

In [18]:
# Template dataset for the Resid component. The prediction loop passes it to
# `TimeSeriesDataSet.from_dataset(training, ...)` to build each validation window.
# The window/encoder sizes come from the evaluation config cell instead of being
# repeated as literals (same values: 440, 30, 3).
#
# NOTE(review): the Part 2 training loop builds its dataset from
# `time_idx <= prediction_idx - 1` for every window, while this template always stops
# right before the first window. Anything the dataset fits on its rows (presumably
# scalers/normalizers — confirm) can therefore differ from what later checkpoints
# saw during training.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= start_prediction_idx - 1],  # rows before the first forecast window
    time_idx="time_idx",
    target="Resid",
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=prediction_length,
    time_varying_known_categoricals=["month", "weekday", "day"],
    time_varying_known_reals=[
        "time_idx",
        "pc_Siguniang",
        "mob_Siguniang",
        "pc_SichuanEpidemic",
        "mob_SichuanEpidemic",
    ],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Resid"],
    group_ids=['destination'],
    target_normalizer=GroupNormalizer(groups=['destination'], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

In [19]:
# Rolling evaluation of the Resid checkpoints: checkpoint i forecasts the i-th
# window of `prediction_length` steps; the collected list is printed at the end.
all_predictions = []
for i, model_path in enumerate(pretrained_model_paths):
    start_idx = start_prediction_idx + i * prediction_length
    # Exclusive upper bound of the window, capped just past the last evaluation index.
    end_idx = min(start_idx + prediction_length, end_prediction_idx + 1)

    # Validation set: every row before the end of the current window.
    validation_data = data.loc[data['time_idx'] < end_idx].copy()
    validation = TimeSeriesDataSet.from_dataset(
        training,
        validation_data,
        predict=True,
        stop_randomization=True,
    )
    val_dataloader = validation.to_dataloader(
        train=False,
        batch_size=batch_size,
        num_workers=0,
    )

    # Forecast the window with its checkpoint and keep output index 3 of the last axis.
    best_tft = TemporalFusionTransformer.load_from_checkpoint(model_path)
    raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
    current_prediction = raw_predictions[0][:, :, 3]
    current_prediction_numpy = current_prediction.numpy().flatten()
    all_predictions += current_prediction_numpy.tolist()

print(all_predictions)

[2240.224853515625, 2383.2119140625, 2708.57958984375, 2185.77099609375, 2318.840576171875, 2251.0791015625, 2339.20751953125, 2496.25048828125, 2447.79345703125, 2380.122802734375, 2396.26123046875, 2578.960205078125, 2617.89208984375, 2528.6484375, 2495.7568359375, 2665.850341796875, 2682.67822265625, 2669.67041015625, 2864.9833984375, 2827.62451171875, 2525.793701171875, 2547.6435546875, 2559.744140625, 2614.052978515625, 2030.5975341796875, 2480.9482421875, 2497.482177734375, 2574.29150390625, 2574.22119140625, 2578.620361328125, 2141.217041015625, 2223.296875, 2618.441162109375, 2743.9169921875, 2642.78759765625, 2666.93994140625, 2428.5986328125, 2899.520263671875, 3105.008544921875, 3199.552734375, 3670.397216796875, 2911.7109375, 2694.65673828125, 3009.888671875, 2567.009765625, 2383.5439453125, 2408.81591796875, 2374.8173828125, 2704.9794921875, 2549.938232421875, 2632.7275390625, 2121.982421875, 2170.2880859375, 2278.94677734375, 2024.250244140625, 2032.546875, 1595.879394531

In [20]:
# Resid forecasts for the 90 evaluation steps, hard-coded from the printed output of
# the Resid prediction loop so the final metrics can be computed without reloading
# the checkpoints.
Resid_fore=[2240.224853515625, 2383.2119140625, 2708.57958984375, 2185.77099609375, 2318.840576171875, 2251.0791015625, 2339.20751953125, 2496.25048828125, 2447.79345703125, 2380.122802734375, 2396.26123046875, 2578.960205078125, 2617.89208984375, 2528.6484375, 2495.7568359375, 2665.850341796875, 2682.67822265625, 2669.67041015625, 2864.9833984375, 2827.62451171875, 2525.793701171875, 2547.6435546875, 2559.744140625, 2614.052978515625, 2030.5975341796875, 2480.9482421875, 2497.482177734375, 2574.29150390625, 2574.22119140625, 2578.620361328125, 2141.217041015625, 2223.296875, 2618.441162109375, 2743.9169921875, 2642.78759765625, 2666.93994140625, 2428.5986328125, 2899.520263671875, 3105.008544921875, 3199.552734375, 3670.397216796875, 2911.7109375, 2694.65673828125, 3009.888671875, 2567.009765625, 2383.5439453125, 2408.81591796875, 2374.8173828125, 2704.9794921875, 2549.938232421875, 2632.7275390625, 2121.982421875, 2170.2880859375, 2278.94677734375, 2024.250244140625, 2032.546875, 1595.87939453125, 2468.12890625, 2488.593505859375, 2546.210205078125, 2575.2421875, 2668.540771484375, 2541.9296875, 2600.458984375, 2546.55517578125, 2483.7724609375, 2472.955078125, 2514.865234375, 2469.960693359375, 2573.691650390625, 2530.4892578125, 2558.204345703125, 2389.116943359375, 2471.150634765625, 2473.537109375, 2440.818603515625, 2449.149658203125, 2404.684814453125, 2480.22607421875, 2439.072509765625, 2466.3623046875, 2511.63525390625, 2521.49267578125, 2445.599853515625, 2490.698486328125, 2521.857666015625, 2510.983642578125, 2500.946533203125, 2709.25146484375, 2743.718994140625] 

In [21]:
# Observed values the combined forecast is scored against: 90 entries, one per
# evaluation step. The last entries match the `tourist` column shown at the end of
# the loaded table (1345, 1552, 1845, 3795).
true_values=[1091.0, 1551.0, 1602.0, 1320.0, 2140.0, 2343.0, 1123.0, 1492.0, 1187.0, 1174.0, 1261.0, 2412.0, 2187.0, 1239.0, 1169.0, 1238.0, 1314.0, 1761.0, 3126.0, 3488.0, 1894.0, 2051.0, 2205.0, 2161.0, 2128.0, 3566.0, 3670.0, 2491.0, 2619.0, 2430.0, 2053.0, 1905.0, 3145.0, 3317.0, 2918.0, 3006.0, 2666.0, 3087.0, 3277.0, 5210.0, 4722.0, 2959.0, 2693.0, 2145.0, 715.0, 548.0, 518.0, 451.0, 514.0, 675.0, 742.0, 754.0, 507.0, 793.0, 795.0, 518.0, 707.0, 712.0, 781.0, 1090.0, 1846.0, 2533.0, 1090.0, 1502.0, 1350.0, 1081.0, 1279.0, 2106.0, 1865.0, 883.0, 1105.0, 823.0, 469.0, 851.0, 1580.0, 1316.0, 682.0, 561.0, 605.0, 851.0, 1016.0, 2047.0, 1781.0, 1060.0, 1289.0, 1345.0, 1552.0, 1845.0, 3795.0, 3165.0]

In [22]:
# Combine the trend, seasonal, and residual forecasts into the final 'tourist'
# forecast and score it against the observed values.

# Constant carried jointly by the three components: in the loaded table
# Trend + Seasonal + Resid equals tourist + 3500 (first row:
# 149.586843 + 969.914316 + 2481.498840 ≈ 3601 for tourist = 101), so the same
# amount is removed again after summing the component forecasts.
component_offset = 3500

# zip() stops at the shortest input, so a mis-pasted component list would silently
# shorten the forecast; check the lengths up front and fail with a clear message.
series_lengths = {
    "Trend_fore": len(Trend_fore),
    "Seasonal_fore": len(Seasonal_fore),
    "Resid_fore": len(Resid_fore),
    "true_values": len(true_values),
}
if len(set(series_lengths.values())) != 1:
    raise ValueError(f"Forecast/actual series differ in length: {series_lengths}")

tourist = [sum(parts) - component_offset for parts in zip(Trend_fore, Seasonal_fore, Resid_fore)]

# Calculate Mean Absolute Error (MAE), Root Mean Square Error (RMSE), and
# Mean Absolute Percentage Error (MAPE) from a single error vector.
actual = np.array(true_values)
errors = actual - np.array(tourist)
abs_errors = np.abs(errors)

mae = np.mean(abs_errors)
rmse = np.sqrt(np.mean(np.square(errors)))
mape = np.mean(abs_errors / actual)  # reported as a fraction, not a percentage

# Print the evaluation metrics
print("MAE:", mae)
print("RMSE:", rmse)
print("MAPE:", mape)

MAE: 131.25858323838975
RMSE: 223.1631970704529
MAPE: 0.09919044083969884


In [None]:
# Part 2: Model Training

In [None]:
# Trend: settings for the expanding-window training run.

# Forecast range on `time_idx` (both ends inclusive), walked in steps of
# `prediction_length`.
start_prediction_idx, end_prediction_idx = 441, 530
prediction_length = 3
max_encoder_length = 30

# Filled by the training loop: one forecast tensor and one checkpoint path per window.
all_predictions, all_best_models = [], []

In [None]:


# Expanding-window training for the Trend component: one TFT is fitted per window of
# `prediction_length` steps, using every row before the window for training and the
# window itself as the validation / prediction target.
#
# NOTE(review): the same dataloader drives early stopping and checkpoint selection
# and also produces the stored forecast, so the "best" checkpoint is selected on the
# very steps it is then evaluated on — confirm this is intended.
for prediction_idx in range(start_prediction_idx, end_prediction_idx + 1, prediction_length):

    # NOTE(review): the pc_*/mob_* columns are declared as *known* reals, i.e. treated
    # as available for the steps being forecast — confirm they are known in advance.
    training = TimeSeriesDataSet(
        data[lambda x: x.time_idx <= prediction_idx - 1],  # training rows end right before this window
        time_idx="time_idx",
        target="Trend",
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=prediction_length,
        time_varying_known_categoricals=["month", "weekday", "day"],
        time_varying_known_reals=[
            "time_idx",
            "pc_Siguniang",
            "mob_Siguniang",
            "pc_SichuanEpidemic",
            "mob_SichuanEpidemic",
        ],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=["Trend"],
        group_ids=['destination'],
        target_normalizer=GroupNormalizer(groups=['destination'], transformation="softplus"),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True,
    )

    # Validation rows: the training history plus the window to forecast.
    validation_data = data[lambda x: x.time_idx <= prediction_idx - 1 + prediction_length].copy()
    validation = TimeSeriesDataSet.from_dataset(training, validation_data, predict=True, stop_randomization=True)

    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # configure network and trainer

    # NOTE: every window (and every component's loop in this notebook) writes to the
    # same directory with the same filename pattern, so the file name alone does not
    # identify the window; the exact path is recorded in `all_best_models` below.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        save_last=True,
        save_top_k=1,
        filename="best_model_{epoch}",
        dirpath="saved_models"
    )

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate
    logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

    trainer = pl.Trainer(
        max_epochs=40,
        # Train on one GPU when CUDA is available; otherwise fall back to the CPU
        # instead of failing on machines without a GPU.
        gpus=1 if torch.cuda.is_available() else 0,
        enable_model_summary=True,
        gradient_clip_val=0.0894,
        limit_train_batches=30,  # at most 30 training batches per epoch
        # fast_dev_run=True,  # comment in to check that the network or dataset has no serious bugs
        callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
        logger=logger,
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.0294,
        hidden_size=82,
        attention_head_size=1,
        dropout=0.1962,
        hidden_continuous_size=39,
        output_size=7,  # 7 quantiles by default
        loss=QuantileLoss(),
        log_interval=10,  # log every 10 batches
        reduce_on_plateau_patience=4,
    )
    print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

    trainer.fit(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )

    # Reload the best checkpoint of this window and store its path and forecast.
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
    raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
    all_best_models.append(best_model_path)
    # Index 3 is the middle one of the 7 quantile outputs.
    current_prediction = raw_predictions[0][:, :, 3]
    all_predictions.append(current_prediction)


In [None]:
# Show the Trend forecasts collected per window by the training loop above.
all_predictions 


In [None]:
# Show the best-checkpoint path recorded for each Trend window.
all_best_models

In [None]:
# Seasonal: settings for the expanding-window training run.

# Forecast range on `time_idx` (both ends inclusive), walked in steps of
# `prediction_length`.
start_prediction_idx, end_prediction_idx = 441, 530
prediction_length = 3
max_encoder_length = 30

# Filled by the training loop: one forecast tensor and one checkpoint path per window.
all_predictions, all_best_models = [], []

In [None]:



# Expanding-window training for the Seasonal component: one TFT is fitted per window
# of `prediction_length` steps, using every row before the window for training and
# the window itself as the validation / prediction target.
#
# NOTE(review): the same dataloader drives early stopping and checkpoint selection
# and also produces the stored forecast, so the "best" checkpoint is selected on the
# very steps it is then evaluated on — confirm this is intended.
for prediction_idx in range(start_prediction_idx, end_prediction_idx + 1, prediction_length):

    # NOTE(review): the pc_*/mob_* columns are declared as *known* reals, i.e. treated
    # as available for the steps being forecast — confirm they are known in advance.
    training = TimeSeriesDataSet(
        data[lambda x: x.time_idx <= prediction_idx - 1],  # update the upper bound of the training-set time index
        time_idx="time_idx",
        target="Seasonal",
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=prediction_length,
        time_varying_known_categoricals=["month", "weekday", "day"],
        time_varying_known_reals=[
            "time_idx",
            "pc_Siguniang",
            "mob_Siguniang",
            "pc_SichuanEpidemic",
            "mob_SichuanEpidemic",
        ],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=["Seasonal"],
        group_ids=['destination'],
        target_normalizer=GroupNormalizer(groups=['destination'], transformation="softplus"),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True,
    )

    # Validation rows: the training history plus the window to forecast.
    validation_data = data[lambda x: x.time_idx <= prediction_idx - 1 + prediction_length].copy()
    validation = TimeSeriesDataSet.from_dataset(training, validation_data, predict=True, stop_randomization=True)

    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # configure network and trainer

    # NOTE: every window (and every component's loop in this notebook) writes to the
    # same directory with the same filename pattern, so the file name alone does not
    # identify the window; the exact path is recorded in `all_best_models` below.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        save_last=True,
        save_top_k=1,
        filename="best_model_{epoch}",
        dirpath="saved_models"
    )

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate
    logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

    trainer = pl.Trainer(
        max_epochs=40,
        # Train on one GPU when CUDA is available; otherwise fall back to the CPU
        # instead of failing on machines without a GPU.
        gpus=1 if torch.cuda.is_available() else 0,
        enable_model_summary=True,
        gradient_clip_val=0.0894,
        limit_train_batches=30,  # at most 30 training batches per epoch
        # fast_dev_run=True,  # comment in to check that the network or dataset has no serious bugs
        callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
        logger=logger,
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.0294,
        hidden_size=82,
        attention_head_size=1,
        dropout=0.1962,
        hidden_continuous_size=39,
        output_size=7,  # 7 quantiles by default
        loss=QuantileLoss(),
        log_interval=10,  # log every 10 batches
        reduce_on_plateau_patience=4,
    )
    print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

    trainer.fit(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )

    # Reload the best checkpoint of this window and store its path and forecast.
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
    raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
    all_best_models.append(best_model_path)
    # Index 3 is the middle one of the 7 quantile outputs.
    current_prediction = raw_predictions[0][:, :, 3]
    all_predictions.append(current_prediction)


In [None]:
# Show the Seasonal forecasts collected per window by the training loop above.
all_predictions 


In [None]:
# Show the best-checkpoint path recorded for each Seasonal window.
all_best_models

In [None]:
# Resid: settings for the expanding-window training run.

# Forecast range on `time_idx` (both ends inclusive), walked in steps of
# `prediction_length`.
start_prediction_idx, end_prediction_idx = 441, 530
prediction_length = 3
max_encoder_length = 30

# Filled by the training loop: one forecast tensor and one checkpoint path per window.
all_predictions, all_best_models = [], []

In [None]:


# Expanding-window training for the Resid component: one TFT is fitted per window of
# `prediction_length` steps, using every row before the window for training and the
# window itself as the validation / prediction target.
#
# NOTE(review): the same dataloader drives early stopping and checkpoint selection
# and also produces the stored forecast, so the "best" checkpoint is selected on the
# very steps it is then evaluated on — confirm this is intended.
for prediction_idx in range(start_prediction_idx, end_prediction_idx + 1, prediction_length):

    # NOTE(review): the pc_*/mob_* columns are declared as *known* reals, i.e. treated
    # as available for the steps being forecast — confirm they are known in advance.
    training = TimeSeriesDataSet(
        data[lambda x: x.time_idx <= prediction_idx - 1],  # training rows end right before this window
        time_idx="time_idx",
        target="Resid",
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=prediction_length,
        time_varying_known_categoricals=["month", "weekday", "day"],
        time_varying_known_reals=[
            "time_idx",
            "pc_Siguniang",
            "mob_Siguniang",
            "pc_SichuanEpidemic",
            "mob_SichuanEpidemic",
        ],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=["Resid"],
        group_ids=['destination'],
        target_normalizer=GroupNormalizer(groups=['destination'], transformation="softplus"),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True,
    )

    # Validation rows: the training history plus the window to forecast.
    validation_data = data[lambda x: x.time_idx <= prediction_idx - 1 + prediction_length].copy()
    validation = TimeSeriesDataSet.from_dataset(training, validation_data, predict=True, stop_randomization=True)

    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # configure network and trainer

    # NOTE: every window (and every component's loop in this notebook) writes to the
    # same directory with the same filename pattern, so the file name alone does not
    # identify the window; the exact path is recorded in `all_best_models` below.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        save_last=True,
        save_top_k=1,
        filename="best_model_{epoch}",
        dirpath="saved_models"
    )

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate
    logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

    trainer = pl.Trainer(
        max_epochs=40,
        # Train on one GPU when CUDA is available; otherwise fall back to the CPU
        # instead of failing on machines without a GPU.
        gpus=1 if torch.cuda.is_available() else 0,
        enable_model_summary=True,
        gradient_clip_val=0.0894,
        limit_train_batches=30,  # at most 30 training batches per epoch
        # fast_dev_run=True,  # comment in to check that the network or dataset has no serious bugs
        callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
        logger=logger,
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.0294,
        hidden_size=82,
        attention_head_size=1,
        dropout=0.1962,
        hidden_continuous_size=39,
        output_size=7,  # 7 quantiles by default
        loss=QuantileLoss(),
        log_interval=10,  # log every 10 batches
        reduce_on_plateau_patience=4,
    )
    print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

    trainer.fit(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )

    # Reload the best checkpoint of this window and store its path and forecast.
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
    raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
    all_best_models.append(best_model_path)
    # Index 3 is the middle one of the 7 quantile outputs.
    current_prediction = raw_predictions[0][:, :, 3]
    all_predictions.append(current_prediction)


In [None]:
# Show the Resid forecasts collected per window by the training loop above.
all_predictions 


In [None]:
# Show the best-checkpoint path recorded for each Resid window.
all_best_models