# This notebook runs inference (time-series forecasting) with already-trained models. Before running it, note the following:
- Make sure you have trained the model and that the specified checkpoint files exist

In [None]:
from utils.tools import dotdict
import argparse
import random
import numpy as np
import torch

# Seed every random number generator used here (Python, PyTorch, NumPy)
# with the same value so that results can be reproduced.
fix_seed = 2021
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(fix_seed)

# Report whether PyTorch can see a CUDA device, and which one is current.
separator = "=" * 40
print(separator)
print("🔍 Checking PyTorch CUDA environment...")
if not torch.cuda.is_available():
    print("❌ CUDA is not available. The model will run on CPU.")
else:
    print("✅ CUDA is available. GPU(s) detected!")
    print(f"🔢 Number of GPUs available: {torch.cuda.device_count()}")
    active_gpu = torch.cuda.current_device()
    print(f"🖥️  Current GPU name       : {torch.cuda.get_device_name(active_gpu)}")
    print(f"🎯 Current GPU device ID   : {torch.cuda.current_device()}")
print(separator)

In [11]:
import torch

banner = "=" * 40
print(banner)
print("🚀 Selecting CUDA device...")

# Pin the current CUDA device when one is available; otherwise only report
# that execution falls back to the CPU.
if not torch.cuda.is_available():
    print("❌ CUDA is not available. Falling back to CPU.")
else:
    device_id = 0  # 👈 index of the GPU to use; edit to select another device
    torch.cuda.set_device(device_id)
    print(f"✅ CUDA is available. Setting device to GPU {device_id}")
    print(f"🖥️  Using GPU: {torch.cuda.get_device_name(device_id)}")
    print(f"🎯 torch.cuda.current_device(): {torch.cuda.current_device()}")
print(banner)

🚀 Selecting CUDA device...
❌ CUDA is not available. Falling back to CPU.


In [19]:
def get_default_args():
    """
    Build the default argument set for the inference phase.

    The options mirror the ones declared by the training entry point ("run"),
    so that an experiment configured there can be reconstructed here. The
    parser is fed an empty argument list, so every option takes its default;
    callers are expected to override individual fields afterwards.

    Returns:
        dotdict: option name -> default value, with attribute-style access.
    """
    # Local import: only needed to resolve the `--use_gpu` default below.
    import torch

    parser = argparse.ArgumentParser("infer phase")
    # basic config
    parser.add_argument('--task_name', type=str, default='long_term_forecast',
                        help='task name, options:[long_term_forecast, short_term_forecast]')
    parser.add_argument('--is_training', type=int, default=0, help='status')
    parser.add_argument('--model_id', type=str, default='1', help='model id')
    parser.add_argument('--model', type=str, default='Autoformer',
                        help='model name, options: [Autoformer, Informer, Transformer]')

    # data loader
    parser.add_argument('--data', type=str, default='ETTm1', help='dataset type')
    parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file')
    parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
    parser.add_argument('--features', type=str, default='M',
                        help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate '
                             'predict univariate, MS:multivariate predict univariate')
    parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
    parser.add_argument('--freq', type=str, default='h',
                        help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, '
                             'b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min '
                             'or 3h')
    parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

    # forecasting task
    parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
    parser.add_argument('--label_len', type=int, default=48, help='start token length')
    parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
    parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
    parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)

    # model define
    parser.add_argument('--expand', type=int, default=2, help='expansion factor for Mamba')
    parser.add_argument('--d_conv', type=int, default=4, help='conv kernel size for Mamba')
    parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock')
    parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')
    parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
    parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
    parser.add_argument('--c_out', type=int, default=7, help='output size')
    parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
    parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
    parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
    parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
    parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
    parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
    parser.add_argument('--factor', type=int, default=1, help='attn factor')
    parser.add_argument('--distil', action='store_false',
                        help='whether to use distilling in encoder, using this argument means not using distilling',
                        default=True)
    parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
    parser.add_argument('--embed', type=str, default='timeF',
                        help='time features encoding, options:[timeF, fixed, learned]')
    parser.add_argument('--activation', type=str, default='gelu', help='activation')
    parser.add_argument('--channel_independence', type=int, default=1,
                        help='0: channel dependence 1: channel independence for FreTS model')
    parser.add_argument('--decomp_method', type=str, default='moving_avg',
                        help='method of series decompsition, only support moving_avg or dft_decomp')
    parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalize; True 1 False 0')
    parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers')
    parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size')
    parser.add_argument('--down_sampling_method', type=str, default=None,
                        help='down sampling method, only support avg, max, conv')
    parser.add_argument('--seg_len', type=int, default=96,
                        help='the length of segmen-wise iteration of SegRNN')

    # optimization
    parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
    parser.add_argument('--itr', type=int, default=1, help='experiments times')
    parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
    parser.add_argument('--batch_size', type=int, default=1, help='batch size of train input data')
    parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
    parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
    parser.add_argument('--des', type=str, default='test', help='exp description')
    parser.add_argument('--loss', type=str, default='MSE', help='loss function')
    parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
    parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

    # GPU
    # The original declaration had no default, so `use_gpu` resolved to None
    # even on a CUDA machine. Default to "use the GPU when one is available".
    # NOTE(review): presumably the experiment class reads `use_gpu` to decide
    # where to place the model - confirm against its implementation.
    parser.add_argument('--use_gpu', type=_str2bool, default=torch.cuda.is_available(), help='use gpu')
    parser.add_argument('--gpu', type=int, default=0, help='gpu')
    parser.add_argument('--gpu_type', type=str, default='cuda', help='gpu type')  # cuda or mps
    parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
    parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')

    # de-stationary projector params
    parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128],
                        help='hidden layer dimensions of projector (List)')
    parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')

    # metrics (dtw)
    # `type=bool` would turn any non-empty string (even "False") into True.
    parser.add_argument('--use_dtw', type=_str2bool, default=False,
                        help='the controller of using dtw metric (dtw is time consuming, not suggested unless necessary)')

    # Augmentation
    parser.add_argument('--augmentation_ratio', type=int, default=0, help="How many times to augment")
    parser.add_argument('--seed', type=int, default=2, help="Randomization seed")
    parser.add_argument('--jitter', default=False, action="store_true", help="Jitter preset augmentation")
    parser.add_argument('--scaling', default=False, action="store_true", help="Scaling preset augmentation")
    parser.add_argument('--permutation', default=False, action="store_true",
                        help="Equal Length Permutation preset augmentation")
    parser.add_argument('--randompermutation', default=False, action="store_true",
                        help="Random Length Permutation preset augmentation")
    parser.add_argument('--magwarp', default=False, action="store_true", help="Magnitude warp preset augmentation")
    parser.add_argument('--timewarp', default=False, action="store_true", help="Time warp preset augmentation")
    parser.add_argument('--windowslice', default=False, action="store_true", help="Window slice preset augmentation")
    parser.add_argument('--windowwarp', default=False, action="store_true", help="Window warp preset augmentation")
    parser.add_argument('--rotation', default=False, action="store_true", help="Rotation preset augmentation")
    parser.add_argument('--spawner', default=False, action="store_true", help="SPAWNER preset augmentation")
    parser.add_argument('--dtwwarp', default=False, action="store_true", help="DTW warp preset augmentation")
    parser.add_argument('--shapedtwwarp', default=False, action="store_true", help="Shape DTW warp preset augmentation")
    parser.add_argument('--wdba', default=False, action="store_true", help="Weighted DBA preset augmentation")
    parser.add_argument('--discdtw', default=False, action="store_true",
                        help="Discrimitive DTW warp preset augmentation")
    parser.add_argument('--discsdtw', default=False, action="store_true",
                        help="Discrimitive shapeDTW warp preset augmentation")
    parser.add_argument('--extra_tag', type=str, default="", help="Anything extra")

    # TimeXer
    parser.add_argument('--patch_len', type=int, default=16, help='patch length')

    # Parse an empty argument list so that every option takes its default.
    return dotdict(vars(parser.parse_args([])))


def _str2bool(value):
    """Convert a command-line token such as 'true', 'no' or '1' to a bool."""
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Start from the parser defaults.
args = get_default_args()

# Override the defaults with the arguments that were used for training (they
# can be found in the corresponding script under the scripts directory), so
# that the same experiment - and therefore the same checkpoint directory
# name - is reconstructed here.
# NOTE(review): `device` is a plain integer; tensors moved with `.to(0)` are
# sent to CUDA device 0 - confirm this is what the downstream cells expect on
# CPU-only machines.
args.device = 0
args.task_name = 'long_term_forecast'
args.is_training = 0
args.root_path = './dataset/ETT-small/'
args.data_path = 'ETTh1.csv'
args.model_id = 'ETTh1_96_24'
args.model = 'Autoformer'
args.data = 'ETTh1'
args.features = 'M'
args.seq_len = 96
args.label_len = 48
args.pred_len = 24
args.e_layers = 2
args.d_layers = 1
args.factor = 3
args.enc_in = 7
args.dec_in = 7
args.c_out = 7
args.des = 'Exp'
# The parser option is named `itr`; the previous `args.iter` created an
# unrelated key that nothing reads.
args.itr = 1

# Rebuild the experiment "setting" string, which is used as the name of the
# checkpoint sub-directory.
setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_expand{}_dc{}_fc{}_eb{}_dt{}_{}_{}'.format(
    args.task_name,
    args.model_id,
    args.model,
    args.data,
    args.features,
    args.seq_len,
    args.label_len,
    args.pred_len,
    args.d_model,
    args.n_heads,
    args.e_layers,
    args.d_layers,
    args.d_ff,
    args.expand,
    args.d_conv,
    args.factor,
    args.embed,
    args.distil,
    args.des, 0)  # 👈 trailing run index: keep it aligned with the training iteration (itr)


setting

'long_term_forecast_ETTh1_96_24_Autoformer_ETTh1_ftM_sl96_ll48_pl24_dm512_nh8_el2_dl1_df2048_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0'

In [None]:
# load checkpoint, but please check you have set args correctly
import os
import torch
from exp.exp_main import Exp_Long_Term_Forecast

# Build the experiment object; the model to run is taken from it.
print("🚀 Initializing inference experiment...")
exp = Exp_Long_Term_Forecast(args)
model = exp.model

# Checkpoint location: <checkpoints>/<setting>/checkpoint.pth
checkpoint_path = os.path.join(args.checkpoints, setting, 'checkpoint.pth')

# Stop early when the checkpoint is missing; otherwise restore the weights.
if not os.path.exists(checkpoint_path):
    print("❌ Checkpoint file not found!")
    print(f"⚠️  Please check the path: {checkpoint_path}")
    raise FileNotFoundError(f"[ERROR] Missing checkpoint at {checkpoint_path}")

print(f"📂 Found checkpoint at: {checkpoint_path}")
print("📥 Loading model weights...")

# Map the stored tensors to CUDA when it is available, else to the CPU.
load_target = 'cuda' if torch.cuda.is_available() else 'cpu'
checkpoint = torch.load(checkpoint_path, map_location=load_target)
model.load_state_dict(checkpoint)

print("✅ Model weights loaded successfully.")

In [ ]:
from utils.timefeatures import time_features
from torch.utils.data import Dataset
import pandas as pd
from sklearn.preprocessing import StandardScaler

class Dataset_Kuai_Easy_QPS_Infer(Dataset):
    """Sliding-window dataset over a single CSV file, for inference only.

    The CSV must contain a ``date`` column. For ``features`` in ``M``/``MS``
    every column after the first one is used as data; otherwise only the
    ``target`` column is used. Each item is a window of ``seq_len`` rows plus
    the matching label/prediction slice and their time-feature marks.
    Consecutive windows start ``stride`` rows apart.

    Args:
        args: experiment arguments; stored on the instance as ``self.args``.
        root_path: directory that contains the data file.
        flag: split name; only ``'infer'`` is accepted.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'``, ``'MS'`` or ``'S'`` (see above).
        data_path: CSV file name, relative to ``root_path``.
        target: column used when ``features`` is not ``M``/``MS``.
        scale: standardize the data with a ``StandardScaler``.
        timeenc: ``0`` for month/day/weekday/hour marks, ``1`` to use
            ``time_features``.
        freq: pandas frequency string used to extend the timestamps into the
            prediction horizon (and passed to ``time_features``).
        stride: number of rows between the starts of consecutive windows.
        inverse: when true, ``data_y`` holds the unscaled values.
        seasonal_patterns: accepted for signature compatibility; unused.
    """

    def __init__(self, args,
                 root_path, flag='train',
                 size=None, features='S',
                 data_path='', target='OT',
                 scale=True,
                 timeenc=0,
                 freq='h', stride=60, inverse=False, seasonal_patterns=None):
        self.args = args
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.label_len, self.pred_len = size[0], size[1], size[2]

        self.stride = stride
        assert flag in ['infer']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        if self.features in ['M', 'MS']:
            # Every column except the first one (expected to be the date).
            df_data = df_raw[df_raw.columns[1:]]
        else:
            df_data = df_raw[[self.target]]

        if self.scale:
            # NOTE(review): the scaler is fitted on the inference file itself,
            # not on the training data - confirm this is intended.
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        self.data_x = data
        self.data_y = df_data.values if self.inverse else data
        self.data_stamp = self._build_time_marks(pd.to_datetime(df_raw['date']))

    def _build_time_marks(self, dates):
        """Return time features for ``dates`` extended by ``pred_len`` steps.

        The observed timestamps are followed by ``pred_len`` future timestamps
        spaced by ``freq``, so that the marks of a window's prediction part
        exist even when the window ends at the last observed row. (The
        extension used to be computed and then thrown away.)

        Raises:
            ValueError: if ``timeenc`` is neither 0 nor 1.
        """
        observed = pd.DatetimeIndex(dates)
        # The first generated date repeats the last observed one; drop it.
        future = pd.date_range(observed[-1], periods=self.pred_len + 1, freq=self.freq)[1:]
        all_dates = observed.append(future)

        if self.timeenc == 0:
            return pd.DataFrame({
                'month': all_dates.month,
                'day': all_dates.day,
                'weekday': all_dates.weekday,
                'hour': all_dates.hour,
            }).values
        if self.timeenc == 1:
            return time_features(all_dates, freq=self.freq).transpose(1, 0)
        raise ValueError(f"Unsupported timeenc: {self.timeenc!r} (expected 0 or 1)")

    def __getitem__(self, index):
        # Windows are `stride` rows apart, matching the count in __len__.
        s_begin = index * self.stride
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        # May be shorter than label_len + pred_len near the end of the data,
        # where no observed values exist for the prediction horizon.
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        # Number of stride-spaced windows whose input part fits in the data.
        return max((len(self.data_x) - self.seq_len) // self.stride + 1, 0)

    def inverse_transform(self, data):
        """Undo the standardization applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)

In [ ]:
from utils.metrics import metric
from utils.dtw_metric import accelerated_dtw
from data_provider.data_factory import data_provider
import time

infer_data, infer_loader = data_provider(args, flag='infer')

# Validate once, before iterating. len() of the dataset is its number of
# samples, so an empty dataset is the condition to reject (the check used to
# run on every batch and compared the sample count against seq_len).
if infer_data is None or len(infer_data) == 0:
    raise ValueError("Inference data is empty or too short for the specified seq_len.")

# Feed the model on the device its parameters live on, so inputs and weights
# always match (also on CPU-only machines).
device = next(model.parameters()).device

# Inference statistics.
preds = []
trues = []
infer_times = []  # seconds spent per batch

model.eval()
with torch.no_grad():
    for batch_x, batch_y, batch_x_mark, batch_y_mark in infer_loader:
        infer_start = time.time()
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Decoder input: the known label part followed by zeros for the
        # pred_len steps to be predicted.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(device)

        # Encoder-decoder forward pass.
        if args.use_amp:
            with torch.cuda.amp.autocast():
                outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        else:
            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # For 'MS' only the last feature is kept; otherwise all of them.
        f_dim = -1 if args.features == 'MS' else 0
        outputs = outputs[:, -args.pred_len:, :]
        batch_y = batch_y[:, -args.pred_len:, :]

        outputs = outputs.detach().cpu().numpy()
        batch_y = batch_y.detach().cpu().numpy()

        if infer_data.scale and args.inverse:
            shape = batch_y.shape
            if outputs.shape[-1] != batch_y.shape[-1]:
                # Repeat the output channels so the scaler sees as many
                # columns as the ground truth has.
                outputs = np.tile(outputs, [1, 1, int(batch_y.shape[-1] / outputs.shape[-1])])
            outputs = infer_data.inverse_transform(outputs.reshape(shape[0] * shape[1], -1)).reshape(shape)
            batch_y = infer_data.inverse_transform(batch_y.reshape(shape[0] * shape[1], -1)).reshape(shape)

        preds.append(outputs[:, :, f_dim:])
        trues.append(batch_y[:, :, f_dim:])

        infer_times.append(time.time() - infer_start)

# Concatenate the per-batch results.
preds = np.concatenate(preds, axis=0)
trues = np.concatenate(trues, axis=0)
print('test shape:', preds.shape, trues.shape)

preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
print('reshaped test shape:', preds.shape, trues.shape)

# DTW calculation. The `else` belongs to the `if` (it used to be attached to
# the `for`, which overwrote the computed value and left `dtw` undefined when
# DTW was disabled).
if args.use_dtw:
    def manhattan_distance(x, y):
        return np.abs(x - y)

    dtw_list = []
    for i, (pred_i, true_i) in enumerate(zip(preds, trues)):
        if i % 100 == 0:
            print("calculating dtw iter:", i)
        d, _, _, _ = accelerated_dtw(pred_i.reshape(-1, 1), true_i.reshape(-1, 1), dist=manhattan_distance)
        dtw_list.append(d)
    dtw = np.array(dtw_list).mean()
else:
    dtw = 'Not calculated'

# metrics
mae, mse, rmse, mape, mspe = metric(preds, trues)
print('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw))

In [ ]:
# Placeholder for plotting based on `preds` and `trues`.
# NOTE(review): this rebinds both names to empty lists, so any values stored
# under them by an earlier cell are no longer reachable - confirm intended.
preds, trues = [], []