In [49]:
import numpy as np
import random
import time
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
from torch.optim import lr_scheduler
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import ConcatDataset
import warnings
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.distributions.normal import Normal

In [50]:
# RevIN类实现了可逆的实例归一化操作，可以在前向传播和反向传播过程中进行归一化和反归一化操作。
class RevIN(nn.Module):
    """Reversible instance normalization.

    ``forward(x, 'norm')`` computes the mean and standard deviation of ``x``
    over every dimension except the first and the last, caches them on the
    module and returns the standardized tensor.  ``forward(y, 'denorm')``
    applies the inverse transform using the statistics cached by the most
    recent ``'norm'`` call.
    """

    def __init__(self, num_features: int, eps=1e-5):
        """
        :param num_features: the number of features or channels (only stored)
        :param eps: a value added to the variance for numerical stability
        """
        super().__init__()
        self.eps = eps
        self.num_features = num_features

    def forward(self, x, mode: str):
        """Dispatch on ``mode``: ``'norm'`` or ``'denorm'``.

        :raises NotImplementedError: for any other ``mode`` value
        """
        if mode == 'denorm':
            return self._denormalize(x)
        if mode != 'norm':
            raise NotImplementedError
        self._get_statistics(x)
        return self._normalize(x)

    def _get_statistics(self, x):
        """Cache the (detached) mean and standard deviation of ``x``."""
        reduce_dims = tuple(range(1, x.ndim - 1))
        self.mean = torch.mean(x, dim=reduce_dims, keepdim=True).detach()
        variance = torch.var(x, dim=reduce_dims, keepdim=True, unbiased=False)
        self.stdev = torch.sqrt(variance + self.eps).detach()

    def _normalize(self, x):
        """Subtract the cached mean, then divide by the cached std."""
        centered = x - self.mean
        return centered / self.stdev

    def _denormalize(self, x):
        """Multiply by the cached std, then add the cached mean."""
        scaled = x * self.stdev
        return scaled + self.mean

In [52]:
class Intra_Patch_MLP(nn.Module):
    """Two-layer perceptron: Linear(input_dim -> hidden_dim), ReLU,
    Linear(hidden_dim -> input_dim), applied to the last dimension."""

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Return a tensor with the same last-dimension size as ``x``."""
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)

In [53]:
class Inter_Patch_MLP(nn.Module):
    """Two-layer perceptron: Linear(input_dim -> hidden_dim), ReLU,
    Linear(hidden_dim -> input_dim), applied to the last dimension."""

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Return a tensor with the same last-dimension size as ``x``."""
        activated = self.relu(self.linear1(x))
        return self.linear2(activated)

In [None]:
class MLPLayer(nn.Module):
    """Patch-based mixing layer for inputs of shape [batch, seq_len, num_nodes].

    The first ``patch_nums * patch_size`` time steps are split into
    ``patch_nums`` consecutive patches of ``patch_size`` steps, and two
    branches are summed:

    * intra-patch: every patch is flattened to ``patch_size * num_nodes``
      values and passed through a shared MLP; the re-assembled sequence is
      then mixed along the time axis by ``intra_Linear``;
    * inter-patch: for every (node, position-in-patch) pair, an MLP mixes the
      values across the ``patch_nums`` patches.

    ``device``, ``dynamic``, ``factorized`` and ``layer_number`` are accepted
    for interface compatibility; ``forward`` does not read them.  ``dropout``
    and ``ff`` are created (so the parameter set and the initialization order
    stay unchanged) but are not applied in ``forward``.
    """

    def __init__(self, device, d_ff, num_nodes, patch_nums, patch_size, dynamic, factorized, layer_number):
        super(MLPLayer, self).__init__()
        self.device = device
        self.num_nodes = num_nodes
        self.patch_nums = patch_nums
        self.patch_size = patch_size
        self.layer_number = layer_number

        # Intra- and inter-patch MLPs
        self.intra_patch_mlp = Intra_Patch_MLP(patch_size * num_nodes, d_ff)
        self.intra_Linear = nn.Linear(self.patch_nums*self.patch_size, self.patch_nums*self.patch_size)
        self.inter_patch_mlp = Inter_Patch_MLP(self.patch_nums, self.patch_nums)

        # Dropout (not applied in forward)
        self.dropout = nn.Dropout(0.1)

        # Feed-forward block (not applied in forward)
        self.ff = nn.Sequential(
            nn.Linear(num_nodes, d_ff, bias=True),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(d_ff, num_nodes, bias=True)
        )

    def forward(self, x):
        """Mix ``x`` within and across patches.

        :param x: tensor of shape [batch, seq_len, num_nodes]
        :return: tensor of shape [batch, patch_nums * patch_size, num_nodes]
        """
        batch_size = x.size(0)
        span = self.patch_nums * self.patch_size

        # Intra-patch branch.  All patches are processed in one call instead of
        # a Python loop with repeated torch.cat; reshape (rather than view) also
        # accepts non-contiguous inputs.
        patches = x[:, :span, :].reshape(
            batch_size, self.patch_nums, self.patch_size * self.num_nodes)  # [batch, patch_nums, patch_size * num_nodes]
        intra_out = self.intra_patch_mlp(patches)
        intra_out = intra_out.reshape(batch_size, span, self.num_nodes)  # [batch, span, num_nodes]

        intra_out = intra_out.permute(0, 2, 1)  # [batch, num_nodes, span]
        intra_out = self.intra_Linear(intra_out)  # mixes along the time axis
        intra_out = intra_out.permute(0, 2, 1)  # [batch, span, num_nodes]

        # Inter-patch branch.  Tensor.unfold appends the window dimension LAST,
        # so the unfolded tensor is [batch, patch_nums, num_nodes, patch_size].
        u = x.unfold(dimension=1, size=self.patch_size, step=self.patch_size)
        u = u.permute(0, 2, 3, 1)  # [batch, num_nodes, patch_size, patch_nums]
        inter_out = self.inter_patch_mlp(u)  # mixes across patches (last dim)
        # Bring the axes back to (patch, step, node) order BEFORE flattening;
        # flattening in (patch, node, step) order would interleave time steps
        # and nodes whenever num_nodes > 1.
        inter_out = inter_out.permute(0, 3, 2, 1)  # [batch, patch_nums, patch_size, num_nodes]
        inter_out = inter_out.reshape(batch_size, span, self.num_nodes)  # [batch, span, num_nodes]

        out = inter_out + intra_out
        return out

In [56]:
class PatchModel(nn.Module):
    """Forecasting model: optional RevIN -> stacked AMS layers -> linear head.

    ``configs`` must provide ``layer_nums``, ``num_nodes``, ``pred_len``,
    ``seq_len``, ``k``, ``num_experts_list``, ``patch_size_list``, ``d_ff``,
    ``revin`` and ``gpu``.  ``use_gpu`` is optional and treated as true when
    absent.  ``AMS`` is not defined in the visible part of this notebook and
    must already be in scope when a model with ``layer_nums > 0`` is built.
    """

    def __init__(self, configs):
        super(PatchModel, self).__init__()
        self.layer_nums = configs.layer_nums  # number of stacked AMS layers
        self.num_nodes = configs.num_nodes  # number of input features
        self.pre_len = configs.pred_len
        self.seq_len = configs.seq_len
        self.k = configs.k
        self.num_experts_list = configs.num_experts_list
        self.patch_size_list = configs.patch_size_list
        self.d_ff = configs.d_ff
        self.revin = configs.revin
        if self.revin:
            # Per-instance normalization of the input window, undone on the output.
            self.revin_layer = RevIN(num_features=configs.num_nodes)

        self.AMS_lists = nn.ModuleList()

        # Hand the AMS layers a device that actually exists: fall back to CPU
        # when the run is configured for CPU or CUDA is unavailable, instead of
        # unconditionally naming a CUDA device.
        if getattr(configs, 'use_gpu', True) and torch.cuda.is_available():
            self.device = torch.device('cuda:{}'.format(configs.gpu))
        else:
            self.device = torch.device('cpu')

        for num in range(self.layer_nums):
            self.AMS_lists.append(
                AMS(self.seq_len, self.seq_len, self.num_experts_list[num], self.device, k=self.k,
                    num_nodes=self.num_nodes, patch_size=self.patch_size_list[num], noisy_gating=True,
                    d_ff=self.d_ff, layer_number=num + 1))
        # Linear predictor mapping the time axis seq_len -> pred_len.
        self.projections = nn.Sequential(nn.Linear(self.seq_len, self.pre_len))

    def forward(self, x):
        """Forecast ``pred_len`` steps.

        :param x: tensor of shape [batch_size, seq_len, num_nodes]
        :return: tensor of shape [batch_size, pred_len, num_nodes]
        """
        if self.revin:
            x = self.revin_layer(x, 'norm')
        out = x
        batch_size = x.shape[0]
        for layer in self.AMS_lists:
            out = layer(out)  # presumably keeps [batch_size, seq_len, num_nodes] - AMS is defined elsewhere
        # Put time last so the linear head acts on it: [batch_size, num_nodes, seq_len]
        out = out.permute(0, 2, 1).reshape(batch_size, self.num_nodes, -1)
        out = self.projections(out).transpose(2, 1)  # [batch_size, pred_len, num_nodes]
        if self.revin:
            out = self.revin_layer(out, 'denorm')
        return out

In [57]:
class Dataset_NASA(Dataset):
    """Sliding-window dataset over one CSV file.

    Sample ``i`` is the pair ``(rows[i : i+seq_len],
    rows[i+seq_len : i+seq_len+pred_len])`` taken from every CSV column except
    the first.  The values are used as read; no scaling is applied.
    """

    def __init__(self, root_path, flag='train', size=None, data_path=None):
        """
        :param root_path: directory containing the CSV files
        :param flag: split name; selects the file via ``data_path[flag]``
        :param size: ``[seq_len, pred_len]``; defaults to ``[384, 96]``
        :param data_path: mapping from split name to CSV file name; defaults to
            ``{'train': 'data.csv'}``
        """
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.pred_len = size[1]

        # Build the default mapping per call instead of sharing one mutable
        # dict object between all instances.
        if data_path is None:
            data_path = {'train': 'data.csv'}

        # 'train' is accepted so that the default flag / data_path pair is usable.
        assert flag in ['train', 'train1', 'train2', 'train3', 'test', 'val']
        self.root_path = root_path
        self.data_path = data_path[flag]
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and keep every column except the first as the data."""
        # Created for inverse_transform(); it is never fitted here because the
        # data is used unscaled.
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        cols_data = df_raw.columns[1:]
        df_data = df_raw[cols_data]
        data = df_data.values
        # Inputs and targets are windows over the same array.
        self.data_x = data
        self.data_y = data

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y)`` for the window starting at ``index``.

        :raises IndexError: if ``index`` is outside ``[0, len(self))``; this
            also lets plain ``for`` iteration over the dataset terminate.
        """
        if not 0 <= index < len(self):
            raise IndexError('index {} out of range for {} windows'.format(index, len(self)))

        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]

        return seq_x, seq_y

    def __len__(self):
        """Number of complete (input, target) windows; 0 if the file is too short."""
        # Clamp at 0: a negative value would make len() raise ValueError.
        return max(0, len(self.data_x) - self.seq_len - self.pred_len + 1)

    def inverse_transform(self, data):
        """Undo the scaling applied to the data.

        The scaler is never fitted by ``__read_data__`` (the data is served
        unscaled), so in that case ``data`` is returned unchanged instead of
        raising a not-fitted error.  A fitted scaler is applied as before.
        """
        if not hasattr(self.scaler, 'scale_'):
            return data
        return self.scaler.inverse_transform(data)

In [58]:
data_dict = {'NASA': Dataset_NASA}


def data_provider(args, flag):
    """Build the dataset for split ``flag`` and a DataLoader over it.

    Reads ``args.data``, ``args.root_path``, ``args.data_path``,
    ``args.seq_len``, ``args.pred_len``, ``args.batch_size`` and
    ``args.num_workers``.  Prints the split name and its length.

    :return: ``(data_set, data_loader)``
    """
    dataset_cls = data_dict[args.data]  # dataset class registered under this name

    # Only the test split is iterated in order; every other split is shuffled.
    # The last incomplete batch is always kept.
    shuffle_flag = flag != 'test'
    batch_size = args.batch_size

    data_set = dataset_cls(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.pred_len],
    )
    print(flag, len(data_set))

    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=False,
    )
    return data_set, data_loader


In [59]:
class Exp_Basic(object):
    """Base class for experiments: picks a device and builds the model on it.

    Subclasses must override ``_build_model``; the remaining hooks are
    no-op placeholders.
    """

    def __init__(self, args):
        """Store ``args``, select the device and move the built model onto it."""
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _build_model(self):
        """Return the model to train.

        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError

    def _acquire_device(self):
        """Return the torch device selected by ``args.use_gpu`` / ``args.gpu``.

        On the GPU path this also sets ``CUDA_VISIBLE_DEVICES`` to
        ``args.gpu`` (or to ``args.devices`` when ``args.use_multi_gpu`` is set).
        """
        if self.args.use_gpu:
            # NOTE(review): the device index below is used together with the
            # CUDA_VISIBLE_DEVICES mask set here - confirm that a non-zero
            # args.gpu resolves to the intended device.
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        else:
            device = torch.device('cpu')
            print('Use CPU')

        return device

    def _get_data(self):
        """Placeholder; subclasses provide the data loading."""
        pass

    def vali(self):
        """Placeholder; subclasses provide validation."""
        pass

    def train(self):
        """Placeholder; subclasses provide training."""
        pass

    def test(self):
        """Placeholder; subclasses provide testing."""
        pass


In [60]:
class Exp_Main(Exp_Basic):
    """Training and evaluation driver for ``PatchModel``.

    Trains on the concatenation of the ``train1``/``train2``/``train3`` splits
    and evaluates on the ``test`` split.
    """

    def __init__(self, args):
        super(Exp_Main, self).__init__(args)

    def _build_model(self):
        """Build the float32 ``PatchModel`` described by ``self.args``."""
        return PatchModel(self.args).float()

    def _get_data(self, flag):
        """Return ``(data_set, data_loader)`` for the split named ``flag``."""
        return data_provider(self.args, flag)

    def _select_optimizer(self):
        """Adam over all model parameters with ``args.learning_rate``."""
        return optim.Adam(self.model.parameters(), lr=self.args.learning_rate)

    def _select_criterion(self):
        """Mean absolute error loss."""
        return nn.L1Loss()

    def _crop_to_horizon(self, outputs, batch_y):
        """Keep the last ``pred_len`` steps of the last feature of both tensors.

        ``batch_y`` is moved to ``self.device`` after slicing.
        """
        f_dim = -1
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
        return outputs, batch_y

    def vali(self, vali_data, vali_loader, criterion):
        """Evaluate on ``vali_loader`` and return the MAE reported by ``metric``.

        ``vali_data`` and ``criterion`` are accepted for interface
        compatibility and are not used.  The model is put back into training
        mode before returning.
        """
        self.model.eval()
        preds = []
        trues = []
        with torch.no_grad():  # no gradients are needed for evaluation
            for batch_x, batch_y in vali_loader:
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()
                outputs = self.model(batch_x)
                outputs, batch_y = self._crop_to_horizon(outputs, batch_y)

                preds.append(outputs.detach().cpu().numpy())
                trues.append(batch_y.detach().cpu().numpy())

        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)

        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])

        mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
        self.model.train()
        return mae

    def train(self, setting):
        """Train the model and return it with the saved checkpoint loaded.

        :param setting: run identifier; checkpoints are kept under
            ``os.path.join(args.checkpoints, setting)``
        :return: ``self.model`` after loading ``checkpoint.pth`` from that directory
        """
        # Each _get_data call prints the split name and its number of windows.
        # Only the datasets of the training splits are needed; they are merged
        # and wrapped in a single loader below.
        train_data1, _ = self._get_data(flag='train1')
        train_data2, _ = self._get_data(flag='train2')
        train_data3, _ = self._get_data(flag='train3')
        test_data, test_loader = self._get_data(flag='test')

        train_data = ConcatDataset([train_data1, train_data2, train_data3])
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=self.args.batch_size, shuffle=True,
            num_workers=self.args.num_workers, drop_last=True)

        path = os.path.join(self.args.checkpoints, setting)  # checkpoint directory
        os.makedirs(path, exist_ok=True)

        time_now = time.time()

        train_steps = len(train_loader)  # batches per epoch
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        # One-cycle schedule: the learning rate rises for the first pct_start
        # fraction of all steps and decays afterwards.
        scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
                                            steps_per_epoch=train_steps,
                                            pct_start=self.args.pct_start,
                                            epochs=self.args.train_epochs,
                                            max_lr=self.args.learning_rate)
        train_losses = []
        test_losses = []
        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x = batch_x.float().to(self.device)  # (batch_size, seq_len, features)
                batch_y = batch_y.float().to(self.device)  # (batch_size, pred_len, features)

                outputs = self.model(batch_x)
                outputs, batch_y = self._crop_to_horizon(outputs, batch_y)
                loss = criterion(outputs, batch_y)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:  # progress report every 100 batches
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()

                adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
                scheduler.step()  # OneCycleLR is stepped once per batch

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            test_loss = self.vali(test_data, test_loader, criterion)
            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f}  Test Loss: {3:.7f}".format(
                epoch + 1, train_steps, train_loss, test_loss))
            train_losses.append(train_loss)
            test_losses.append(test_loss)
            # NOTE(review): early stopping / checkpoint selection is driven by
            # the TRAINING loss (there is no validation split) - confirm this
            # is intended.  EarlyStopping presumably writes checkpoint.pth
            # under `path`, which is the file loaded below.
            early_stopping(train_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

        # Reload the saved checkpoint; map_location lets a checkpoint written
        # on one device be loaded on another (e.g. GPU -> CPU).
        best_model_path = os.path.join(path, 'checkpoint.pth')
        self.model.load_state_dict(torch.load(best_model_path, map_location=self.device))
        return self.model

    def test(self, setting, test=0):
        """Evaluate on the test split, print and append metrics to result.txt.

        :param setting: run identifier (checkpoint / result directory name)
        :param test: when truthy, first load ``checkpoint.pth`` for ``setting``
            from ``args.checkpoints``
        :return: ``(preds, trues)`` as NumPy arrays
        """
        test_data, test_loader = self._get_data(flag='test')

        if test:
            print('loading model')
            # Same directory train() writes to, instead of a hard-coded path.
            checkpoint_path = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))

        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)
        infer_time = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)
                # Wall-clock time of the forward pass only.
                start_time = time.time()
                outputs = self.model(batch_x)
                infer_time.append(time.time() - start_time)

                outputs, batch_y = self._crop_to_horizon(outputs, batch_y)
                preds.append(outputs.detach().cpu().numpy())
                trues.append(batch_y.detach().cpu().numpy())
        print('Inference time: ', np.mean(infer_time))

        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)

        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])

        mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
        print('rmse:{}, mae:{}, rse:{}'.format(rmse, mae, rse))
        # The context manager closes the file even if a write fails.
        with open("result.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write('rmse:{}, mae:{}, rse:{}'.format(rmse, mae, rse))
            f.write('\n')
        return preds, trues

In [61]:
def train_test(args):
    """Run the experiment described by ``args``.

    When ``args.is_training`` is truthy, train and test ``args.itr``
    independent runs.  Otherwise only test, loading the checkpoint of the run
    with index 2.
    """

    def build_setting(run_idx):
        # Run identifier; also used as the checkpoint / result directory name.
        return '{}_{}_sl{}_pl{}_{}'.format(
            args.model_id,
            args.model,
            args.seq_len,
            args.pred_len, run_idx)

    if not args.is_training:
        setting = build_setting(2)

        exp = Exp_Main(args)
        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting, test=1)
        torch.cuda.empty_cache()  # release cached GPU memory
        return

    for run_idx in range(args.itr):
        setting = build_setting(run_idx)

        exp = Exp_Main(args)

        print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
        exp.train(setting)

        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting)
        torch.cuda.empty_cache()  # release cached GPU memory

In [65]:
# Seed Python's, PyTorch's and NumPy's random number generators (in that order).
fix_seed = 1024
for _seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    _seed_fn(fix_seed)

class Args:
    """Default experiment configuration, stored as plain class attributes."""

    # --- run control ---
    is_training = 1
    itr = 3  # number of independent training runs
    model = 'PathFormer'

    # --- data and paths ---
    data = 'NASA'
    root_path = './NASA'
    checkpoints = './checkpoints/'
    num_workers = 0

    # --- window sizes ---
    seq_len = 36
    pred_len = 1

    # --- model ---
    num_nodes = 1
    layer_nums = 2
    d_ff = 64
    k = 1
    num_experts_list = [4, 4]
    # Flat list; the launcher reshapes it into layer_nums rows.
    patch_size_list = [6, 9, 12, 18, 2, 3, 4, 6]
    revin = 1

    # --- optimization ---
    train_epochs = 100
    batch_size = 32
    patience = 100
    learning_rate = 0.005
    pct_start = 0.4

    # --- device ---
    use_gpu = True
    gpu = 0
    use_multi_gpu = False
    devices = '2'

    # --- not read by any code shown in this part of the notebook (helpers
    # that receive the whole args object may still use them) ---
    drop = 0.1
    metric = 'mae'
    threshold = 1.4
    start_point = 20
    d_emb = 64

if __name__ == '__main__':
    # Four alternative splits; each trains on three CSV files and tests on the
    # remaining one.
    data_path1 = {'train1': 'B0007.csv', 'train2': 'B0018.csv', 'train3': 'B0006.csv', 'test': 'B0005.csv'}
    data_path2 = {'train1': 'B0007.csv', 'train2': 'B0005.csv', 'train3': 'B0018.csv', 'test': 'B0006.csv'}
    data_path3 = {'train1': 'B0018.csv', 'train2': 'B0005.csv', 'train3': 'B0006.csv', 'test': 'B0007.csv'}
    data_path4 = {'train1': 'B0006.csv', 'train2': 'B0007.csv', 'train3': 'B0005.csv', 'test': 'B0018.csv'}

    args = Args()

    # Use the GPU only if it was requested AND CUDA is actually available.
    args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

    if args.use_gpu and args.use_multi_gpu:
        args.devices = args.devices.replace(' ', '')
        device_ids = args.devices.split(',')
        args.device_ids = list(map(int, device_ids))
        args.gpu = args.device_ids[0]

    # Reshape the flat list into layer_nums rows (2 x 4 with the defaults).
    args.patch_size_list = np.reshape(args.patch_size_list, (args.layer_nums, -1)).tolist()

    # One dict of overrides per experiment; applied on top of the defaults.
    run_overrides = [
        dict(k=3, is_training=1, model='PathFormer', data_path=data_path3,
             train_epochs=100, seq_len=36, start_point=66, d_emb=36,
             learning_rate=0.005, revin=1),
    ]
    for i, overrides in enumerate(run_overrides):
        for key, value in overrides.items():
            setattr(args, key, value)
        # The test file's base name identifies the run.
        args.model_id = args.data_path['test'].split('.')[0]
        train_test(args)

Use CPU
>>>>>>>start training : B0007_PathFormer_sl36_pl1_0>>>>>>>>>>>>>>>>>>>>>>>>>>
train1 96
train2 132
train3 132
test 132
Epoch: 1 cost time: 0.0860135555267334
Epoch: 1, Steps: 11 | Train Loss: 0.0697246  Test Loss: 0.0518708
Validation loss decreased (inf --> 0.069725).  Saving model ...
Updating learning rate to 0.00020743212569329043
Epoch: 2 cost time: 0.08111405372619629
Epoch: 2, Steps: 11 | Train Loss: 0.0675455  Test Loss: 0.0494756
Validation loss decreased (0.069725 --> 0.067546).  Saving model ...
Updating learning rate to 0.00022968247236289548
Epoch: 3 cost time: 0.08494210243225098
Epoch: 3, Steps: 11 | Train Loss: 0.0637382  Test Loss: 0.0460014
Validation loss decreased (0.067546 --> 0.063738).  Saving model ...
Updating learning rate to 0.0002666132338645091
Epoch: 4 cost time: 0.0808420181274414
Epoch: 4, Steps: 11 | Train Loss: 0.0579614  Test Loss: 0.0399620
Validation loss decreased (0.063738 --> 0.057961).  Saving model ...
Updating learning rate to 0.000317