### In this notebook we perform individual training.
In individual learning, each base station has access only to its own private dataset.

In [49]:
import sys
import os

from pathlib import Path
# Put the parent of the current working directory on sys.path so that modules
# living one level above this notebook can be imported from here.
parent = Path(os.path.abspath("")).resolve().parents[0]
# sys.path holds plain strings, so the membership test must use the string form;
# testing the Path object itself never matches and would insert a duplicate
# entry every time this cell is re-run.
if str(parent) not in sys.path:
    sys.path.insert(0, str(parent))

In [50]:
import random

import numpy as np
import torch

from argparse import Namespace

In [51]:
# 解释
from ml.utils.data_utils import read_data, generate_time_lags, time_to_feature, handle_nans, to_Xy, \
    to_torch_dataset, to_timeseries_rep, assign_statistics, \
    to_train_val, scale_features, get_data_by_area, remove_identifiers, get_exogenous_data_by_area, handle_outliers,\
    read_data_yl,get_data_by_area_yl, get_exogenous_data_by_area_yl, remove_identifiers_yl


In [52]:
from ml.utils.train_utils import train, test

In [53]:
from ml.models.mlp import MLP
from ml.models.rnn import RNN
from ml.models.lstm import LSTM
from ml.models.gru import GRU
from ml.models.cnn import CNN
from ml.models.rnn_autoencoder import DualAttentionAutoEncoder

In [54]:
args = Namespace(
    # Data locations
    data_path='/home/yanglin/Federated-Time-Series-Forecasting/dataset/yl/train_data.csv',  # training set path
    data_path_test='/home/yanglin/Federated-Time-Series-Forecasting/dataset/yl/test_data.csv',  # test set path
    test_size=0.2,  # fraction of the data held out for validation

    # Prediction target
    targets=['needs_charging'],  # column(s) to predict

    # Time-series parameters
    num_lags=10,  # number of time lags used as input features

    # Data handling parameters
    # identifier='vehicle_id',  # alternative: identify records by vehicle
    identifier='port',  # column that identifies the port
    # Read by make_postprocessing: a falsy value makes it merge the per-area
    # exogenous data under the "all" key. It was missing from this Namespace,
    # which made `args.filter_bs` raise AttributeError.
    filter_bs=None,
    nan_constant=0,  # constant used to replace NaN values
    x_scaler='minmax',  # feature scaling method
    y_scaler='minmax',  # target scaling method
    outlier_detection=None,  # outlier handling disabled

    # Training parameters
    criterion='mse',  # loss function
    epochs=150,  # maximum number of training epochs
    lr=0.001,  # learning rate
    optimizer='adam',  # optimizer
    batch_size=128,  # batch size
    early_stopping=True,  # whether to use early stopping
    patience=50,  # early-stopping patience
    max_grad_norm=0.0,  # gradient clipping
    reg1=0.0,  # L1 regularization
    reg2=0.0,  # L2 regularization

    # Miscellaneous
    plot_history=True,  # whether to plot the training loss history
    cuda=True,  # whether to use the GPU
    seed=0,  # random seed
    assign_stats=None,  # whether to use statistics as exogenous data
    use_time_features=True  # whether to use time features
)

> You can specify the base station to train on through the `filter_bs` parameter and use it in block 12, or you can pass the base station to block 12 explicitly.

In [56]:
# Echo the full run configuration so it is recorded next to the results.
print(f"Script arguments: {args}\n")

Script arguments: Namespace(assign_stats=None, batch_size=128, criterion='mse', cuda=True, data_path='/home/yanglin/Federated-Time-Series-Forecasting/dataset/yl/train_data.csv', data_path_test='/home/yanglin/Federated-Time-Series-Forecasting/dataset/yl/test_data.csv', early_stopping=True, epochs=150, identifier='port', lr=0.001, max_grad_norm=0.0, nan_constant=0, num_lags=10, optimizer='adam', outlier_detection=None, patience=50, plot_history=True, reg1=0.0, reg2=0.0, seed=0, targets=['needs_charging'], test_size=0.2, use_time_features=True, x_scaler='minmax', y_scaler='minmax')



In [55]:
import torch.version


# Use the GPU only when it is both requested in the config and actually available.
if args.cuda and torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Report the environment that led to the device choice.
print("torch_version", torch.__version__)
print("args.cuda", args.cuda)
print("torch.cuda.is_available()", torch.cuda.is_available())
print(f"Using {device}")

torch_version 2.3.0
args.cuda True
torch.cuda.is_available() False
Using cpu


In [None]:
# Outlier detection specification: not needed for now because the data has already been cleaned.
# if args.outlier_detection is not None:
#     outlier_columns = ['rb_down', 'rb_up', 'down', 'up'] #需要进行异常值检测的列名
#     outlier_kwargs = {"ElBorn": (10, 90), "LesCorts": (10, 90), "PobleSec": (5, 95)} #每个键是一个区域的名称，每个值是一个元组，用于指定该区域的异常值检测参数。"ElBorn": (10, 90)：表示对于ElBorn区域，异常值检测的阈值是10和90。
#     args.outlier_columns = outlier_columns 
#     args.outlier_kwargs = outlier_kwargs

In [57]:
def seed_all(seed=None):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed to apply. When omitted, ``args.seed`` from the
            notebook configuration is used, which is the original behaviour.
    """
    if seed is None:
        seed = args.seed

    random.seed(seed)                  # Python's built-in `random` module
    np.random.seed(seed)               # NumPy's global generator
    torch.manual_seed(seed)            # PyTorch generator
    torch.cuda.manual_seed_all(seed)   # PyTorch generators on all CUDA devices

    # Ask cuDNN for deterministic algorithms and turn off its benchmark mode so
    # repeated runs do not pick different kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [58]:
seed_all()

### The preprocessing pipeline performed here for the base station specified in filter_bs argument
Preprocessing includes:
1. NaNs Handling NANs处理
2. Outliers Handling 异常值处理
3. Scaling Data 数据缩放
4. Generating time lags 生成时间滞后
5. Generating and importing exogenous data as features (time, statistics) (if applied) 生成和导入外生数据

In [None]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Load the training data from the path configured in `args` (single source of
# truth) instead of repeating the literal path in this cell.
data = pd.read_csv(args.data_path)

# Restrict the analysis to the continuous-valued features.
continuous_features = ['position_x', 'position_y', 'position_z', 'heading', 'speed','speed_command', 'mileage_distance', 'power_on_time', 
                       'soc', 'fuel_level', 'avg_speed_5min']

# Fill NaN values with SimpleImputer; the strategy may be 'mean', 'median',
# 'most_frequent' or 'constant'.
imputer = SimpleImputer(strategy='mean')
imputed_data = imputer.fit_transform(data[continuous_features])

# Standardize the imputed features before running PCA.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(imputed_data)

# Fit a PCA model; n_components is the number of principal components kept.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(scaled_data)

# Wrap the projected data in a DataFrame.
principal_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])

# Visualize the projection onto the first two principal components.
import matplotlib.pyplot as plt

plt.scatter(principal_df['PC1'], principal_df['PC2'])
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Principal Component Analysis')
plt.show()

# Inspect the loading matrix: one row per input feature, one column per component.
loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=continuous_features)
print(loadings)


In [67]:
# Preview the first rows of the raw training data loaded for the PCA cell.
data.head()

Unnamed: 0,vehicle_id,position_x,position_y,position_z,heading,speed,speed_command,mileage_distance,power_on_time,soc,fuel_level,chassis_mode,estop,task_state_running,task_state_estop,task_state_lock,task_stage,current_task,error_code,target_location,vesselVisitID,mission_type,container1_type,container2_type,vehicle_mode,missionID,port,hour,day_of_week,avg_speed_5min,needs_charging
0,A022,154.233978,-218.729309,0,0.000933,0.0,0.0,11339,30930888,72,0,1,0,0,0,0,0.02444533,0.0,0.0,0.0,0.0,0.030909,0.018724,0.015729,0.0,365fbfb4-9ecc-11ed-ac70-0242ac110030,tianhaiheda,13,5,0.0,0
1,A022,154.221878,-218.728104,0,0.000567,0.0,0.0,11339,30936910,72,0,1,0,0,0,0,0.02444533,0.0,0.0,0.0,0.0,0.030909,0.018724,0.015729,0.0,365fbfb4-9ecc-11ed-ac70-0242ac110030,tianhaiheda,15,5,0.0,0
2,A028,-173.617371,13.083996,0,-5.2e-05,3.529296,3.234201,11419,31048816,59,0,1,0,1,0,0,0.02444533,0.0,0.0,2.144994e-06,0.0,0.0,0.0,0.015729,0.0,6c9eb5ba-9fdc-11ed-8e85-0242ac11003c,changjinxiantai,21,6,5.109102,0
3,A022,-269.873993,-97.462982,0,-0.004478,1.23601,1.225601,11332,30914202,79,0,1,0,1,0,0,0.02444533,0.0,0.0,7.953511e-10,0.0,0.0,0.0,0.015729,0.0,4d15657e-9ea4-11ed-8856-0242ac110030,tianhaiheda,8,5,0.96283,0
4,A022,-248.316193,2.064377,0,-0.003248,0.0,0.0,11334,30915288,78,0,1,0,0,0,0,4.799157e-08,0.0,0.0,1.307926e-08,0.0,0.0,0.0,0.015729,0.0,af0ad1c2-9ea6-11ed-8e1d-0242ac110030,tianhaiheda,9,5,0.0,0


In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def _scale_numeric_columns(frame, scaler, numeric_cols, fit):
    """Return a copy of ``frame`` with ``numeric_cols`` transformed by ``scaler``.

    Columns not listed in ``numeric_cols`` are carried over unchanged and the
    original column order is preserved. ``fit=True`` fits the scaler on
    ``frame`` first; ``fit=False`` reuses the already fitted scaler.
    """
    block = frame[numeric_cols]
    values = scaler.fit_transform(block) if fit else scaler.transform(block)
    scaled = pd.DataFrame(values, columns=numeric_cols, index=frame.index)
    untouched = frame.drop(columns=numeric_cols)
    return pd.concat([scaled, untouched], axis=1)[list(frame.columns)]


def make_preprocessing(filter_data=None):
    """Load the training CSV, split it into train/validation and standardize it.

    The previous body left an unterminated ``train_test_split(`` call followed
    only by commented-out code (a SyntaxError) and returned the raw frame,
    while the caller unpacks eight values. This version restores the
    split-and-scale pipeline so that the documented 8-tuple is returned.

    Args:
        filter_data (str, optional): Forwarded unchanged to ``read_data_yl``.
            The notebook passes a port name here; presumably ``None`` keeps
            all records - verify against ``read_data_yl``.

    Returns:
        tuple: ``(X_train, X_val, y_train, y_val, exogenous_data_train,
        exogenous_data_val, x_scaler, y_scaler)``. The first six entries are
        DataFrames with a fresh 0..n-1 index; the last two are the fitted
        ``StandardScaler`` objects for features and targets.

    Notes:
        * Only numeric feature columns are standardized. Non-numeric ones
          (for example a string identifier column) are passed through as-is.
        * ``args.x_scaler``, ``args.y_scaler``, ``args.nan_constant`` and
          ``args.num_lags`` are not consulted here.
        * NOTE(review): the target is standardized as well; confirm this is
          wanted if ``needs_charging`` is a binary label.
    """
    # Load the data
    df = read_data_yl(args.data_path, filter_data=filter_data)

    # Feature columns, target columns and the columns treated as exogenous data
    feature_cols = [
        'position_x', 'position_y', 'position_z', 'heading', 'speed', 'speed_command',
        'mileage_distance', 'power_on_time', 'soc', 'fuel_level', 'chassis_mode',
        'estop', 'task_state_running', 'task_state_estop', 'task_state_lock',
        'task_stage', 'current_task', 'error_code', 'target_location',
        'vesselVisitID', 'mission_type', 'container1_type', 'container2_type',
        'vehicle_mode', 'port', 'hour', 'day_of_week', 'avg_speed_5min'
    ]
    target_cols = list(args.targets)
    exogenous_cols = ['current_task', 'mission_type']

    # Split features, targets and exogenous data with the same row assignment.
    # random_state is fixed so the split is reproducible across runs.
    splits = train_test_split(
        df[feature_cols], df[target_cols], df[exogenous_cols],
        test_size=args.test_size, random_state=42)
    # Drop the shuffled source index so all six frames line up on 0..n-1.
    X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val = (
        part.reset_index(drop=True) for part in splits)

    # Only numeric columns can be standardized.
    numeric_cols = [col for col in feature_cols if pd.api.types.is_numeric_dtype(df[col])]

    # Fit the feature scaler on the training split only, then reuse it for validation.
    x_scaler = StandardScaler()
    X_train = _scale_numeric_columns(X_train, x_scaler, numeric_cols, fit=True)
    X_val = _scale_numeric_columns(X_val, x_scaler, numeric_cols, fit=False)

    # Same for the targets; keep them as DataFrames with their column names.
    y_scaler = StandardScaler()
    y_train = pd.DataFrame(y_scaler.fit_transform(y_train), columns=target_cols)
    y_val = pd.DataFrame(y_scaler.transform(y_val), columns=target_cols)

    return X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler

    

vehicle_id             object
position_x            float64
position_y            float64
position_z              int64
heading               float64
speed                 float64
speed_command         float64
mileage_distance        int64
power_on_time           int64
soc                     int64
fuel_level              int64
chassis_mode            int64
estop                   int64
task_state_running      int64
task_state_estop        int64
task_state_lock         int64
task_stage            float64
current_task          float64
error_code            float64
target_location       float64
vesselVisitID         float64
mission_type          float64
container1_type       float64
container2_type       float64
vehicle_mode          float64
missionID              object
port                   object
hour                    int64
day_of_week             int64
avg_speed_5min        float64
needs_charging          int64
dtype: object


In [None]:
# Example usage: run the preprocessing with filter_data set to 'tianhaiheda'.
X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler = make_preprocessing(filter_data='tianhaiheda')

In [62]:
print(X_train[:5])  # show the first 5 rows


   position_x  position_y  position_z   heading     speed  speed_command  \
0    0.777461   -1.053085         0.0 -0.664916 -0.275249      -0.273404   
1   -0.548309    0.811175         0.0  0.867434 -0.275249      -0.273404   
2    2.140261    0.105426         0.0  0.893322  4.026245       4.171201   
3    0.777376   -1.053034         0.0 -0.664030 -0.275249      -0.273404   
4    0.777458   -1.053079         0.0 -0.664220 -0.275249      -0.273404   

   mileage_distance  power_on_time       soc  fuel_level  chassis_mode  \
0          0.984277       0.521547 -0.943597         0.0     -0.299467   
1         -0.561244      -0.226106  0.658042         0.0     -0.299467   
2         -0.174864       0.150822  0.017386         0.0     -0.299467   
3          0.984277       1.332137 -0.943597         0.0     -0.299467   
4          0.984277       0.241321 -0.943597         0.0     -0.299467   

      estop  task_state_running  task_state_estop  task_state_lock  \
0 -0.760983           -0.329

In [66]:
# Display the two scaler objects returned by make_preprocessing.
x_scaler, y_scaler

(StandardScaler(), StandardScaler())

### Postprocessing Stage

In this stage we transform data in a way that can be fed into ML algorithms.

In [None]:
import numpy as np


def make_postprocessing(X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler, args):
    """Turn the preprocessed frames into arrays that can be fed to the models.

    Uses the ``*_yl`` helper variants (``get_data_by_area_yl``,
    ``get_exogenous_data_by_area_yl``, ``remove_identifiers_yl``).

    Args:
        X_train, X_val: Feature DataFrames; they must still contain the
            ``args.identifier`` column.
        y_train, y_val: Target DataFrames.
        exogenous_data_train, exogenous_data_val: Exogenous data, or ``None``.
        x_scaler, y_scaler: Accepted for interface compatibility; not used here.
        args: Configuration namespace. ``identifier`` and ``num_lags`` are
            required; ``filter_bs`` is optional and treated as unset when
            missing.

    Returns:
        tuple: ``(X_train, X_val, y_train, y_val, area_X_train, area_X_val,
        area_y_train, area_y_val, exogenous_data_train, exogenous_data_val)``.
        The ``area_*`` entries are ``None`` when the training features hold a
        single distinct identifier value.
    """
    # Split the data per area only when more than one area is present.
    if X_train[args.identifier].nunique() != 1:
        area_X_train, area_X_val, area_y_train, area_y_val = get_data_by_area_yl(X_train, X_val,
                                                                              y_train, y_val, 
                                                                              identifier=args.identifier)
    else:
        area_X_train, area_X_val, area_y_train, area_y_val = None, None, None, None

    # Get the exogenous data per area.
    if exogenous_data_train is not None:
        exogenous_data_train, exogenous_data_val = get_exogenous_data_by_area_yl(exogenous_data_train,
                                                                              exogenous_data_val)

    # Per area: strip the identifier columns and convert to NumPy arrays.
    if area_X_train is not None:
        for area in area_X_train:
            tmp_X_train, tmp_y_train, tmp_X_val, tmp_y_val = remove_identifiers_yl(
                area_X_train[area], area_y_train[area], area_X_val[area], area_y_val[area])
            tmp_X_train, tmp_y_train = tmp_X_train.to_numpy(), tmp_y_train.to_numpy()
            tmp_X_val, tmp_y_val = tmp_X_val.to_numpy(), tmp_y_val.to_numpy()
            area_X_train[area] = tmp_X_train
            area_X_val[area] = tmp_X_val
            area_y_train[area] = tmp_y_train
            area_y_val[area] = tmp_y_val
    
    # Convert the per-area exogenous data to NumPy arrays.
    if exogenous_data_train is not None:
        for area in exogenous_data_train:
            exogenous_data_train[area] = exogenous_data_train[area].to_numpy()
            exogenous_data_val[area] = exogenous_data_val[area].to_numpy()

    # Strip the identifier columns from the overall features and targets.
    X_train, y_train, X_val, y_val = remove_identifiers_yl(X_train, y_train, X_val, y_val)
    assert len(X_train.columns) == len(X_val.columns)

    # Columns are assumed to be lagged copies of the base features, so the
    # base feature count is the column count divided by the number of lags.
    num_features = len(X_train.columns) // args.num_lags

    # Convert the features to the time-series representation.
    X_train = to_timeseries_rep(X_train.to_numpy(), num_lags=args.num_lags, num_features=num_features)
    X_val = to_timeseries_rep(X_val.to_numpy(), num_lags=args.num_lags, num_features=num_features)

    # The whole per-area mapping is handed to the helper here; this assumes
    # to_timeseries_rep accepts a mapping as well as an array - TODO confirm.
    if area_X_train is not None:
        area_X_train = to_timeseries_rep(area_X_train, num_lags=args.num_lags, num_features=num_features)
        area_X_val = to_timeseries_rep(area_X_val, num_lags=args.num_lags, num_features=num_features)

    # Convert the targets to NumPy arrays.
    y_train, y_val = y_train.to_numpy(), y_val.to_numpy()

    # Centralized learning: merge the exogenous data of every area under "all".
    # `filter_bs` is looked up defensively because the configuration namespace
    # may not define it; a missing attribute is treated as "no filter".
    filter_bs = getattr(args, "filter_bs", None)
    if not filter_bs and exogenous_data_train is not None:
        exogenous_data_train_combined, exogenous_data_val_combined = [], []
        for area in exogenous_data_train:
            exogenous_data_train_combined.extend(exogenous_data_train[area])
            exogenous_data_val_combined.extend(exogenous_data_val[area])
        exogenous_data_train_combined = np.stack(exogenous_data_train_combined)
        exogenous_data_val_combined = np.stack(exogenous_data_val_combined)
        exogenous_data_train["all"] = exogenous_data_train_combined
        exogenous_data_val["all"] = exogenous_data_val_combined
    
    return X_train, X_val, y_train, y_val, area_X_train, area_X_val, area_y_train, area_y_val, exogenous_data_train, exogenous_data_val


In [None]:
def make_postprocessing(X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler, args):
    """Make data ready to be fed into ml algorithms.

    Uses the generic helpers (``get_data_by_area``,
    ``get_exogenous_data_by_area``, ``remove_identifiers``).

    NOTE(review): a function with this same name is defined in the previous
    cell using the ``*_yl`` helpers. Whichever cell runs last wins, so in
    top-to-bottom execution this definition replaces the earlier one. Confirm
    which variant is intended and remove or rename the other.

    Args:
        X_train, X_val: Feature DataFrames; they must still contain the
            ``args.identifier`` column.
        y_train, y_val: Target DataFrames.
        exogenous_data_train, exogenous_data_val: Exogenous data, or ``None``.
        x_scaler, y_scaler: Accepted for interface compatibility; not used here.
        args: Configuration namespace. ``identifier`` and ``num_lags`` are
            required; ``filter_bs`` is optional and treated as unset when
            missing.

    Returns:
        tuple: ``(X_train, X_val, y_train, y_val, area_X_train, area_X_val,
        area_y_train, area_y_val, exogenous_data_train, exogenous_data_val)``.
        The ``area_*`` entries are ``None`` when the training features hold a
        single distinct identifier value.
    """
    # If there is more than one area (distinct args.identifier value), get the data per area.
    if X_train[args.identifier].nunique() != 1:
        area_X_train, area_X_val, area_y_train, area_y_val = get_data_by_area(X_train, X_val,
                                                                              y_train, y_val, 
                                                                              identifier=args.identifier)
    else:
        area_X_train, area_X_val, area_y_train, area_y_val = None, None, None, None

    # Get the exogenous data per area.
    if exogenous_data_train is not None:
        exogenous_data_train, exogenous_data_val = get_exogenous_data_by_area(exogenous_data_train,
                                                                              exogenous_data_val)
    # Per area: strip the identifier columns and transform to NumPy.
    if area_X_train is not None:
        for area in area_X_train:
            tmp_X_train, tmp_y_train, tmp_X_val, tmp_y_val = remove_identifiers(
                area_X_train[area], area_y_train[area], area_X_val[area], area_y_val[area])
            tmp_X_train, tmp_y_train = tmp_X_train.to_numpy(), tmp_y_train.to_numpy()
            tmp_X_val, tmp_y_val = tmp_X_val.to_numpy(), tmp_y_val.to_numpy()
            area_X_train[area] = tmp_X_train
            area_X_val[area] = tmp_X_val
            area_y_train[area] = tmp_y_train
            area_y_val[area] = tmp_y_val
    
    # Convert the per-area exogenous data to NumPy arrays.
    if exogenous_data_train is not None:
        for area in exogenous_data_train:
            exogenous_data_train[area] = exogenous_data_train[area].to_numpy()
            exogenous_data_val[area] = exogenous_data_val[area].to_numpy()
    
    # Remove identifiers from the overall features and targets, and check that
    # train and validation have the same number of feature columns.
    X_train, y_train, X_val, y_val = remove_identifiers(X_train, y_train, X_val, y_val)
    assert len(X_train.columns) == len(X_val.columns)
    # Columns are assumed to be lagged copies of the base features, so the
    # base feature count is the column count divided by the number of lags.
    num_features = len(X_train.columns) // args.num_lags
    
    # To time-series representation.
    X_train = to_timeseries_rep(X_train.to_numpy(), num_lags=args.num_lags,
                                            num_features=num_features)
    X_val = to_timeseries_rep(X_val.to_numpy(), num_lags=args.num_lags,
                                          num_features=num_features)
    
    # The whole per-area mapping is handed to the helper here; this assumes
    # to_timeseries_rep accepts a mapping as well as an array - TODO confirm.
    if area_X_train is not None:
        area_X_train = to_timeseries_rep(area_X_train, num_lags=args.num_lags,
                                                     num_features=num_features)
        area_X_val = to_timeseries_rep(area_X_val, num_lags=args.num_lags,
                                                   num_features=num_features)
    
    # Transform targets to NumPy.
    y_train, y_val = y_train.to_numpy(), y_val.to_numpy()
    
    # Centralized (all) learning specific: merge the exogenous data of every
    # area under "all". `filter_bs` is looked up defensively because the
    # configuration namespace may not define it; a missing attribute is
    # treated as "no filter".
    filter_bs = getattr(args, "filter_bs", None)
    if not filter_bs and exogenous_data_train is not None:
        exogenous_data_train_combined, exogenous_data_val_combined = [], []
        for area in exogenous_data_train:
            exogenous_data_train_combined.extend(exogenous_data_train[area])
            exogenous_data_val_combined.extend(exogenous_data_val[area])
        exogenous_data_train_combined = np.stack(exogenous_data_train_combined)
        exogenous_data_val_combined = np.stack(exogenous_data_val_combined)
        exogenous_data_train["all"] = exogenous_data_train_combined
        exogenous_data_val["all"] = exogenous_data_val_combined
    return X_train, X_val, y_train, y_val, area_X_train, area_X_val, area_y_train, area_y_val, exogenous_data_train, exogenous_data_val

In [None]:
# Run the postprocessing step; this rebinds X_train, X_val, y_train, y_val and the
# exogenous data to its outputs, so the cell is not idempotent on re-run.
X_train, X_val, y_train, y_val, area_X_train, area_X_val, area_y_train, area_y_val, exogenous_data_train, exogenous_data_val = make_postprocessing(X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler, args)

In [None]:
# Peek at the first two entries of the postprocessed training features.
X_train[:2]

In [None]:
# Peek at the first two entries of the postprocessed training targets.
y_train[:2]

In [None]:
# Number of training and validation samples.
len(X_train), len(X_val)

### Define the input dimensions for the model architecture

In [None]:
def get_input_dims(X_train, exogenous_data_train, model_name=None):
    """Compute the input sizes needed to build the model.

    Args:
        X_train: Training features; must expose ``.shape`` with at least three
            axes (axes 1 and 2 are read).
        exogenous_data_train: ``None``, or a mapping whose values expose
            ``.shape``. ``None`` and an empty mapping both mean "no exogenous
            data".
        model_name: Name of the architecture. When omitted, ``args.model_name``
            from the notebook configuration is used, which is the original
            behaviour.

    Returns:
        tuple: ``(input_dim, exogenous_dim)``. For ``"mlp"`` the input is
        flattened, so ``input_dim`` is ``shape[1] * shape[2]``; for every other
        model it is ``shape[2]``.

    Raises:
        KeyError: If the mapping has more than one entry but no ``"all"`` key.
    """
    if model_name is None:
        model_name = args.model_name

    if model_name == "mlp":
        # The MLP consumes a flat vector, so both axes are multiplied together.
        input_dim = X_train.shape[1] * X_train.shape[2]
    else:
        input_dim = X_train.shape[2]

    if not exogenous_data_train:
        # None or empty mapping: fit() treats both as "no exogenous data",
        # so report a width of 0 instead of failing on a missing "all" key.
        exogenous_dim = 0
    elif len(exogenous_data_train) == 1:
        # Single entry: use that entry's width.
        only_key = next(iter(exogenous_data_train.keys()))
        exogenous_dim = exogenous_data_train[only_key].shape[1]
    else:
        # Several entries: the merged "all" entry defines the width.
        exogenous_dim = exogenous_data_train["all"].shape[1]

    return input_dim, exogenous_dim

### Initialize the model for training

In [None]:
def get_model(model: str,
              input_dim: int,
              out_dim: int,
              lags: int = 10,
              exogenous_dim: int = 0,
              seed=0):
    """Instantiate the architecture selected by ``model``.

    Args:
        model: One of 'mlp', 'rnn', 'lstm', 'gru', 'cnn', 'da_encoder_decoder'.
        input_dim: Input feature size passed to the chosen architecture.
        out_dim: Number of outputs (not used by 'da_encoder_decoder').
        lags: Number of lags; only used by 'cnn'.
        exogenous_dim: Exogenous feature size; used by 'rnn', 'lstm', 'gru'
            and 'cnn'.
        seed: Accepted but not used by this function.

    Returns:
        The freshly constructed model instance.

    Raises:
        NotImplementedError: If ``model`` is not one of the supported names.
    """
    # Each entry builds its model lazily, so only the requested class is touched.
    builders = {
        "mlp": lambda: MLP(input_dim=input_dim, layer_units=[256, 128, 64], num_outputs=out_dim),
        "rnn": lambda: RNN(input_dim=input_dim, rnn_hidden_size=128, num_rnn_layers=1, rnn_dropout=0.0,
                           layer_units=[128], num_outputs=out_dim, matrix_rep=True,
                           exogenous_dim=exogenous_dim),
        "lstm": lambda: LSTM(input_dim=input_dim, lstm_hidden_size=128, num_lstm_layers=1, lstm_dropout=0.0,
                             layer_units=[128], num_outputs=out_dim, matrix_rep=True,
                             exogenous_dim=exogenous_dim),
        "gru": lambda: GRU(input_dim=input_dim, gru_hidden_size=128, num_gru_layers=1, gru_dropout=0.0,
                           layer_units=[128], num_outputs=out_dim, matrix_rep=True,
                           exogenous_dim=exogenous_dim),
        "cnn": lambda: CNN(num_features=input_dim, lags=lags, exogenous_dim=exogenous_dim, out_dim=out_dim),
        "da_encoder_decoder": lambda: DualAttentionAutoEncoder(input_dim=input_dim, architecture="lstm",
                                                               matrix_rep=True),
    }

    build = builders.get(model)
    if build is None:
        raise NotImplementedError("Specified model is not implemented. Plese define your own model or choose one from ['mlp', 'rnn', 'lstm', 'gru', 'cnn', 'da_encoder_decoder']")
    return build()

In [None]:
# Choose the architecture to train; get_input_dims reads args.model_name.
args.model_name = "mlp"

# Work out the model input size and the exogenous feature size.
input_dim, exogenous_dim = get_input_dims(X_train, exogenous_data_train)
print(input_dim, exogenous_dim)

# Build the model; the number of outputs is the width of the target array.
model = get_model(
    args.model_name,
    input_dim,
    y_train.shape[1],
    lags=args.num_lags,
    exogenous_dim=exogenous_dim,
    seed=args.seed,
)

In [None]:
# Display the instantiated model.
model

### The fit function used to train the model specified above

In [None]:
def fit(model, X_train, y_train, X_val, y_val,
        exogenous_data_train=None, exogenous_data_val=None,
        idxs=None,
        log_per=1):
    """Train ``model`` on the given splits and return the trained model.

    Args:
        model: The model to train.
        X_train, y_train: Training features and targets.
        X_val, y_val: Validation features and targets.
        exogenous_data_train, exogenous_data_val: ``None`` or mappings of
            exogenous arrays. With more than one entry the ``"all"`` entry is
            used; with exactly one entry that entry is used; otherwise no
            exogenous data is used.
        idxs: Forwarded to ``to_torch_dataset`` as ``indices``. Defaults to
            ``[8, 3, 1, 10, 9]``, described in the original code as the
            indices of the targets in X.
            NOTE(review): confirm these indices suit the current dataset.
        log_per: Logging frequency forwarded to ``train``.

    Returns:
        Whatever ``train`` returns (the trained model).
    """
    # Build a fresh list on every call rather than sharing one mutable default
    # list between calls and with the helpers it is passed to.
    if idxs is None:
        idxs = [8, 3, 1, 10, 9]

    # Get exogenous data (if any).
    if exogenous_data_train is not None and len(exogenous_data_train) > 1:
        exogenous_data_train = exogenous_data_train["all"]
        exogenous_data_val = exogenous_data_val["all"]
    elif exogenous_data_train is not None and len(exogenous_data_train) == 1:
        cid = next(iter(exogenous_data_train.keys()))
        exogenous_data_train = exogenous_data_train[cid]
        exogenous_data_val = exogenous_data_val[cid]
    else:
        exogenous_data_train = None
        exogenous_data_val = None

    # Feature count is read from the innermost sequence of the first sample;
    # assumes X_train is laid out as (samples, lags, features, ...) - TODO confirm.
    num_features = len(X_train[0][0])

    # To torch loaders.
    train_loader = to_torch_dataset(X_train, y_train,
                                    num_lags=args.num_lags,
                                    num_features=num_features,
                                    exogenous_data=exogenous_data_train,
                                    indices=idxs,
                                    batch_size=args.batch_size, 
                                    shuffle=False)
    val_loader = to_torch_dataset(X_val, y_val, 
                                  num_lags=args.num_lags,
                                  num_features=num_features,
                                  exogenous_data=exogenous_data_val,
                                  indices=idxs,
                                  batch_size=args.batch_size,
                                  shuffle=False)

    # Train the model.
    model = train(model, 
                  train_loader, val_loader,
                  epochs=args.epochs,
                  optimizer=args.optimizer, lr=args.lr,
                  criterion=args.criterion,
                  early_stopping=args.early_stopping,
                  patience=args.patience,
                  plot_history=args.plot_history, 
                  device=device, log_per=log_per)

    return model

In [None]:
# Train the model defined above on the training/validation splits.
# NOTE(review): exogenous_data_train / exogenous_data_val are not passed here, so
# fit() runs without exogenous inputs even though get_input_dims() was given
# exogenous_data_train - confirm this is intended.
trained_model = fit(model, X_train, y_train, X_val, y_val)