### In this notebook we perform individual training.
In individual learning, each base station has access only to its own private dataset.

In [1]:
import sys
import os

from pathlib import Path

# Put the parent of the working directory on the import path
# (presumably where the `ml` package imported below lives — confirm).
parent = Path(os.path.abspath("")).resolve().parents[0]
# sys.path holds strings, and a Path never compares equal to a str, so the
# membership test must use the string form; otherwise every re-run of this
# cell inserts a duplicate entry.
if str(parent) not in sys.path:
    sys.path.insert(0, str(parent))

In [2]:
import random
from argparse import Namespace

import numpy as np
import pandas as pd
import torch

In [3]:
from ml.utils.data_utils import read_data, generate_time_lags, time_to_feature, handle_nans, to_Xy, \
    to_torch_dataset, to_timeseries_rep, assign_statistics, \
    to_train_val, scale_features, get_data_by_area, remove_identifiers, get_exogenous_data_by_area, handle_outliers

In [4]:
from ml.utils.train_utils import train, test

In [5]:
from ml.models.mlp import MLP
from ml.models.rnn import RNN
from ml.models.lstm import LSTM
from ml.models.gru import GRU
from ml.models.cnn import CNN
from ml.models.rnn_autoencoder import DualAttentionAutoEncoder

In [6]:
# Run configuration shared by every cell below.
args = Namespace(
    # Relative to the notebook's working directory (same convention as
    # data_path_test) so the notebook is not tied to one user's home directory.
    data_path='../dataset/processed_A032_IGVData_KPI.csv', # dataset path
    data_path_test=['../dataset/ElBorn_test.csv'], # test dataset path(s)
    test_size=0.2, # fraction of the data used for validation
    targets=['charge_status'], # target column(s)
    num_lags=10, # number of past observations included in the input

    filter_bs=None, # restrict training to a single base station; can be changed dynamically
    identifier='vehicle_id', # name of the column that identifies the base station

    nan_constant=0, # constant used to replace NaN values
    x_scaler='minmax', # scaler applied to the features
    y_scaler='minmax', # scaler applied to the targets
    outlier_detection=None, # whether to perform outlier detection (e.g. clipping)

    criterion='mse', # optimization criterion, either mse or l1
    epochs=150, # maximum number of epochs
    lr=0.001, # learning rate
    optimizer='adam', # optimizer, either sgd or adam
    batch_size=128, # batch size
    early_stopping=True, # whether to use early stopping
    patience=50, # early-stopping patience
    max_grad_norm=0.0, # whether to clip the gradient norm
    reg1=0.0, # L1 regularization
    reg2=0.0, # L2 regularization

    plot_history=True, # whether to plot the loss history

    cuda=True, # whether to use the GPU

    seed=0, # reproducibility seed

    assign_stats=None, # statistics to use as exogenous data, any of ["mean", "median", "std", "variance", "kurtosis", "skew"]
    use_time_features=False # whether to derive time features from the time index
)


> You can select the base station to train on through the `filter_bs` parameter and use it in block 12, or pass the base station to block 12 explicitly.

In [7]:
print(f"Script arguments: {args}\n")

Script arguments: Namespace(assign_stats=None, batch_size=128, criterion='mse', cuda=True, data_path='/home/yanglin/Federated-Time-Series-Forecasting/dataset/processed_A032_IGVData_KPI.csv', data_path_test=['../dataset/ElBorn_test.csv'], early_stopping=True, epochs=150, filter_bs=None, identifier='vehicle_id', lr=0.001, max_grad_norm=0.0, nan_constant=0, num_lags=10, optimizer='adam', outlier_detection=None, patience=50, plot_history=True, reg1=0.0, reg2=0.0, seed=0, targets=['charge_status'], test_size=0.2, use_time_features=False, x_scaler='minmax', y_scaler='minmax')



In [8]:
# Use the GPU only when it was requested and CUDA is actually available.
if args.cuda and torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

Using cpu


In [9]:
# TODO: this cell does not need to run for the current configuration.
# Outlier detection specification; only attached to args when detection is enabled.
# NOTE(review): these column and area names do not appear among the columns shown
# for the vehicle dataset used here — confirm before enabling outlier detection.
if args.outlier_detection is not None:
    outlier_columns = ['rb_down', 'rb_up', 'down', 'up']
    outlier_kwargs = dict(ElBorn=(10, 90), LesCorts=(10, 90), PobleSec=(5, 95))
    args.outlier_columns, args.outlier_kwargs = outlier_columns, outlier_kwargs

In [10]:
def seed_all(seed=None):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Seed to apply to `random`, NumPy and PyTorch. When omitted,
            ``args.seed`` is used, which matches the previous behavior.
    """
    if seed is None:
        seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic algorithms and disable its benchmark mode.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [11]:
seed_all()

## 在这里为filter_bs参数中指定的基站执行预处理管道
预处理中的:
1. nan处理
2. 异常值处理
3. 缩放（归一化）数据
4. 产生时间滞后
5. 生成和导入外生数据作为特征(时间、统计数据)(如果适用)

In [12]:
def _combine_exogenous(date_time_df, stats_df, X, y):
    """Join the optional exogenous frames column-wise; return None when both are absent."""
    if date_time_df is None and stats_df is None:
        return None
    combined = pd.concat([date_time_df, stats_df], axis=1)
    # Drop duplicated columns (if any), keeping the first occurrence.
    combined = combined.loc[:, ~combined.columns.duplicated()].copy()
    # The exogenous rows must line up one-to-one with the features and targets.
    assert len(combined) == len(X) == len(y)
    return combined


def make_preprocessing(filter_bs=None):
    """Preprocess the .csv at ``args.data_path``.

    Steps, in order: read the data, handle NaNs, split into train/validation,
    optionally handle outliers on the training split, split into X/y, scale
    X and y, generate time lags, and build the optional exogenous features
    (time features and statistics).

    Args:
        filter_bs: Forwarded to ``read_data`` as ``filter_data``.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val, exogenous_data_train,
        exogenous_data_val, x_scaler, y_scaler, target_index)``. The exogenous
        entries are None when neither time features nor statistics were
        produced. ``target_index`` is the position of ``args.targets[0]``
        among the columns of the NaN-handled frame.
    """
    # Read the data
    df = read_data(args.data_path, filter_data=filter_bs)
    print(f"数据形状: {df.shape}")
    # Handle missing values
    df = handle_nans(train_data=df, constant=args.nan_constant,
                     identifier=args.identifier)
    print(f"处理缺失值后的数据形状: {df.shape}")
    # Split into training and validation sets
    # NOTE(review): args.test_size is not forwarded here, so the split ratio is
    # whatever to_train_val decides on its own — confirm this is intended.
    train_data, val_data = to_train_val(df)
    print(f"训练集数据形状: {train_data.shape}")
    print(f"验证集数据形状: {val_data.shape}")

    # Column position of the first target; passed to fit later on
    target_column = args.targets[0]
    target_index = df.columns.get_loc(target_column)
    print(f"目标变量 {target_column} 的索引是: {target_index}")

    # Outlier handling (if specified), applied to the training split only
    print(f"异常值检测: {args.outlier_detection}")
    # TODO: outlier detection is not needed for now; revisit if that changes
    if args.outlier_detection is not None:
        train_data = handle_outliers(df=train_data, columns=args.outlier_columns,
                                     identifier=args.identifier, kwargs=args.outlier_kwargs)

    # Split the data into features (X) and targets (y)
    # TODO: y_val also carries the vehicle id column at this point
    X_train, X_val, y_train, y_val = to_Xy(train_data=train_data, val_data=val_data,
                                          targets=args.targets)
    print(f"X_train 形状: {X_train.shape}")
    print(f"y_train 形状: {y_train.shape}")
    print(f"X_val 形状: {X_val.shape}")
    print(f"y_val 形状: {y_val.shape}")
    print(f"y_train.ndim:{y_train.ndim}")

    # Scale the features and the targets
    # Scale X
    X_train, X_val, x_scaler = scale_features(train_data=X_train, val_data=X_val,
                                             scaler=args.x_scaler, identifier=args.identifier)
    print(f"标准化后 X_train 形状: {X_train.shape}")
    print(f"标准化后 X_val 形状: {X_val.shape}")
    print(f"X_scaler: {x_scaler}")
    # Scale y
    y_train, y_val, y_scaler = scale_features(train_data=y_train, val_data=y_val,
                                             scaler=args.y_scaler, identifier=args.identifier)
    print(f"标准化后 y_train 形状: {y_train.shape}")
    print(f"标准化后 y_val 形状: {y_val.shape}")
    print(f"y_scaler: {y_scaler}")

    # Generate time lags
    X_train = generate_time_lags(X_train, args.num_lags)
    X_val = generate_time_lags(X_val, args.num_lags)
    y_train = generate_time_lags(y_train, args.num_lags, is_y=True)
    y_val = generate_time_lags(y_val, args.num_lags, is_y=True)
    print(f"生成时间滞后后 X_train 形状: {X_train.shape}")
    print(f"生成时间滞后后 X_val 形状: {X_val.shape}")
    print(f"生成时间滞后后 y_train 形状: {y_train.shape}")
    print(f"生成时间滞后后 y_val 形状: {y_val.shape}")

    # Date/time features as exogenous data (governed by args.use_time_features)
    date_time_df_train = time_to_feature(
        X_train, args.use_time_features, identifier=args.identifier
    )
    date_time_df_val = time_to_feature(
        X_val, args.use_time_features, identifier=args.identifier
    )
    print(f"日期时间特征训练集: {date_time_df_train}")
    print(f"日期时间特征验证集: {date_time_df_val}")

    # Statistics as exogenous data (governed by args.assign_stats)
    stats_df_train = assign_statistics(X_train, args.assign_stats, args.num_lags,
                                       targets=args.targets, identifier=args.identifier)
    print(f"统计数据训练集: {stats_df_train}")
    stats_df_val = assign_statistics(X_val, args.assign_stats, args.num_lags,
                                     targets=args.targets, identifier=args.identifier)
    print(f"统计数据验证集: {stats_df_val}")

    # Merge the exogenous features (if any) into one frame per split
    exogenous_data_train = _combine_exogenous(date_time_df_train, stats_df_train, X_train, y_train)
    exogenous_data_val = _combine_exogenous(date_time_df_val, stats_df_val, X_val, y_val)

    return X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler, target_index


In [13]:
# No time features are extracted from the time index in this configuration,
# so exogenous_data_train and exogenous_data_val are empty.
(X_train, X_val, y_train, y_val,
 exogenous_data_train, exogenous_data_val,
 x_scaler, y_scaler, target_index) = make_preprocessing()

# make_preprocessing(
#      filter_bs="LesCorts"
#  )

INFO logger 2024-06-04 12:47:55,638 | data_utils.py:51 | 缺失值处理执行了，填充值为0


数据形状: (11039, 27)
处理缺失值后的数据形状: (11039, 27)
数据的样本信息:
	总样本数: 11039
	训练样本数: 8832
	验证样本数: 2207
训练集数据形状: (8832, 27)
验证集数据形状: (2207, 27)
目标变量 charge_status 的索引是: 13
异常值检测: None
X_train 形状: (8832, 27)
y_train 形状: (8832, 2)
X_val 形状: (2207, 27)
y_val 形状: (2207, 2)
y_train.ndim:2
标准化后 X_train 形状: (8832, 27)
标准化后 X_val 形状: (2207, 27)
X_scaler: MinMaxScaler()
标准化后 y_train 形状: (8832, 2)
标准化后 y_val 形状: (2207, 2)
y_scaler: MinMaxScaler()
Processing area: 0.0, initial shape: (8832, 27)
Generated lag-1 for column position_x, shape: (8832, 28)
Generated lag-1 for column position_y, shape: (8832, 29)
Generated lag-1 for column position_z, shape: (8832, 30)
Generated lag-1 for column heading, shape: (8832, 31)
Generated lag-1 for column speed, shape: (8832, 32)
Generated lag-1 for column speed_command, shape: (8832, 33)
Generated lag-1 for column mileage_distance, shape: (8832, 34)
Generated lag-1 for column power_on_time, shape: (8832, 35)
Generated lag-1 for column soc, shape: (8832, 36)
Generated lag-

In [14]:
X_train.head()

Unnamed: 0_level_0,missionID _lag-10,vehicle_mode_lag-10,container2_type_lag-10,container1_type_lag-10,mission_type_lag-10,vesselVisitID_lag-10,target_location_lag-10,error_code_lag-10,current_task_lag-10,task_stage_lag-10,...,soc_lag-1,power_on_time_lag-1,mileage_distance_lag-1,speed_command_lag-1,speed_lag-1,heading_lag-1,position_z_lag-1,position_y_lag-1,position_x_lag-1,vehicle_id
local_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-01-01 00:00:00.000000010,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.000977,0.0,0.032353,0.024842,0.499589,0.0,0.838228,0.287738,0.0
1970-01-01 00:00:00.000000011,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001099,0.0,0.032353,0.024842,0.499592,0.0,0.838228,0.287738,0.0
1970-01-01 00:00:00.000000012,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001221,0.0,0.032353,0.024842,0.499581,0.0,0.838227,0.287738,0.0
1970-01-01 00:00:00.000000013,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001221,0.0,0.032353,0.024842,0.499571,0.0,0.838224,0.287737,0.0
1970-01-01 00:00:00.000000014,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001465,0.0,0.032353,0.024842,0.499575,0.0,0.838217,0.287731,0.0


In [15]:
y_train.head()

Unnamed: 0_level_0,charge_status,vehicle_id
local_time,Unnamed: 1_level_1,Unnamed: 2_level_1
1970-01-01 00:00:00.000000010,0.0,0.0
1970-01-01 00:00:00.000000011,0.0,0.0
1970-01-01 00:00:00.000000012,0.0,0.0
1970-01-01 00:00:00.000000013,0.0,0.0
1970-01-01 00:00:00.000000014,0.0,0.0


In [16]:
x_scaler, y_scaler

(MinMaxScaler(), MinMaxScaler())

### Postprocessing Stage

In this stage we transform data in a way that can be fed into ML algorithms.

In [17]:
def make_postprocessing(X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler):
    """Turn the preprocessed frames into arrays that the ML models can consume.

    Args:
        X_train, X_val: Lagged feature frames that still carry the
            ``args.identifier`` column.
        y_train, y_val: Target frames.
        exogenous_data_train, exogenous_data_val: Exogenous frames, or None.
        x_scaler, y_scaler: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val, area_X_train, area_X_val,
        area_y_train, area_y_val, exogenous_data_train, exogenous_data_val)``.
        The ``area_*`` entries are None when only one identifier value is present.
    """
    # Split per area only when more than one identifier is present;
    # a single vehicle skips this step.
    if X_train[args.identifier].nunique() == 1:
        area_X_train = area_X_val = area_y_train = area_y_val = None
    else:
        print("按区域获取数据...")
        area_X_train, area_X_val, area_y_train, area_y_val = get_data_by_area(
            X_train, X_val, y_train, y_val, identifier=args.identifier
        )
        print(f"区域训练集数据: {area_X_train}")
        print(f"区域验证集数据: {area_X_val}")

    # Split the exogenous data per area; skipped when there is none.
    if exogenous_data_train is not None:
        print("按区域获取外生数据...")
        exogenous_data_train, exogenous_data_val = get_exogenous_data_by_area(
            exogenous_data_train, exogenous_data_val
        )
        print(f"外生数据训练集: {exogenous_data_train}")
        print(f"外生数据验证集: {exogenous_data_val}")

    # Per-area frames -> numpy arrays (identifier column stripped first).
    if area_X_train is not None:
        for area in area_X_train:
            print(f"将区域 {area} 的数据转换为 numpy 数组...")
            stripped = remove_identifiers(
                area_X_train[area], area_y_train[area], area_X_val[area], area_y_val[area]
            )
            # remove_identifiers returns (X_train, y_train, X_val, y_val).
            X_tr_arr, y_tr_arr, X_va_arr, y_va_arr = (frame.to_numpy() for frame in stripped)
            area_X_train[area] = X_tr_arr
            area_X_val[area] = X_va_arr
            area_y_train[area] = y_tr_arr
            area_y_val[area] = y_va_arr

    # Per-area exogenous frames -> numpy arrays; skipped when there is none.
    if exogenous_data_train is not None:
        for area in exogenous_data_train:
            print(f"将外生数据区域 {area} 转换为 numpy 数组...")
            exogenous_data_train[area] = exogenous_data_train[area].to_numpy()
            exogenous_data_val[area] = exogenous_data_val[area].to_numpy()

    # Strip the identifier from the full features and targets.
    print("移除标识符...")
    X_train, y_train, X_val, y_val = remove_identifiers(X_train, y_train, X_val, y_val)
    # Train and validation must expose the same number of feature columns.
    n_columns = len(X_train.columns)
    assert n_columns == len(X_val.columns)
    # Number of features per time step.
    print("计算每个时间步长的特征数量...", n_columns, args.num_lags)
    num_features = n_columns // args.num_lags
    print(f"每个时间步长的特征数量: {num_features}")

    # Convert to the time-series representation.
    print("将数据转换为时间序列表示...")
    X_train = to_timeseries_rep(X_train.to_numpy(), num_lags=args.num_lags,
                                num_features=num_features)
    X_val = to_timeseries_rep(X_val.to_numpy(), num_lags=args.num_lags,
                              num_features=num_features)

    if area_X_train is not None:
        print("将区域数据转换为时间序列表示...")
        reshaped_train = {}
        for area in area_X_train:
            reshaped_train[area] = to_timeseries_rep(area_X_train[area], num_lags=args.num_lags,
                                                     num_features=num_features)
        reshaped_val = {}
        for area in area_X_val:
            reshaped_val[area] = to_timeseries_rep(area_X_val[area], num_lags=args.num_lags,
                                                   num_features=num_features)
        area_X_train, area_X_val = reshaped_train, reshaped_val

    # Targets -> numpy arrays.
    print("将目标变量转换为 numpy 数组...")
    y_train = y_train.to_numpy()
    y_val = y_val.to_numpy()
    print(f"y_train 形状: {y_train.shape}")
    print(f"y_val 形状: {y_val.shape}")

    # Centralized (all areas) learning: stack every area's exogenous rows under "all".
    if exogenous_data_train is not None and not args.filter_bs:
        print("处理中心化学习的外生数据...")
        train_rows, val_rows = [], []
        for area in exogenous_data_train:
            train_rows.extend(exogenous_data_train[area])
            val_rows.extend(exogenous_data_val[area])
        stacked_train = np.stack(train_rows)
        stacked_val = np.stack(val_rows)
        exogenous_data_train["all"] = stacked_train
        exogenous_data_val["all"] = stacked_val

    return (X_train, X_val, y_train, y_val,
            area_X_train, area_X_val, area_y_train, area_y_val,
            exogenous_data_train, exogenous_data_val)


In [18]:
# Convert the preprocessed frames into model-ready arrays (rebinds the names above).
(X_train, X_val, y_train, y_val,
 area_X_train, area_X_val, area_y_train, area_y_val,
 exogenous_data_train, exogenous_data_val) = make_postprocessing(
    X_train, X_val, y_train, y_val,
    exogenous_data_train, exogenous_data_val,
    x_scaler, y_scaler,
)

移除标识符...
计算每个时间步长的特征数量... 260 10
每个时间步长的特征数量: 26
将数据转换为时间序列表示...
x的类型是：<class 'numpy.ndarray'>
x的部分值是：[[0.94444448 1.         0.         ... 0.         0.83822787 0.2877382 ]
 [0.94444448 1.         0.         ... 0.         0.83822805 0.2877382 ]
 [0.94444448 1.         0.         ... 0.         0.83822727 0.2877382 ]
 [0.94444448 1.         0.         ... 0.         0.83822405 0.28773665]
 [0.94444448 1.         0.         ... 0.         0.83821714 0.28773081]]
x的类型是：<class 'numpy.ndarray'>
x的部分值是：[[0.12962963 1.         0.         ... 0.         0.92972988 0.98730958]
 [0.12962963 1.         0.         ... 0.         0.92973363 0.98731631]
 [0.12962963 1.         0.         ... 0.         0.92973733 0.98731387]
 [0.12962963 1.         0.         ... 0.         0.9297303  0.98731786]
 [0.12962963 1.         0.         ... 0.         0.92972732 0.98731452]]
将目标变量转换为 numpy 数组...
y_train 形状: (8822, 1)
y_val 形状: (2197, 1)


In [19]:
X_train[:2]

array([[[[9.44444478e-01],
         [1.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [6.66666627e-01],
         [0.00000000e+00],
         [2.50000000e-01],
         [0.00000000e+00],
         [7.50000000e-01],
         [3.33333343e-01],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [3.38461548e-01],
         [0.00000000e+00],
         [0.00000000e+00],
         [3.23531739e-02],
         [2.48420089e-02],
         [4.99532372e-01],
         [0.00000000e+00],
         [8.38214278e-01],
         [2.87736416e-01]],

        [[9.44444478e-01],
         [1.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [6.66666627e-01],
         [0.00000000e+00],
         [2.50000000e-01],
         [0.00000000e+00],
         [7.50000000e-01],
         [3.33333343e-01],
         [0.00000000e+00],

In [20]:
y_train[:2]

array([[0.],
       [0.]], dtype=float32)

In [21]:
len(X_train), len(X_val)

(8822, 2197)

### Define the input dimensions for the model architecture

In [22]:
def get_input_dims(X_train, exogenous_data_train):
    """Derive the model input width and the exogenous feature width.

    Args:
        X_train: Array exposing ``.shape``; axes 1 and 2 are read. In the
            recorded run the shape is (samples, lags, features, 1).
        exogenous_data_train: None, or a dict mapping an area key to a 2-D
            array. With exactly one entry that entry is used; otherwise the
            "all" entry is used.

    Returns:
        ``(input_dim, exogenous_dim)``. For the "mlp" model ``input_dim`` is
        ``shape[1] * shape[2]`` (flattened input); for every other model it
        is ``shape[2]``. ``exogenous_dim`` is 0 when there is no exogenous data.
    """
    # The MLP consumes the flattened window; other models take one step at a time.
    input_dim = (
        X_train.shape[1] * X_train.shape[2]
        if args.model_name == "mlp"
        else X_train.shape[2]
    )

    if exogenous_data_train is None:
        return input_dim, 0

    # A single area is addressed by its own key, several areas by the combined "all" entry.
    if len(exogenous_data_train) == 1:
        key = next(iter(exogenous_data_train.keys()))
    else:
        key = "all"
    return input_dim, exogenous_data_train[key].shape[1]

### Initialize the model for training

In [23]:
def get_model(model: str,
              input_dim: int,
              out_dim: int,
              lags: int = 10,
              exogenous_dim: int = 0,
              seed=0):
    """Instantiate the architecture selected by name.

    Args:
        model: One of 'mlp', 'rnn', 'lstm', 'gru', 'cnn', 'da_encoder_decoder'.
        input_dim: Input width handed to the network.
        out_dim: Number of outputs.
        lags: Number of lags; only used by the CNN.
        exogenous_dim: Exogenous feature width; used by RNN, LSTM, GRU and CNN.
        seed: Accepted for interface compatibility; not used here.

    Returns:
        The freshly constructed model.

    Raises:
        NotImplementedError: If ``model`` is not one of the supported names.
    """
    # Head settings shared by the three recurrent architectures.
    recurrent_head = dict(layer_units=[128], num_outputs=out_dim,
                          matrix_rep=True, exogenous_dim=exogenous_dim)

    if model == "mlp":
        return MLP(input_dim=input_dim, layer_units=[256, 128, 64], num_outputs=out_dim)
    if model == "rnn":
        return RNN(input_dim=input_dim, rnn_hidden_size=128, num_rnn_layers=1,
                   rnn_dropout=0.0, **recurrent_head)
    if model == "lstm":
        return LSTM(input_dim=input_dim, lstm_hidden_size=128, num_lstm_layers=1,
                    lstm_dropout=0.0, **recurrent_head)
    if model == "gru":
        return GRU(input_dim=input_dim, gru_hidden_size=128, num_gru_layers=1,
                   gru_dropout=0.0, **recurrent_head)
    if model == "cnn":
        return CNN(num_features=input_dim, lags=lags, exogenous_dim=exogenous_dim, out_dim=out_dim)
    if model == "da_encoder_decoder":
        return DualAttentionAutoEncoder(input_dim=input_dim, architecture="lstm", matrix_rep=True)

    raise NotImplementedError("Specified model is not implemented. Plese define your own model or choose one from ['mlp', 'rnn', 'lstm', 'gru', 'cnn', 'da_encoder_decoder']")

In [24]:
# define the model
# model_name is attached to args here (it is not part of the Namespace defined
# earlier); get_input_dims reads it, so this assignment must run first.
args.model_name = "lstm"

# Input width depends on the selected model; exogenous width is 0 without exogenous data.
input_dim, exogenous_dim = get_input_dims(X_train, exogenous_data_train)

print(input_dim, exogenous_dim)
# One output per target column of y_train.
model = get_model(model=args.model_name,
                  input_dim=input_dim,
                  out_dim=y_train.shape[1],
                  lags=args.num_lags,
                  exogenous_dim=exogenous_dim,
                  seed=args.seed)

26 0


In [25]:
model

LSTM(
  (lstm): LSTM(26, 128, batch_first=True)
  (MLP_layers): Sequential(
    (0): Linear(in_features=128, out_features=1, bias=True)
  )
)

### The fit function used to train the model specified above

In [26]:
def fit(model, X_train, y_train, X_val, y_val, exogenous_data_train=None, exogenous_data_val=None, idxs=None, log_per=1):
    """Wrap the arrays in data loaders and train ``model``.

    Args:
        model: Model to train.
        X_train, y_train: Training features and targets.
        X_val, y_val: Validation features and targets.
        exogenous_data_train, exogenous_data_val: None, or dicts keyed by
            area. With several entries the "all" entry is used; with one
            entry that entry is used.
        idxs: Indices forwarded to ``to_torch_dataset`` as ``indices``.
            Defaults to ``[13]``; a fresh list is created on every call so the
            default is never shared between calls.
        log_per: Forwarded to ``train``.

    Returns:
        Whatever ``train`` returns (the trained model).
    """
    if idxs is None:
        idxs = [13]

    # Pick the exogenous arrays (if any)
    if exogenous_data_train is not None and len(exogenous_data_train) > 1:
        exogenous_data_train = exogenous_data_train["all"]
        exogenous_data_val = exogenous_data_val["all"]
    elif exogenous_data_train is not None and len(exogenous_data_train) == 1:
        cid = next(iter(exogenous_data_train.keys()))
        exogenous_data_train = exogenous_data_train[cid]
        exogenous_data_val = exogenous_data_val[cid]
    else:
        exogenous_data_train = None
        exogenous_data_val = None

    # Feature count is read from the first time step of the first sample.
    num_features = len(X_train[0][0])
    print(f"特征数: {num_features}")

    train_loader = to_torch_dataset(X_train, y_train, num_lags=args.num_lags, num_features=num_features,
                                    exogenous_data=exogenous_data_train, indices=idxs,
                                    batch_size=args.batch_size, shuffle=False)
    val_loader = to_torch_dataset(X_val, y_val, num_lags=args.num_lags, num_features=num_features,
                                  exogenous_data=exogenous_data_val, indices=idxs,
                                  batch_size=args.batch_size, shuffle=False)
    print(f"训练集数据加载器: {train_loader}")
    print(f"验证集数据加载器: {val_loader}")

    model = train(model, train_loader, val_loader, epochs=args.epochs, optimizer=args.optimizer, lr=args.lr,
                  criterion=args.criterion, early_stopping=args.early_stopping, patience=args.patience,
                  plot_history=args.plot_history, device=device, log_per=log_per)

    return model


In [27]:
# trained_model = fit(model, X_train, y_train, X_val, y_val)

In [28]:
# Print the shapes of the arrays passed to fit
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)

# Train the model; `model` is rebound to the value returned by fit.
# NOTE(review): the recorded run of this cell ended with
# "IndexError: index 3 is out of bounds for axis 1 with size 1". y has a single
# column, and the failing index (3) differs from target_index (13 in that run),
# so the index presumably comes from a list inside to_torch_dataset/train
# rather than from idxs — TODO confirm how `indices` is used there.
model = fit(model, X_train, y_train, X_val, y_val, 
            exogenous_data_train=exogenous_data_train, exogenous_data_val=exogenous_data_val,idxs=[target_index])

(8822, 10, 26, 1)
(8822, 1)
(2197, 10, 26, 1)
(2197, 1)
特征数: 26
训练集数据加载器: <torch.utils.data.dataloader.DataLoader object at 0x7fa7f2ba86d0>
验证集数据加载器: <torch.utils.data.dataloader.DataLoader object at 0x7fa810231df0>


IndexError: index 3 is out of bounds for axis 1 with size 1