### In this notebook we perform individual training.
In individual learning each base station has access only to it's private dataset.

In [1]:
import sys
import os

from pathlib import Path

parent = Path(os.path.abspath("")).resolve().parents[0]
if parent not in sys.path:
    sys.path.insert(0, str(parent))

In [2]:
import random

import numpy as np
import torch

from argparse import Namespace

In [3]:
from ml.utils.data_utils import read_data, generate_time_lags, time_to_feature, handle_nans, to_Xy, \
    to_torch_dataset, to_timeseries_rep, assign_statistics, \
    to_train_val, scale_features, get_data_by_area, remove_identifiers, get_exogenous_data_by_area, handle_outliers

In [4]:
from ml.utils.train_utils import train, test

In [5]:
from ml.models.mlp import MLP
from ml.models.rnn import RNN
from ml.models.lstm import LSTM
from ml.models.gru import GRU
from ml.models.cnn import CNN
from ml.models.rnn_autoencoder import DualAttentionAutoEncoder

In [6]:
args = Namespace(
    data_path='/home/yanglin/Federated-Time-Series-Forecasting/dataset/processed_A032_IGVData_KPI.csv', # 数据集路径
    data_path_test=['../dataset/ElBorn_test.csv'], # 测试数据集路径
    test_size=0.2, # 验证集的比例
    targets=['charge_status'], # 目标列
    num_lags=10, # 输入中包括的过去观测值的数量

    filter_bs=None, # 是否在训练中使用单一基站，可动态变化
    identifier='vehicle_id', # 用于识别基站的列名

    nan_constant=0, # 用于转换NaN值的常数
    x_scaler='minmax', # x轴数据的缩放器
    y_scaler='minmax', # y轴数据的缩放器
    outlier_detection=None, # 是否执行异常值检测（如数据截断）

    criterion='mse', # 优化标准，可以是mse（均方误差）或l1
    epochs=150, # 最大迭代轮数
    lr=0.001, # 学习率
    optimizer='adam', # 优化器，可选sgd或adam
    batch_size=128, # 批处理大小
    early_stopping=True, # 是否使用提前停止
    patience=50, # 提前停止的耐心值
    max_grad_norm=0.0, # 是否剪辑梯度范数
    reg1=0.0, # L1正则化
    reg2=0.0, # L2正则化

    plot_history=True, # 是否绘制损失历史图

    cuda=True, # 是否使用GPU

    seed=0, # 可重现性种子

    assign_stats=None, # 是否使用统计数据作为外源数据，可选["mean", "median", "std", "variance", "kurtosis", "skew"]
    use_time_features=False # 是否使用时间特征
)


> You can define the base station to perform train on the filter_bs parameter and use it in block 12 or you can define the base station to block 12 explicitly 

In [7]:
print(f"Script arguments: {args}\n")

Script arguments: Namespace(assign_stats=None, batch_size=128, criterion='mse', cuda=True, data_path='/home/yanglin/Federated-Time-Series-Forecasting/dataset/processed_A032_IGVData_KPI.csv', data_path_test=['../dataset/ElBorn_test.csv'], early_stopping=True, epochs=150, filter_bs=None, identifier='vehicle_id', lr=0.001, max_grad_norm=0.0, nan_constant=0, num_lags=10, optimizer='adam', outlier_detection=None, patience=50, plot_history=True, reg1=0.0, reg2=0.0, seed=0, targets=['charge_status'], test_size=0.2, use_time_features=False, x_scaler='minmax', y_scaler='minmax')



In [7]:
device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"
print(f"Using {device}")

Using cpu


In [8]:
# Outlier detection specification
if args.outlier_detection is not None:
    outlier_columns = ['rb_down', 'rb_up', 'down', 'up']
    outlier_kwargs = {"ElBorn": (10, 90), "LesCorts": (10, 90), "PobleSec": (5, 95)}
    args.outlier_columns = outlier_columns
    args.outlier_kwargs = outlier_kwargs

In [9]:
def seed_all():
    # ensure reproducibility
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [10]:
seed_all()

## 在这里为filter_bs参数中指定的基站执行预处理管道
预处理中的:
1. nan处理
2. 异常值处理
3. 扩展数据
4. 产生时间滞后
5. 生成和导入外生数据作为特征(时间、统计数据)(如果适用)

In [11]:

def make_preprocessing(filter_bs=None):
    """Preprocess a given .csv"""
    # read data
    df = read_data(args.data_path, filter_data=filter_bs)
    print(f"Data shape: {df.shape}")
    # handle nans
    df = handle_nans(train_data=df, constant=args.nan_constant,
                     identifier=args.identifier)
    print(f"Data shape after handling nans: {df.shape}")
    # split to train/validation
    train_data, val_data = to_train_val(df)
    print(f"Train data shape: {train_data.shape}")
    print(f"Validation data shape: {val_data.shape}")
    # handle outliers (if specified)
    print(f"Outlier detection: {args.outlier_detection}")
    # TODO:目前不需要异常值检测，如果后续需要异常值检测，在进行修改
    if args.outlier_detection is not None:
        train_data = handle_outliers(df=train_data, columns=args.outlier_columns,
                                     identifier=args.identifier, kwargs=args.outlier_kwargs)
    
    # get X and y
    # TODO: 这里y_val把小车id也加进去了
    X_train, X_val, y_train, y_val = to_Xy(train_data=train_data, val_data=val_data,
                                          targets=args.targets)
    print(f"X_train shape: {X_train.shape}")
    print(f"y_train shape: {y_train.shape}")
    print(f"X_val shape: {X_val.shape}")
    print(f"y_val shape: {y_val.shape}")
   
    print(f"y_val: {y_val}")
    # scale X
    X_train, X_val, x_scaler = scale_features(train_data=X_train, val_data=X_val,
                                             scaler=args.x_scaler, identifier=args.identifier)
    print(f"X_train shape after scaling: {X_train.shape}")
    print(f"X_val shape after scaling: {X_val.shape}")
    print(f"X_scaler: {x_scaler}")
    # scale y
    y_train, y_val, y_scaler = scale_features(train_data=y_train, val_data=y_val,
                                             scaler=args.y_scaler, identifier=args.identifier)
    print(f"y_train shape after scaling: {y_train.shape}")
    print(f"y_val shape after scaling: {y_val.shape}")
    print(f"y_scaler: {y_scaler}")
    # generate time lags
    print("Generating time lags for X_train")
    X_train = generate_time_lags(X_train, args.num_lags)
    print(f"X_train shape after generating time lags: {X_train.shape}")
    X_val = generate_time_lags(X_val, args.num_lags)
    y_train = generate_time_lags(y_train, args.num_lags, is_y=True)
    y_val = generate_time_lags(y_val, args.num_lags, is_y=True)

    print(f"X_val shape after generating time lags: {X_val.shape}")
    print(f"y_train shape after generating time lags: {y_train.shape}")
    print(f"y_val shape after generating time lags: {y_val.shape}")
    # get datetime features as exogenous data
    date_time_df_train = time_to_feature(
        X_train, args.use_time_features, identifier=args.identifier
    )
    date_time_df_val = time_to_feature(
        X_val, args.use_time_features, identifier=args.identifier
    )
    print(f"X_train: {X_train}")
    print(f"X_val: {X_val}")
    # get statistics as exogenous data
    stats_df_train = assign_statistics(X_train, args.assign_stats, args.num_lags,
                                       targets=args.targets, identifier=args.identifier)
    print(f"Stats df train: {stats_df_train}")
    stats_df_val = assign_statistics(X_val, args.assign_stats, args.num_lags, 
                                       targets=args.targets, identifier=args.identifier)
    print(f"Stats df val: {stats_df_val}")
    # concat the exogenous features (if any) to a single dataframe
    if date_time_df_train is not None or stats_df_train is not None:
        exogenous_data_train = pd.concat([date_time_df_train, stats_df_train], axis=1)
        # remove duplicate columns (if any)
        exogenous_data_train = exogenous_data_train.loc[:, ~exogenous_data_train.columns.duplicated()].copy()
        assert len(exogenous_data_train) == len(X_train) == len(y_train)
    else:
        exogenous_data_train = None
    if date_time_df_val is not None or stats_df_val is not None:
        exogenous_data_val = pd.concat([date_time_df_val, stats_df_val], axis=1)
        exogenous_data_val = exogenous_data_val.loc[:, ~exogenous_data_val.columns.duplicated()].copy()
        assert len(exogenous_data_val) == len(X_val) == len(y_val)
    else:
        exogenous_data_val = None
        
    return X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler

In [12]:
# here exogenous_data_train and val are None.

X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler = make_preprocessing(
    
)

# make_preprocessing(
#      filter_bs="LesCorts"
#  )

INFO logger 2024-06-04 10:43:50,524 | data_utils.py:51 | 缺失值处理执行了，填充值为0


Data shape: (11039, 27)
Data shape after handling nans: (11039, 27)
数据的样本信息:
	总样本数: 11039
	训练样本数: 8832
	验证样本数: 2207
Train data shape: (8832, 27)
Validation data shape: (2207, 27)
Outlier detection: None
X_train shape: (8832, 27)
y_train shape: (8832, 2)
X_val shape: (2207, 27)
y_val shape: (2207, 2)
y_val:                                charge_status  vehicle_id
local_time                                              
1970-01-01 00:00:00.000008737            0.0         0.0
1970-01-01 00:00:00.000008738            0.0         0.0
1970-01-01 00:00:00.000008739            0.0         0.0
1970-01-01 00:00:00.000008740            0.0         0.0
1970-01-01 00:00:00.000008741            0.0         0.0
...                                      ...         ...
1970-01-01 00:00:00.000010851            0.0         0.0
1970-01-01 00:00:00.000010852            0.0         0.0
1970-01-01 00:00:00.000010853            0.0         0.0
1970-01-01 00:00:00.000010854            0.0         0.0
1970-01-

In [14]:
X_train.head()

Unnamed: 0_level_0,missionID _lag-10,vehicle_mode_lag-10,container2_type_lag-10,container1_type_lag-10,mission_type_lag-10,vesselVisitID_lag-10,target_location_lag-10,error_code_lag-10,current_task_lag-10,task_stage_lag-10,...,soc_lag-1,power_on_time_lag-1,mileage_distance_lag-1,speed_command_lag-1,speed_lag-1,heading_lag-1,position_z_lag-1,position_y_lag-1,position_x_lag-1,vehicle_id
local_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-01-01 00:00:00.000000010,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.000977,0.0,0.032353,0.024842,0.499589,0.0,0.838228,0.287738,0.0
1970-01-01 00:00:00.000000011,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001099,0.0,0.032353,0.024842,0.499592,0.0,0.838228,0.287738,0.0
1970-01-01 00:00:00.000000012,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001221,0.0,0.032353,0.024842,0.499581,0.0,0.838227,0.287738,0.0
1970-01-01 00:00:00.000000013,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001221,0.0,0.032353,0.024842,0.499571,0.0,0.838224,0.287737,0.0
1970-01-01 00:00:00.000000014,0.944444,1.0,0.0,0.0,0.666667,0.0,0.25,0.0,0.75,0.333333,...,0.338462,0.001465,0.0,0.032353,0.024842,0.499575,0.0,0.838217,0.287731,0.0


In [15]:
y_train.head()

Unnamed: 0_level_0,charge_status,vehicle_id
local_time,Unnamed: 1_level_1,Unnamed: 2_level_1
1970-01-01 00:00:00.000000010,0.0,0.0
1970-01-01 00:00:00.000000011,0.0,0.0
1970-01-01 00:00:00.000000012,0.0,0.0
1970-01-01 00:00:00.000000013,0.0,0.0
1970-01-01 00:00:00.000000014,0.0,0.0


In [13]:
x_scaler, y_scaler

(MinMaxScaler(), MinMaxScaler())

### Postprocessing Stage

In this stage we transform data in a way that can be fed into ML algorithms.

In [14]:
def make_postprocessing(X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler):
    """Make data ready to be fed into ml algorithms"""
    # if there are more than one specified areas, get the data per area
    if X_train[args.identifier].nunique() != 1:
        area_X_train, area_X_val, area_y_train, area_y_val = get_data_by_area(X_train, X_val,
                                                                              y_train, y_val, 
                                                                              identifier=args.identifier)
    else:
        area_X_train, area_X_val, area_y_train, area_y_val = None, None, None, None

    # Get the exogenous data per area.
    if exogenous_data_train is not None:
        exogenous_data_train, exogenous_data_val = get_exogenous_data_by_area(exogenous_data_train,
                                                                              exogenous_data_val)
    # transform to np
    if area_X_train is not None:
        for area in area_X_train:
            tmp_X_train, tmp_y_train, tmp_X_val, tmp_y_val = remove_identifiers(
                area_X_train[area], area_y_train[area], area_X_val[area], area_y_val[area])
            tmp_X_train, tmp_y_train = tmp_X_train.to_numpy(), tmp_y_train.to_numpy()
            tmp_X_val, tmp_y_val = tmp_X_val.to_numpy(), tmp_y_val.to_numpy()
            area_X_train[area] = tmp_X_train
            area_X_val[area] = tmp_X_val
            area_y_train[area] = tmp_y_train
            area_y_val[area] = tmp_y_val
    
    if exogenous_data_train is not None:
        for area in exogenous_data_train:
            exogenous_data_train[area] = exogenous_data_train[area].to_numpy()
            exogenous_data_val[area] = exogenous_data_val[area].to_numpy()
    
    # remove identifiers from features, targets
    X_train, y_train, X_val, y_val = remove_identifiers(X_train, y_train, X_val, y_val)
    assert len(X_train.columns) == len(X_val.columns)
    
    num_features = len(X_train.columns) // args.num_lags
    
    # to timeseries representation
    X_train = to_timeseries_rep(X_train.to_numpy(), num_lags=args.num_lags,
                                            num_features=num_features)
    X_val = to_timeseries_rep(X_val.to_numpy(), num_lags=args.num_lags,
                                          num_features=num_features)
    
    if area_X_train is not None:
        area_X_train = to_timeseries_rep(area_X_train, num_lags=args.num_lags,
                                                     num_features=num_features)
        area_X_val = to_timeseries_rep(area_X_val, num_lags=args.num_lags,
                                                   num_features=num_features)
    
    # transform targets to numpy
    y_train, y_val = y_train.to_numpy(), y_val.to_numpy()
    
    # centralized (all) learning specific
    if not args.filter_bs and exogenous_data_train is not None:
        exogenous_data_train_combined, exogenous_data_val_combined = [], []
        for area in exogenous_data_train:
            exogenous_data_train_combined.extend(exogenous_data_train[area])
            exogenous_data_val_combined.extend(exogenous_data_val[area])
        exogenous_data_train_combined = np.stack(exogenous_data_train_combined)
        exogenous_data_val_combined = np.stack(exogenous_data_val_combined)
        exogenous_data_train["all"] = exogenous_data_train_combined
        exogenous_data_val["all"] = exogenous_data_val_combined
    return X_train, X_val, y_train, y_val, area_X_train, area_X_val, area_y_train, area_y_val, exogenous_data_train, exogenous_data_val

In [15]:
X_train, X_val, y_train, y_val, area_X_train, area_X_val, area_y_train, area_y_val, exogenous_data_train, exogenous_data_val = make_postprocessing(X_train, X_val, y_train, y_val, exogenous_data_train, exogenous_data_val, x_scaler, y_scaler)

In [16]:
X_train[:2]

array([[[[9.44444478e-01],
         [1.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [6.66666627e-01],
         [0.00000000e+00],
         [2.50000000e-01],
         [0.00000000e+00],
         [7.50000000e-01],
         [3.33333343e-01],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [3.38461548e-01],
         [0.00000000e+00],
         [0.00000000e+00],
         [3.23531739e-02],
         [2.48420089e-02],
         [4.99532372e-01],
         [0.00000000e+00],
         [8.38214278e-01],
         [2.87736416e-01]],

        [[9.44444478e-01],
         [1.00000000e+00],
         [0.00000000e+00],
         [0.00000000e+00],
         [6.66666627e-01],
         [0.00000000e+00],
         [2.50000000e-01],
         [0.00000000e+00],
         [7.50000000e-01],
         [3.33333343e-01],
         [0.00000000e+00],

In [17]:
y_train[:2]

array([[0.],
       [0.]], dtype=float32)

In [18]:
len(X_train), len(X_val)

(8822, 2197)

### Define the input dimensions for the model architecture

In [19]:
def get_input_dims(X_train, exogenous_data_train):
    if args.model_name == "mlp":
        input_dim = X_train.shape[1] * X_train.shape[2]
    else:
        input_dim = X_train.shape[2]
    
    if exogenous_data_train is not None:
        if len(exogenous_data_train) == 1:
            cid = next(iter(exogenous_data_train.keys()))
            exogenous_dim = exogenous_data_train[cid].shape[1]
        else:
            exogenous_dim = exogenous_data_train["all"].shape[1]
    else:
        exogenous_dim = 0
    
    return input_dim, exogenous_dim

### Initialize the model for training

In [20]:
def get_model(model: str,
              input_dim: int,
              out_dim: int,
              lags: int = 10,
              exogenous_dim: int = 0,
              seed=0):
    if model == "mlp":
        model = MLP(input_dim=input_dim, layer_units=[256, 128, 64], num_outputs=out_dim)
    elif model == "rnn":
        model = RNN(input_dim=input_dim, rnn_hidden_size=128, num_rnn_layers=1, rnn_dropout=0.0,
                    layer_units=[128], num_outputs=out_dim, matrix_rep=True, exogenous_dim=exogenous_dim)
    elif model == "lstm":
        model = LSTM(input_dim=input_dim, lstm_hidden_size=128, num_lstm_layers=1, lstm_dropout=0.0,
                     layer_units=[128], num_outputs=out_dim, matrix_rep=True, exogenous_dim=exogenous_dim)
    elif model == "gru":
        model = GRU(input_dim=input_dim, gru_hidden_size=128, num_gru_layers=1, gru_dropout=0.0,
                    layer_units=[128], num_outputs=out_dim, matrix_rep=True, exogenous_dim=exogenous_dim)
    elif model == "cnn":
        model = CNN(num_features=input_dim, lags=lags, exogenous_dim=exogenous_dim, out_dim=out_dim)
    elif model == "da_encoder_decoder":
        model = DualAttentionAutoEncoder(input_dim=input_dim, architecture="lstm", matrix_rep=True)
    else:
        raise NotImplementedError("Specified model is not implemented. Plese define your own model or choose one from ['mlp', 'rnn', 'lstm', 'gru', 'cnn', 'da_encoder_decoder']")
    return model

In [21]:
# define the model
args.model_name = "lstm"

input_dim, exogenous_dim = get_input_dims(X_train, exogenous_data_train)

print(input_dim, exogenous_dim)

model = get_model(model=args.model_name,
                  input_dim=input_dim,
                  out_dim=y_train.shape[1],
                  lags=args.num_lags,
                  exogenous_dim=exogenous_dim,
                  seed=args.seed)

26 0


In [22]:
model

LSTM(
  (lstm): LSTM(26, 128, batch_first=True)
  (MLP_layers): Sequential(
    (0): Linear(in_features=128, out_features=1, bias=True)
  )
)

### The fit function used to train the model specified above

In [23]:
def fit(model, X_train, y_train, X_val, y_val, 
        exogenous_data_train=None, exogenous_data_val=None, 
        idxs=[8, 3, 1, 10, 9], # the indices of our targets in X
        log_per=1):
    
    # get exogenous data (if any)
    if exogenous_data_train is not None and len(exogenous_data_train) > 1:
        exogenous_data_train = exogenous_data_train["all"]
        exogenous_data_val = exogenous_data_val["all"]
    elif exogenous_data_train is not None and len(exogenous_data_train) == 1:
        cid = next(iter(exogenous_data_train.keys()))
        exogenous_data_train = exogenous_data_train[cid]
        exogenous_data_val = exogenous_data_val[cid]
    else:
        exogenous_data_train = None
        exogenous_data_val = None
    num_features = len(X_train[0][0])
    
    # to torch loader
    train_loader = to_torch_dataset(X_train, y_train,
                                    num_lags=args.num_lags,
                                    num_features=num_features,
                                    exogenous_data=exogenous_data_train,
                                    indices=idxs,
                                    batch_size=args.batch_size, 
                                    shuffle=False)
    val_loader = to_torch_dataset(X_val, y_val, 
                                  num_lags=args.num_lags,
                                  num_features=num_features,
                                  exogenous_data=exogenous_data_val,
                                  indices=idxs,
                                  batch_size=args.batch_size,
                                  shuffle=False)
    
    # train the model
    model = train(model, 
                  train_loader, val_loader,
                  epochs=args.epochs,
                  optimizer=args.optimizer, lr=args.lr,
                  criterion=args.criterion,
                  early_stopping=args.early_stopping,
                  patience=args.patience,
                  plot_history=args.plot_history, 
                  device=device, log_per=log_per)
    
    return model

In [24]:
trained_model = fit(model, X_train, y_train, X_val, y_val)

RuntimeError: stack expects each tensor to be equal size, but got [10, 5] at entry 0 and [10, 1] at entry 11

In [28]:
# def train(model, train_loader, val_loader, epochs, optimizer, lr, criterion, early_stopping, patience, plot_history, device, log_per, use_exogenous=False):
#     model.to(device)
#     optimizer = getattr(torch.optim, optimizer)(model.parameters(), lr=lr)
#     criterion = getattr(torch.nn, criterion)()
    
#     for epoch in range(epochs):
#         model.train()
#         for batch in train_loader:
#             if use_exogenous:
#                 x, exogenous, y_hist, y = batch
#                 x, y, exogenous, y_hist = x.to(device), y.to(device), exogenous.to(device), y_hist.to(device)
#                 output = model(x, exogenous)
#             else:
#                 x, y_hist, y = batch
#                 x, y, y_hist = x.to(device), y.to(device), y_hist.to(device)
#                 output = model(x)
                
#             loss = criterion(output, y)
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()
        
#         # 评估和早停逻辑
#         model.eval()
#         with torch.no_grad():
#             for batch in val_loader:
#                 if use_exogenous:
#                     x, exogenous, y_hist, y = batch
#                     x, y, exogenous, y_hist = x.to(device), y.to(device), exogenous.to(device), y_hist.to(device)
#                     output = model(x, exogenous)
#                 else:
#                     x, y_hist, y = batch
#                     x, y, y_hist = x.to(device), y.to(device), y_hist.to(device)
#                     output = model(x)
                    
#                 val_loss = criterion(output, y)
        
#         # 其他日志和早停逻辑

#     return model

# def fit(model, X_train, y_train, X_val, y_val, 
#         exogenous_data_train=None, exogenous_data_val=None, 
#         idxs=None,  # 更新 indices 参数
#         log_per=1):
    
#     # 获取外生数据（如果有的话）
#     if exogenous_data_train is not None and len(exogenous_data_train) > 1:
#         exogenous_data_train = exogenous_data_train["all"]
#         exogenous_data_val = exogenous_data_val["all"]
#     elif exogenous_data_train is not None and len(exogenous_data_train) == 1:
#         cid = next(iter(exogenous_data_train.keys()))
#         exogenous_data_train = exogenous_data_train[cid]
#         exogenous_data_val = exogenous_data_val[cid]
#     else:
#         exogenous_data_train = None
#         exogenous_data_val = None
    
#     num_features = X_train.shape[2]  # 更新 num_features 获取方式
    
#     # 转换为 PyTorch 数据集
#     train_loader = to_torch_dataset(X_train, y_train,
#                                     num_lags=args.num_lags,
#                                     num_features=num_features,
#                                     exogenous_data=exogenous_data_train,
#                                     indices=idxs,
#                                     batch_size=args.batch_size, 
#                                     shuffle=True)  # 设置 shuffle=True 以确保训练集数据被打乱
#     val_loader = to_torch_dataset(X_val, y_val, 
#                                   num_lags=args.num_lags,
#                                   num_features=num_features,
#                                   exogenous_data=exogenous_data_val,
#                                   indices=idxs,
#                                   batch_size=args.batch_size,
#                                   shuffle=False)
    
#     # 训练模型
#     if exogenous_data_train is not None:
#         model = train(model, 
#                       train_loader, val_loader,
#                       epochs=args.epochs,
#                       optimizer=args.optimizer, lr=args.lr,
#                       criterion=args.criterion,
#                       early_stopping=args.early_stopping,
#                       patience=args.patience,
#                       plot_history=args.plot_history, 
#                       device=device, log_per=log_per,
#                       use_exogenous=True)
#     else:
#         model = train(model, 
#                       train_loader, val_loader,
#                       epochs=args.epochs,
#                       optimizer=args.optimizer, lr=args.lr,
#                       criterion=args.criterion,
#                       early_stopping=args.early_stopping,
#                       patience=args.patience,
#                       plot_history=args.plot_history, 
#                       device=device, log_per=log_per,
#                       use_exogenous=False)
    
#     return model


In [29]:
#打印参数形状
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
model = fit(model, X_train, y_train, X_val, y_val, 
            exogenous_data_train=exogenous_data_train, exogenous_data_val=exogenous_data_val)

(8822, 10, 26, 1)
(8822, 1)
(2197, 10, 26, 1)
(2197, 1)


AttributeError: module 'torch.optim' has no attribute 'adam'