In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import os
import warnings
import time
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torchinfo import summary
import random

# Install a global "ignore" filter: every warning category is suppressed from
# here on, which also hides deprecation notices raised by the libraries used.
warnings.filterwarnings('ignore')

# =============================================================================
# 0. 设置随机数种子
# =============================================================================
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed forwarded unchanged to every generator.

    The CUDA generators are seeded only when a CUDA device is available.
    A confirmation message is printed once all generators are seeded.
    """
    # CPU-side generators first, in the same order on every call.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        # Current device first, then every visible device.
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

    print(f"随机数种子已设置为: {seed}")

# Seed used for every random number generator in this notebook.
GLOBAL_SEED = 42
set_seed(GLOBAL_SEED)

# =============================================================================
# 1. File paths and hyperparameters
# =============================================================================
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"当前设备: {device}")

# NOTE(review): absolute, machine-specific Windows paths — they must be edited
# before the notebook can run anywhere else.
HIGH_FREQ_DATA_PATH = r'D:\python-deeplearning\FirstPaper\PVTimeSeriesLearing\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\完整的模型代码流程 copy 2\MC-Dropout-test\数据集\DKASC\DKASC_high_frequency_data.csv'
LOW_FREQ_DATA_PATH = r'D:\python-deeplearning\FirstPaper\PVTimeSeriesLearing\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\完整的模型代码流程 copy 2\MC-Dropout-test\数据集\DKASC\DKASC_low_frequency_data.csv'
ORIGINAL_DATA_PATH = r'D:\python-deeplearning\FirstPaper\PVTimeSeriesLearing\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\完整的模型代码流程 copy 2\MC-Dropout-test\数据集\DKASC\DKASC.csv'
MODEL_FILE_PATH = 'icial_uv_rpe_model.pth' # where the best model checkpoint is saved

# --- Hyperparameters ---
# Length of the input window and of the generated output window (in time steps).
N_STEPS_IN, N_STEPS_OUT = 96, 24
# Forecast horizons (steps ahead) that are actually trained and evaluated.
TARGET_STEPS = [3, 6, 12, 24]
# Feature counts used when reshaping each input stream to (samples, steps, features).
HIGH_FREQ_FEATURES = 1
WEATHER_FEATURES = 5
LOW_FREQ_FEATURES = 1
BATCH_SIZE = 64
EPOCHS = 50
LEARNING_RATE = 0.0001
# Early-stopping patience in epochs; the LR scheduler uses half of this value.
PATIENCE = 10

# --- Model architecture parameters ---
EMBED_DIM = 128
GRU_LAYERS = 2 
LSTM_LAYERS = 3
NUM_HEADS = 8
DROPOUT = 0.2
TRANSFORMER_LAYERS = 2
# Size of the relative-position bucket embedding table.
RELATIVE_POSITION_BUCKETS = 32

# =============================================================================
# 2. 数据预处理函数 
# =============================================================================
def time_series_to_supervised_mimo(data, n_in=96, n_out=1, dropnan=True):
    """Turn a time series into a lagged, multi-output supervised-learning frame.

    Args:
        data: Anything ``pd.DataFrame`` accepts (DataFrame, 2-D/1-D array,
            flat list or list of rows). Each column is one variable.
        n_in: Number of past steps to emit as ``<col>(t-k)`` columns,
            ordered from ``k = n_in`` down to ``k = 1``.
        n_out: Number of current/future steps to emit. The first one is
            named ``<col>(t0)`` and the following ones ``<col>(t+k)``.
        dropnan: When true, rows containing any NaN (introduced by the
            shifts, or already present in ``data``) are removed.

    Returns:
        A DataFrame with ``(n_in + n_out) * n_columns`` columns that keeps
        the original row index of the surviving rows.
    """
    df = pd.DataFrame(data)
    # Take the variables from the constructed frame rather than from the raw
    # input: this handles multi-column lists and 1-D arrays, which previously
    # produced a column-count mismatch or an IndexError.
    orig_names = list(df.columns)

    cols, names = [], []
    # Input window: t-n_in, ..., t-1.
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names.extend('%s(t-%d)' % (col, lag) for col in orig_names)
    # Output window: t0, t+1, ..., t+(n_out-1).
    for lead in range(n_out):
        cols.append(df.shift(-lead))
        sign = '' if lead == 0 else '+'
        names.extend('%s(t%s%d)' % (col, sign, lead) for col in orig_names)

    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg

# =============================================================================
# 3. 核心模型架构 (应用u,v缩放的RPE)
# =============================================================================

# --- 模块 1: 相对位置偏置生成器 ---
class RelativePositionBias(nn.Module):
    """Learned per-head attention bias indexed by bucketed relative position.

    Args:
        num_buckets: Number of rows of the bias embedding table.
        max_distance: Distance at (and beyond) which the last bucket is used.
        num_heads: Number of attention heads; one bias value per head per bucket.
    """

    def __init__(self, num_buckets, max_distance, num_heads):
        super().__init__()
        self.num_buckets = num_buckets
        self.max_distance = max_distance
        self.num_heads = num_heads
        self.relative_attention_bias = nn.Embedding(self.num_buckets, self.num_heads)

    @staticmethod
    def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
        """Map signed relative positions (key - query) to bucket indices.

        Small distances get one bucket each; larger distances share
        logarithmically spaced buckets, capped at the last bucket. In the
        bidirectional case half of the buckets are reserved for positive
        ``relative_position`` values; otherwise positive values collapse to 0.
        """
        distance = -relative_position
        if bidirectional:
            num_buckets //= 2
            # Offset selecting the second half of the table for keys after the query.
            offset = (distance < 0).to(torch.long) * num_buckets
            distance = distance.abs()
        else:
            offset = 0
            distance = torch.max(distance, torch.zeros_like(distance))

        max_exact = num_buckets // 2
        # Logarithmic bucket for distances >= max_exact. For distance == 0 the
        # log is -inf, but that entry is discarded by the torch.where below.
        log_fraction = torch.log(distance.float() / max_exact) / np.log(max_distance / max_exact)
        coarse_bucket = max_exact + (log_fraction * (num_buckets - max_exact)).to(torch.long)
        coarse_bucket = coarse_bucket.clamp(max=num_buckets - 1)

        return offset + torch.where(distance < max_exact, distance, coarse_bucket)

    def forward(self, seq_len, device):
        """Return the bias tensor of shape ``(1, num_heads, seq_len, seq_len)``."""
        positions = torch.arange(seq_len, dtype=torch.long, device=device)
        # Entry [i, j] holds j - i (key position minus query position).
        rel_pos = positions.unsqueeze(0) - positions.unsqueeze(1)
        buckets = self._relative_position_bucket(
            rel_pos,
            bidirectional=True,
            num_buckets=self.num_buckets,
            max_distance=self.max_distance,
        )
        per_head = self.relative_attention_bias(buckets)  # (seq_len, seq_len, num_heads)
        return per_head.permute(2, 0, 1).unsqueeze(0)

# --- 模块 2: 集成了u,v缩放的RPE的Transformer块 ---
class UV_ScaledRPE_Block(nn.Module):
    """Pre-norm self-attention + feed-forward block with a scaled position bias.

    Args:
        d_model: Width of the input/output features; must be divisible by
            ``num_heads``.
        num_heads: Number of attention heads.
        d_ff: Hidden width of the feed-forward network.
        dropout: Dropout probability used on attention weights, inside the
            feed-forward network and on both residual branches.
    """

    def __init__(self, d_model, num_heads, d_ff, dropout):
        super().__init__()
        head_dim, leftover = divmod(d_model, num_heads)
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = head_dim
        assert leftover == 0, "embed_dim must be divisible by num_heads"

        # Fused projection producing queries, keys and values in one matmul.
        self.qkv_proj = nn.Linear(d_model, d_model * 3)
        self.out_proj = nn.Linear(d_model, d_model)
        self.attn_dropout = nn.Dropout(dropout)

        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        # Learnable per-head scales applied to the relative position bias.
        self.rpe_scale_u = nn.Parameter(torch.ones(1, num_heads, 1, 1))
        self.rpe_scale_v = nn.Parameter(torch.ones(1, num_heads, 1, 1))

    def forward(self, x, bias):
        """Apply the block.

        Args:
            x: Tensor unpacked as ``(batch, length, d_model)``.
            bias: Position bias added to the attention scores; it must
                broadcast against ``(batch, num_heads, length, length)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch, length, width = x.shape

        def to_heads(t):
            # (batch, length, d_model) -> (batch, heads, length, head_dim)
            return t.view(batch, length, self.num_heads, self.head_dim).transpose(1, 2)

        # --- self-attention sub-layer (pre-norm, residual) ---
        q, k, v = (to_heads(part) for part in self.qkv_proj(self.norm1(x)).chunk(3, dim=-1))
        scores = torch.matmul(q, k.transpose(-2, -1)) / (self.head_dim ** 0.5)

        # NOTE(review): both scales multiply the same bias, so this equals
        # bias * (u + v); u and v receive identical gradients.
        scores = scores + ((bias * self.rpe_scale_u) + (bias * self.rpe_scale_v))

        weights = self.attn_dropout(torch.softmax(scores, dim=-1))
        context = torch.matmul(weights, v).transpose(1, 2).contiguous().view(batch, length, width)
        x = x + self.dropout1(self.out_proj(context))

        # --- feed-forward sub-layer (pre-norm, residual) ---
        return x + self.dropout2(self.ffn(self.norm2(x)))

# --- 模块 3: 重构的高频分支  ---
class EnhancedHighFrequencyModel(nn.Module):
    """High-frequency branch: power encoder refined by weather cross-attention.

    The power sequence goes through a 1-D convolution and a bidirectional
    GRU; the weather sequence through a single-layer bidirectional GRU. The
    power features are then passed through a stack of blocks, each made of
    position-biased self-attention, cross-attention onto the weather
    features, and a feed-forward layer. The feature vector of the last time
    step is returned.

    Args:
        power_dim: Channel count of the power input.
        weather_dim: Feature count of the weather input.
        hidden_dim: Width of all internal representations.
        n_gru_layers: Number of layers of the power GRU.
        num_heads: Attention heads used in every attention module.
        dropout: Dropout probability.
        n_transformer_blocks: Number of stacked attention blocks.
        num_pos_buckets: Bucket count of the relative position bias table.
    """

    def __init__(self, power_dim, weather_dim, hidden_dim, n_gru_layers, num_heads, dropout, n_transformer_blocks, num_pos_buckets):
        super().__init__()
        bidirectional_width = hidden_dim * 2
        ffn_width = hidden_dim * 4

        # Power encoder.
        self.power_conv = nn.Conv1d(in_channels=power_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
        self.power_bigru = nn.GRU(hidden_dim, hidden_dim, n_gru_layers, batch_first=True, bidirectional=True, dropout=dropout)
        self.power_fc = nn.Linear(bidirectional_width, hidden_dim)

        # Weather encoder.
        self.weather_bigru = nn.GRU(weather_dim, hidden_dim, 1, batch_first=True, bidirectional=True)
        self.weather_fc = nn.Linear(bidirectional_width, hidden_dim)

        # One bias generator shared by all blocks; its maximum distance is the
        # module-level input window length N_STEPS_IN.
        self.relative_pos_bias_generator = RelativePositionBias(num_buckets=num_pos_buckets, max_distance=N_STEPS_IN, num_heads=num_heads)

        def build_block():
            # Self-attention, then cross-attention, then feed-forward.
            return nn.ModuleDict({
                'uv_scaled_rpe_self_attn': UV_ScaledRPE_Block(d_model=hidden_dim, num_heads=num_heads, d_ff=ffn_width, dropout=dropout),
                'norm_cross_attn': nn.LayerNorm(hidden_dim),
                'cross_attn': nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=num_heads, dropout=dropout, batch_first=True),
                'dropout_cross_attn': nn.Dropout(dropout),
                'norm_ffn_cross': nn.LayerNorm(hidden_dim),
                'ffn_cross': nn.Sequential(
                    nn.Linear(hidden_dim, ffn_width),
                    nn.GELU(),
                    nn.Dropout(dropout),
                    nn.Linear(ffn_width, hidden_dim),
                ),
                'dropout_ffn_cross': nn.Dropout(dropout),
            })

        self.transformer_blocks = nn.ModuleList([build_block() for _ in range(n_transformer_blocks)])

    def forward(self, x_high_freq, x_weather):
        """Encode both streams and return the last-step power features.

        Args:
            x_high_freq: Power input; its last two axes are swapped before
                the convolution, so it is expected as (batch, steps, channels).
            x_weather: Weather input fed directly to a batch-first GRU.

        Returns:
            Tensor of shape ``(batch, hidden_dim)``.
        """
        # Conv1d wants channels before time; swap back afterwards for the GRU.
        conv_features = self.power_conv(x_high_freq.permute(0, 2, 1)).permute(0, 2, 1)
        power_seq, _ = self.power_bigru(conv_features)
        h_power = self.power_fc(power_seq)

        weather_seq, _ = self.weather_bigru(x_weather)
        h_weather = self.weather_fc(weather_seq)

        relative_bias = self.relative_pos_bias_generator(h_power.size(1), device=h_power.device)

        state = h_power
        for block in self.transformer_blocks:
            # 1. Self-attention with the scaled relative position bias.
            state = block['uv_scaled_rpe_self_attn'](state, relative_bias)

            # 2. Pre-norm cross-attention: power queries, weather keys/values.
            attended, _ = block['cross_attn'](
                query=block['norm_cross_attn'](state),
                key=h_weather,
                value=h_weather,
            )
            state = state + block['dropout_cross_attn'](attended)

            # 3. Pre-norm feed-forward after the cross-attention.
            transformed = block['ffn_cross'](block['norm_ffn_cross'](state))
            state = state + block['dropout_ffn_cross'](transformed)

        return state[:, -1, :]

# --- 低频分支  ---
class LowFrequencyLSTM(nn.Module):
    """Low-frequency branch: a stacked LSTM summarised by its final hidden state.

    Args:
        input_dim: Feature count of the input sequence.
        hidden_dim: Hidden size of every LSTM layer.
        n_lstm_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.
    """

    def __init__(self, input_dim, hidden_dim, n_lstm_layers, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_lstm_layers, batch_first=True, dropout=dropout)

    def forward(self, x_low_freq):
        """Return the top layer's final hidden state, shape ``(batch, hidden_dim)``."""
        final_hidden = self.lstm(x_low_freq)[1][0]  # h_n: (layers, batch, hidden)
        return final_hidden[-1]

# --- 顶层模型  ---
class ICIAL_UV_RPE_Model(nn.Module):
    """Two-branch quantile forecaster with one output head per horizon.

    The high-frequency branch (power + weather) and the low-frequency branch
    each produce one feature vector; the two are concatenated, passed through
    a fusion MLP and fed to one linear head per target step. Every head emits
    one value per entry of ``quantiles_to_predict``.

    Args:
        target_steps_names: Names of the forecast horizons; used as keys of
            the head dictionary and of the returned predictions.
        power_dim, weather_dim, low_freq_dim: Feature counts of the inputs.
        embed_dim: Width of each branch's output.
        n_gru_layers, n_lstm_layers: Recurrent depth of the two branches.
        num_heads, dropout, transformer_layers, num_pos_buckets: Forwarded
            to the high-frequency branch.
    """

    def __init__(self, target_steps_names, power_dim, weather_dim, low_freq_dim, embed_dim, n_gru_layers, n_lstm_layers, num_heads, dropout, transformer_layers, num_pos_buckets):
        super().__init__()
        self.target_steps_names = target_steps_names
        # Ascending quantile levels; index i of every head's output is level i.
        self.quantiles_to_predict = [
            0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.5,
            0.8, 0.85, 0.9, 0.925, 0.95, 0.975,
        ]
        fused_width = embed_dim * 2

        self.high_freq_branch = EnhancedHighFrequencyModel(power_dim, weather_dim, embed_dim, n_gru_layers, num_heads, dropout, transformer_layers, num_pos_buckets)
        self.low_freq_branch = LowFrequencyLSTM(low_freq_dim, embed_dim, n_lstm_layers, dropout)

        self.fusion_mlp = nn.Sequential(
            nn.Linear(fused_width, fused_width),
            nn.GELU(),
            nn.Dropout(dropout),
        )

        self.quantile_output_heads = nn.ModuleDict({
            step_name: nn.Linear(fused_width, len(self.quantiles_to_predict))
            for step_name in self.target_steps_names
        })

    def forward(self, x_high, x_weather, x_low):
        """Return ``{step_name: tensor(batch, n_quantiles)}`` for every horizon."""
        high_features = self.high_freq_branch(x_high, x_weather)
        low_features = self.low_freq_branch(x_low)
        fused = self.fusion_mlp(torch.cat([high_features, low_features], dim=1))

        predictions = {}
        for step_name in self.target_steps_names:
            predictions[step_name] = self.quantile_output_heads[step_name](fused)
        return predictions

# --- 分位数损失函数  ---
def quantile_loss(y_true, y_pred_quantiles, quantiles_to_predict):
    """Sum of mean pinball losses over all quantile levels.

    Args:
        y_true: Targets, one value per sample. Any shape with ``batch``
            elements is accepted (e.g. ``(batch,)`` or ``(batch, 1)``).
        y_pred_quantiles: Predictions of shape ``(batch, n_quantiles)``;
            column ``i`` corresponds to ``quantiles_to_predict[i]``.
        quantiles_to_predict: Sequence of quantile levels in (0, 1).

    Returns:
        Scalar tensor: for each level the batch-mean pinball loss is
        computed, and the per-level means are summed.
    """
    # Flatten so a (batch, 1) target cannot broadcast against the (batch,)
    # prediction column into a silent (batch, batch) error matrix.
    y_true = y_true.reshape(-1)

    losses = []
    for i, q in enumerate(quantiles_to_predict):
        errors = y_true - y_pred_quantiles[:, i]
        # Pinball loss: q * e for under-prediction, (q - 1) * e for over-prediction.
        losses.append(torch.max(q * errors, (q - 1) * errors).mean())
    return torch.stack(losses).sum()


# =============================================================================
# 4. 数据加载与准备 
# =============================================================================
print("--- 步骤1: 正在加载和预处理数据 ---")
# Load the three CSV files and fill gaps by interpolation (pandas default method).
try:
    high_freq_df = pd.read_csv(HIGH_FREQ_DATA_PATH).interpolate()
    low_freq_df = pd.read_csv(LOW_FREQ_DATA_PATH).interpolate()
    original_df = pd.read_csv(ORIGINAL_DATA_PATH).interpolate()
except FileNotFoundError as e:
    print(f"错误：找不到数据文件！请检查路径设置。{e}")
    # Abort right here with a non-zero status. The interactive `exit()` helper
    # is not meant for programs and, in a notebook, does not reliably stop the
    # rest of the cell from running against undefined data frames.
    raise SystemExit(1) from e

# Separate the weather covariates from the power target.
weather_df = original_df[['Temp','Humidity','GHI','DHI','Rainfall']]
power_df = original_df[['Power']]

print("正在将时间序列转换为监督学习格式...")
# Build lagged input / future output windows for every stream.
processed_power, processed_high, processed_low, processed_weather = [
    time_series_to_supervised_mimo(frame, N_STEPS_IN, N_STEPS_OUT)
    for frame in (power_df, high_freq_df, low_freq_df, weather_df)
]

# Keep only the rows that survived NaN removal in all four frames so that the
# samples of the different streams stay aligned.
common_index = processed_power.index
for other in (processed_high, processed_low, processed_weather):
    common_index = common_index.intersection(other.index)

processed_power = processed_power.loc[common_index]
processed_high = processed_high.loc[common_index]
processed_low = processed_low.loc[common_index]
processed_weather = processed_weather.loc[common_index]

# Target columns: horizon s (1-based) is the column shifted by s-1 steps.
# The names are built with the same pattern the supervised-frame generator
# uses, which labels the zero-shift column "(t0)" and later ones "(t+k)".
# The previous expression produced "Power(t)" for s == 1, a column that does
# not exist.
invalid_steps = [s for s in TARGET_STEPS if not 1 <= s <= N_STEPS_OUT]
if invalid_steps:
    raise ValueError(f"TARGET_STEPS must lie within 1..{N_STEPS_OUT}, got {invalid_steps}")
y_cols = ['Power(t%s%d)' % ('' if s == 1 else '+', s - 1) for s in TARGET_STEPS]
y = processed_power[y_cols].values

# Inputs: only the lagged "(t-k)" columns, reshaped to (samples, steps, features).
X_high = processed_high[[c for c in processed_high.columns if '(t-' in c]].values.reshape(-1, N_STEPS_IN, HIGH_FREQ_FEATURES)
X_weather = processed_weather[[c for c in processed_weather.columns if '(t-' in c]].values.reshape(-1, N_STEPS_IN, WEATHER_FEATURES)
X_low = processed_low[[c for c in processed_low.columns if '(t-' in c]].values.reshape(-1, N_STEPS_IN, LOW_FREQ_FEATURES)

# Chronological split: first 80% train, next 15% validation, remainder test.
train_size = int(len(y) * 0.8)
val_size = int(len(y) * 0.15)


def split_data(data):
    """Slice `data` along its first axis into (train, validation, test) parts."""
    val_end = train_size + val_size
    return data[:train_size], data[train_size:val_end], data[val_end:]


train_X_high, val_X_high, test_X_high = split_data(X_high)
train_X_weather, val_X_weather, test_X_weather = split_data(X_weather)
train_X_low, val_X_low, test_X_low = split_data(X_low)
train_y, val_y, test_y = split_data(y)

# One scaler per input stream and one per forecast horizon.
scaler_high, scaler_weather, scaler_low = MinMaxScaler(), MinMaxScaler(), MinMaxScaler()
scalers_y = {f't_plus_{s}': MinMaxScaler() for s in TARGET_STEPS}

def scale_3d_data(train, val, test, scaler):
    """Scale three arrays feature-wise, fitting the scaler on `train` only.

    Each array is flattened to ``(-1, n_features)`` (features on the last
    axis) before being handed to the scaler and restored to its original
    shape afterwards.

    Args:
        train, val, test: Arrays sharing the same size on their last axis.
        scaler: Object exposing ``fit_transform`` and ``transform``.

    Returns:
        Tuple ``(train_scaled, val_scaled, test_scaled)``.
    """
    def _apply(method, array):
        flat = array.reshape(-1, array.shape[-1])
        return method(flat).reshape(array.shape)

    # The fit must happen first so val/test reuse the training statistics.
    train_s = _apply(scaler.fit_transform, train)
    val_s = _apply(scaler.transform, val)
    test_s = _apply(scaler.transform, test)
    return train_s, val_s, test_s

# Scale every input stream with its own scaler (fitted on the training part).
(train_X_high_s, val_X_high_s, test_X_high_s) = scale_3d_data(
    train_X_high, val_X_high, test_X_high, scaler_high)
(train_X_weather_s, val_X_weather_s, test_X_weather_s) = scale_3d_data(
    train_X_weather, val_X_weather, test_X_weather, scaler_weather)
(train_X_low_s, val_X_low_s, test_X_low_s) = scale_3d_data(
    train_X_low, val_X_low, test_X_low, scaler_low)

# Scale each horizon's target column separately; column i of y belongs to
# TARGET_STEPS[i]. Each list ends up holding one 1-D array per horizon.
train_y_s_list, val_y_s_list, test_y_s_list = [], [], []
for i, s in enumerate(TARGET_STEPS):
    name = f't_plus_{s}'
    column = slice(i, i + 1)  # keeps the 2-D shape the scaler requires
    train_y_s_list.append(scalers_y[name].fit_transform(train_y[:, column]).flatten())
    val_y_s_list.append(scalers_y[name].transform(val_y[:, column]).flatten())
    test_y_s_list.append(scalers_y[name].transform(test_y[:, column]).flatten())

print("数据准备完成。")

def _as_float_tensor(array):
    """Convert a NumPy array to a float32 torch tensor."""
    return torch.from_numpy(array).float()


# One target tensor per forecast horizon, in TARGET_STEPS order.
train_y_tensors = list(map(_as_float_tensor, train_y_s_list))
val_y_tensors = list(map(_as_float_tensor, val_y_s_list))
test_y_tensors = list(map(_as_float_tensor, test_y_s_list))

# Each sample is (high-frequency, weather, low-frequency, *targets).
train_data = TensorDataset(
    _as_float_tensor(train_X_high_s),
    _as_float_tensor(train_X_weather_s),
    _as_float_tensor(train_X_low_s),
    *train_y_tensors,
)
val_data = TensorDataset(
    _as_float_tensor(val_X_high_s),
    _as_float_tensor(val_X_weather_s),
    _as_float_tensor(val_X_low_s),
    *val_y_tensors,
)
test_data_for_loader = TensorDataset(
    _as_float_tensor(test_X_high_s),
    _as_float_tensor(test_X_weather_s),
    _as_float_tensor(test_X_low_s),
    *test_y_tensors,
)

# Only the training loader shuffles; validation and test keep their order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_data_for_loader, batch_size=BATCH_SIZE, shuffle=False)

# =============================================================================
# 5. 模型实例化、优化器定义与框架摘要
# =============================================================================
print("\n--- 步骤2: 实例化采用u,v缩放RPE的ICIAL模型 ---")

# Build the model from the hyperparameters above and move it to the chosen device.
model = ICIAL_UV_RPE_Model(
    target_steps_names=[f't_plus_{s}' for s in TARGET_STEPS],
    power_dim=HIGH_FREQ_FEATURES, weather_dim=WEATHER_FEATURES, low_freq_dim=LOW_FREQ_FEATURES,
    embed_dim=EMBED_DIM, n_gru_layers=GRU_LAYERS, n_lstm_layers=LSTM_LAYERS,
    num_heads=NUM_HEADS, dropout=DROPOUT, transformer_layers=TRANSFORMER_LAYERS,
    num_pos_buckets=RELATIVE_POSITION_BUCKETS
).to(device)

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Halve the learning rate when the monitored loss has not improved for
# PATIENCE // 2 epochs.
# NOTE(review): `verbose` is reported as deprecated by recent PyTorch
# releases — confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=PATIENCE//2, factor=0.5, verbose=True)

print("\n--- 模型架构摘要 ---")
# verbose=1 already prints the table. Binding the return value keeps the
# notebook from echoing the same summary a second time as the cell result.
model_summary = summary(
    model,
    input_size=[
        (BATCH_SIZE, N_STEPS_IN, HIGH_FREQ_FEATURES),
        (BATCH_SIZE, N_STEPS_IN, WEATHER_FEATURES),
        (BATCH_SIZE, N_STEPS_IN, LOW_FREQ_FEATURES),
    ],
    col_names=["input_size", "output_size", "num_params", "mult_adds"],
    verbose=1,
)


随机数种子已设置为: 42
当前设备: cuda
--- 步骤1: 正在加载和预处理数据 ---
正在将时间序列转换为监督学习格式...
数据准备完成。

--- 步骤2: 实例化采用u,v缩放RPE的ICIAL模型 ---

--- 模型架构摘要 ---
Layer (type:depth-idx)                        Input Shape               Output Shape              Param #                   Mult-Adds
ICIAL_UV_RPE_Model                            [64, 96, 1]               [64, 13]                  --                        --
├─EnhancedHighFrequencyModel: 1-1             [64, 96, 1]               [64, 128]                 --                        --
│    └─Conv1d: 2-1                            [64, 1, 96]               [64, 128, 96]             512                       3,145,728
│    └─GRU: 2-2                               [64, 96, 128]             [64, 96, 256]             494,592                   3,038,773,248
│    └─Linear: 2-3                            [64, 96, 256]             [64, 96, 128]             32,896                    2,105,344
│    └─GRU: 2-4                               [64, 96, 5]               [64, 

Layer (type:depth-idx)                        Input Shape               Output Shape              Param #                   Mult-Adds
ICIAL_UV_RPE_Model                            [64, 96, 1]               [64, 13]                  --                        --
├─EnhancedHighFrequencyModel: 1-1             [64, 96, 1]               [64, 128]                 --                        --
│    └─Conv1d: 2-1                            [64, 1, 96]               [64, 128, 96]             512                       3,145,728
│    └─GRU: 2-2                               [64, 96, 128]             [64, 96, 256]             494,592                   3,038,773,248
│    └─Linear: 2-3                            [64, 96, 256]             [64, 96, 128]             32,896                    2,105,344
│    └─GRU: 2-4                               [64, 96, 5]               [64, 96, 256]             103,680                   637,009,920
│    └─Linear: 2-5                            [64, 96, 256]           

In [2]:
# =============================================================================
# Model size report: trainable parameter count and approximate memory
# =============================================================================
# Count only the parameters that receive gradients.
total_params = sum(
    map(torch.Tensor.numel, filter(lambda p: p.requires_grad, model.parameters()))
)
print(f"模型可训练参数总数: {total_params:,}")

# Convert to MiB assuming 4 bytes per parameter (i.e. torch.float32 weights).
params_in_mb = total_params * 4 / 1024 ** 2
print(f"模型可训练参数量: {params_in_mb:.2f} MB") # two decimal places

模型可训练参数总数: 1,868,372
模型可训练参数量: 7.13 MB


In [4]:

# =============================================================================
# 6. Training and validation loop
# =============================================================================
# Each epoch: one pass over train_loader with optimisation, one pass over
# val_loader without gradients, LR scheduling on the validation loss,
# checkpointing on improvement and early stopping after PATIENCE bad epochs.

print("\n--- 步骤3: 开始模型训练 ---")
best_val_loss = float('inf') # best validation loss seen so far
early_stopping_counter = 0 # consecutive epochs without improvement

model_quantiles = model.quantiles_to_predict # quantile levels predicted by the model

# Wall-clock start of the whole training run
total_train_start_time = time.time()

for epoch in range(EPOCHS):
    model.train() # switch the model to training mode
    train_loss = 0.0
    epoch_start_time = time.time() # start time of this epoch
    
    # tqdm shows a progress bar over the training batches
    for batch_idx, (x_high, x_weather, x_low, *y_targets) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1} 训练")):
        # Move the inputs to the selected device
        x_high, x_weather, x_low = x_high.to(device), x_weather.to(device), x_low.to(device)
        y_targets_on_device = [y.to(device) for y in y_targets] # one target tensor per horizon

        optimizer.zero_grad() # clear gradients from the previous step
        
        predictions_dict = model(x_high, x_weather, x_low) # forward pass
        
        batch_loss = 0.0
        # Sum the quantile loss over all forecast horizons
        for i, name in enumerate(model.target_steps_names):
            batch_loss += quantile_loss(y_targets_on_device[i], predictions_dict[name], model_quantiles)
        
        batch_loss.backward() # backward pass
        optimizer.step() # update the parameters
        train_loss += batch_loss.item() # accumulate the batch loss

    avg_train_loss = train_loss / len(train_loader) # mean loss per training batch
    
    # Validation phase
    model.eval() # evaluation mode (disables dropout); gradients are disabled by torch.no_grad() below
    val_loss = 0.0
    with torch.no_grad(): # no gradient tracking inside this block
        for x_high, x_weather, x_low, *y_targets in tqdm(val_loader, desc=f"Epoch {epoch+1} 验证"):
            x_high, x_weather, x_low = x_high.to(device), x_weather.to(device), x_low.to(device)
            y_targets_on_device = [y.to(device) for y in y_targets]

            predictions_dict = model(x_high, x_weather, x_low)
            
            batch_val_loss = 0.0
            for i, name in enumerate(model.target_steps_names):
                batch_val_loss += quantile_loss(y_targets_on_device[i], predictions_dict[name], model_quantiles)
            val_loss += batch_val_loss.item()

    avg_val_loss = val_loss / len(val_loader) # mean loss per validation batch
    
    epoch_end_time = time.time() # end time of this epoch
    epoch_duration = epoch_end_time - epoch_start_time # covers both training and validation

    print(f"Epoch {epoch+1}/{EPOCHS}, 训练损失: {avg_train_loss:.4f}, 验证损失: {avg_val_loss:.4f}, 耗时: {epoch_duration:.2f}s")

    scheduler.step(avg_val_loss) # let the scheduler react to the validation loss

    # Checkpointing and early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        early_stopping_counter = 0 # reset the counter on improvement
        torch.save(model.state_dict(), MODEL_FILE_PATH) # overwrite the best checkpoint
        print(f"模型已保存至: {MODEL_FILE_PATH}")
    else:
        early_stopping_counter += 1
        print(f"早停计数器: {early_stopping_counter}/{PATIENCE}")
        if early_stopping_counter >= PATIENCE:
            print("触发早停！训练结束。")
            break

# Wall-clock end of the whole training run
total_train_end_time = time.time()
total_train_duration = total_train_end_time - total_train_start_time

# Report the total training time in seconds
print(f"\n模型总训练时间: {total_train_duration:.2f} 秒")
# ...and again split into hours, minutes and seconds
hours, rem = divmod(total_train_duration, 3600)
minutes, seconds = divmod(rem, 60)
print(f"总训练时间: {int(hours)}小时 {int(minutes)}分钟 {seconds:.2f}秒")


--- 步骤3: 开始模型训练 ---


Epoch 1 训练: 100%|██████████| 1302/1302 [00:58<00:00, 22.42it/s]
Epoch 1 验证: 100%|██████████| 245/245 [00:02<00:00, 82.14it/s]


Epoch 1/50, 训练损失: 0.9149, 验证损失: 0.4970, 耗时: 61.08s
模型已保存至: icial_uv_rpe_model.pth


Epoch 2 训练: 100%|██████████| 1302/1302 [00:57<00:00, 22.83it/s]
Epoch 2 验证: 100%|██████████| 245/245 [00:03<00:00, 79.33it/s]


Epoch 2/50, 训练损失: 0.5282, 验证损失: 0.4568, 耗时: 60.12s
模型已保存至: icial_uv_rpe_model.pth


Epoch 3 训练: 100%|██████████| 1302/1302 [00:56<00:00, 22.89it/s]
Epoch 3 验证: 100%|██████████| 245/245 [00:02<00:00, 82.63it/s]


Epoch 3/50, 训练损失: 0.4516, 验证损失: 0.3933, 耗时: 59.86s
模型已保存至: icial_uv_rpe_model.pth


Epoch 4 训练: 100%|██████████| 1302/1302 [00:56<00:00, 22.90it/s]
Epoch 4 验证: 100%|██████████| 245/245 [00:02<00:00, 82.56it/s]


Epoch 4/50, 训练损失: 0.4162, 验证损失: 0.3983, 耗时: 59.84s
早停计数器: 1/10


Epoch 5 训练: 100%|██████████| 1302/1302 [00:56<00:00, 22.94it/s]
Epoch 5 验证: 100%|██████████| 245/245 [00:02<00:00, 82.18it/s]


Epoch 5/50, 训练损失: 0.3918, 验证损失: 0.3549, 耗时: 59.75s
模型已保存至: icial_uv_rpe_model.pth


Epoch 6 训练: 100%|██████████| 1302/1302 [00:58<00:00, 22.26it/s]
Epoch 6 验证: 100%|██████████| 245/245 [00:02<00:00, 82.28it/s]


Epoch 6/50, 训练损失: 0.3769, 验证损失: 0.3462, 耗时: 61.47s
模型已保存至: icial_uv_rpe_model.pth


Epoch 7 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.76it/s]
Epoch 7 验证: 100%|██████████| 245/245 [00:03<00:00, 79.31it/s]


Epoch 7/50, 训练损失: 0.3642, 验证损失: 0.3420, 耗时: 62.93s
模型已保存至: icial_uv_rpe_model.pth


Epoch 8 训练: 100%|██████████| 1302/1302 [01:20<00:00, 16.21it/s]
Epoch 8 验证: 100%|██████████| 245/245 [00:04<00:00, 59.02it/s]


Epoch 8/50, 训练损失: 0.3522, 验证损失: 0.3737, 耗时: 84.46s
早停计数器: 1/10


Epoch 9 训练: 100%|██████████| 1302/1302 [01:14<00:00, 17.57it/s]
Epoch 9 验证: 100%|██████████| 245/245 [00:04<00:00, 60.77it/s]


Epoch 9/50, 训练损失: 0.3444, 验证损失: 0.3250, 耗时: 78.14s
模型已保存至: icial_uv_rpe_model.pth


Epoch 10 训练: 100%|██████████| 1302/1302 [01:13<00:00, 17.78it/s]
Epoch 10 验证: 100%|██████████| 245/245 [00:03<00:00, 77.69it/s]


Epoch 10/50, 训练损失: 0.3377, 验证损失: 0.3216, 耗时: 76.41s
模型已保存至: icial_uv_rpe_model.pth


Epoch 11 训练: 100%|██████████| 1302/1302 [01:06<00:00, 19.49it/s]
Epoch 11 验证: 100%|██████████| 245/245 [00:03<00:00, 69.28it/s]


Epoch 11/50, 训练损失: 0.3301, 验证损失: 0.3113, 耗时: 70.36s
模型已保存至: icial_uv_rpe_model.pth


Epoch 12 训练: 100%|██████████| 1302/1302 [01:17<00:00, 16.78it/s]
Epoch 12 验证: 100%|██████████| 245/245 [00:03<00:00, 62.28it/s]


Epoch 12/50, 训练损失: 0.3235, 验证损失: 0.3259, 耗时: 81.56s
早停计数器: 1/10


Epoch 13 训练: 100%|██████████| 1302/1302 [01:15<00:00, 17.24it/s]
Epoch 13 验证: 100%|██████████| 245/245 [00:03<00:00, 74.05it/s]


Epoch 13/50, 训练损失: 0.3160, 验证损失: 0.2945, 耗时: 78.82s
模型已保存至: icial_uv_rpe_model.pth


Epoch 14 训练: 100%|██████████| 1302/1302 [01:10<00:00, 18.43it/s]
Epoch 14 验证: 100%|██████████| 245/245 [00:03<00:00, 78.60it/s]


Epoch 14/50, 训练损失: 0.3124, 验证损失: 0.3332, 耗时: 73.77s
早停计数器: 1/10


Epoch 15 训练: 100%|██████████| 1302/1302 [01:16<00:00, 17.12it/s]
Epoch 15 验证: 100%|██████████| 245/245 [00:03<00:00, 73.02it/s]


Epoch 15/50, 训练损失: 0.3081, 验证损失: 0.2940, 耗时: 79.40s
模型已保存至: icial_uv_rpe_model.pth


Epoch 16 训练: 100%|██████████| 1302/1302 [01:08<00:00, 18.98it/s]
Epoch 16 验证: 100%|██████████| 245/245 [00:03<00:00, 76.50it/s]


Epoch 16/50, 训练损失: 0.3042, 验证损失: 0.3013, 耗时: 71.81s
早停计数器: 1/10


Epoch 17 训练: 100%|██████████| 1302/1302 [01:02<00:00, 20.77it/s]
Epoch 17 验证: 100%|██████████| 245/245 [00:03<00:00, 78.58it/s]


Epoch 17/50, 训练损失: 0.3004, 验证损失: 0.2942, 耗时: 65.81s
早停计数器: 2/10


Epoch 18 训练: 100%|██████████| 1302/1302 [01:05<00:00, 19.92it/s]
Epoch 18 验证: 100%|██████████| 245/245 [00:03<00:00, 78.27it/s]


Epoch 18/50, 训练损失: 0.2981, 验证损失: 0.2946, 耗时: 68.50s
早停计数器: 3/10


Epoch 19 训练: 100%|██████████| 1302/1302 [01:12<00:00, 17.88it/s]
Epoch 19 验证: 100%|██████████| 245/245 [00:03<00:00, 77.99it/s]


Epoch 19/50, 训练损失: 0.2955, 验证损失: 0.3045, 耗时: 75.95s
早停计数器: 4/10


Epoch 20 训练: 100%|██████████| 1302/1302 [01:06<00:00, 19.48it/s]
Epoch 20 验证: 100%|██████████| 245/245 [00:03<00:00, 72.03it/s]


Epoch 20/50, 训练损失: 0.2928, 验证损失: 0.2966, 耗时: 70.24s
早停计数器: 5/10


Epoch 21 训练: 100%|██████████| 1302/1302 [01:04<00:00, 20.32it/s]
Epoch 21 验证: 100%|██████████| 245/245 [00:03<00:00, 67.26it/s]


Epoch 21/50, 训练损失: 0.2906, 验证损失: 0.2960, 耗时: 67.73s
早停计数器: 6/10


Epoch 22 训练: 100%|██████████| 1302/1302 [01:02<00:00, 21.00it/s]
Epoch 22 验证: 100%|██████████| 245/245 [00:03<00:00, 77.44it/s]


Epoch 22/50, 训练损失: 0.2784, 验证损失: 0.2896, 耗时: 65.18s
模型已保存至: icial_uv_rpe_model.pth


Epoch 23 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.87it/s]
Epoch 23 验证: 100%|██████████| 245/245 [00:03<00:00, 77.43it/s]


Epoch 23/50, 训练损失: 0.2759, 验证损失: 0.3089, 耗时: 62.71s
早停计数器: 1/10


Epoch 24 训练: 100%|██████████| 1302/1302 [00:58<00:00, 22.21it/s]
Epoch 24 验证: 100%|██████████| 245/245 [00:03<00:00, 77.77it/s]


Epoch 24/50, 训练损失: 0.2748, 验证损失: 0.2945, 耗时: 61.79s
早停计数器: 2/10


Epoch 25 训练: 100%|██████████| 1302/1302 [01:04<00:00, 20.29it/s]
Epoch 25 验证: 100%|██████████| 245/245 [00:03<00:00, 81.11it/s]


Epoch 25/50, 训练损失: 0.2738, 验证损失: 0.2856, 耗时: 67.19s
模型已保存至: icial_uv_rpe_model.pth


Epoch 26 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.75it/s]
Epoch 26 验证: 100%|██████████| 245/245 [00:03<00:00, 79.74it/s]


Epoch 26/50, 训练损失: 0.2723, 验证损失: 0.2887, 耗时: 62.94s
早停计数器: 1/10


Epoch 27 训练: 100%|██████████| 1302/1302 [01:00<00:00, 21.56it/s]
Epoch 27 验证: 100%|██████████| 245/245 [00:03<00:00, 78.07it/s]


Epoch 27/50, 训练损失: 0.2723, 验证损失: 0.2968, 耗时: 63.55s
早停计数器: 2/10


Epoch 28 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.78it/s]
Epoch 28 验证: 100%|██████████| 245/245 [00:03<00:00, 79.18it/s]


Epoch 28/50, 训练损失: 0.2698, 验证损失: 0.2881, 耗时: 62.89s
早停计数器: 3/10


Epoch 29 训练: 100%|██████████| 1302/1302 [01:00<00:00, 21.45it/s]
Epoch 29 验证: 100%|██████████| 245/245 [00:03<00:00, 68.61it/s]


Epoch 29/50, 训练损失: 0.2687, 验证损失: 0.3044, 耗时: 64.26s
早停计数器: 4/10


Epoch 30 训练: 100%|██████████| 1302/1302 [01:02<00:00, 20.94it/s]
Epoch 30 验证: 100%|██████████| 245/245 [00:03<00:00, 73.96it/s]


Epoch 30/50, 训练损失: 0.2692, 验证损失: 0.2905, 耗时: 65.51s
早停计数器: 5/10


Epoch 31 训练: 100%|██████████| 1302/1302 [01:02<00:00, 20.82it/s]
Epoch 31 验证: 100%|██████████| 245/245 [00:03<00:00, 79.28it/s]


Epoch 31/50, 训练损失: 0.2677, 验证损失: 0.2885, 耗时: 65.63s
早停计数器: 6/10


Epoch 32 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.81it/s]
Epoch 32 验证: 100%|██████████| 245/245 [00:03<00:00, 79.27it/s]


Epoch 32/50, 训练损失: 0.2604, 验证损失: 0.2911, 耗时: 62.79s
早停计数器: 7/10


Epoch 33 训练: 100%|██████████| 1302/1302 [01:00<00:00, 21.56it/s]
Epoch 33 验证: 100%|██████████| 245/245 [00:03<00:00, 79.80it/s]


Epoch 33/50, 训练损失: 0.2599, 验证损失: 0.2917, 耗时: 63.47s
早停计数器: 8/10


Epoch 34 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.94it/s]
Epoch 34 验证: 100%|██████████| 245/245 [00:03<00:00, 79.19it/s]


Epoch 34/50, 训练损失: 0.2594, 验证损失: 0.2900, 耗时: 62.45s
早停计数器: 9/10


Epoch 35 训练: 100%|██████████| 1302/1302 [00:59<00:00, 21.80it/s]
Epoch 35 验证: 100%|██████████| 245/245 [00:03<00:00, 73.05it/s]

Epoch 35/50, 训练损失: 0.2581, 验证损失: 0.2940, 耗时: 63.07s
早停计数器: 10/10
触发早停！训练结束。

模型总训练时间: 2361.68 秒
总训练时间: 0小时 39分钟 21.68秒





In [3]:
# =============================================================================
# 7. 模型在测试集上的评估
# =============================================================================

# --- SMAPE 计算函数 (新增) ---
def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error as a fraction.

    Computes ``mean(|y - y_hat| / ((|y| + |y_hat| + 0.1) / 2))``. The constant
    0.1 keeps the denominator at or above 0.05, so no division by zero can
    occur. The result is a ratio, not a percentage (multiply by 100 for %).

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        The mean ratio as a NumPy floating-point scalar.
    """
    # Accept plain sequences as well as arrays; arrays keep their dtype.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred) + 0.1) / 2
    return np.mean(numerator / denominator)

print("\n--- 步骤4: 正在测试集上评估模型 ---")
# Restore the best checkpoint. map_location remaps the stored tensors onto the
# device in use, so a checkpoint written on a GPU also loads on a CPU-only host.
model.load_state_dict(torch.load(MODEL_FILE_PATH, map_location=device))
model.eval() # evaluation mode (disables dropout)

# Per-horizon lists of batch outputs: quantile predictions and scaled targets.
test_predictions_raw = {name: [] for name in model.target_steps_names}
true_values_raw = {name: [] for name in model.target_steps_names} # targets are still in scaled units

with torch.no_grad():
    for x_high, x_weather, x_low, *y_targets_scaled in tqdm(test_loader, desc="测试集预测"):
        x_high, x_weather, x_low = x_high.to(device), x_weather.to(device), x_low.to(device)
        
        predictions_dict = model(x_high, x_weather, x_low)
        
        for i, name in enumerate(model.target_steps_names):
            test_predictions_raw[name].append(predictions_dict[name].cpu().numpy())
            # The loader already yields scaled targets; store them unchanged.
            true_values_raw[name].append(y_targets_scaled[i].cpu().numpy())

# Stitch the per-batch arrays together along the sample axis.
for name in model.target_steps_names:
    test_predictions_raw[name] = np.concatenate(test_predictions_raw[name], axis=0)
    true_values_raw[name] = np.concatenate(true_values_raw[name], axis=0)

# Map predictions and targets back to the original scale, then score the
# median (0.5-quantile) forecast of every horizon.
print("\n--- 正在计算评估指标 ---")
results = {} # final metrics and arrays, keyed by horizon name

# Column of the 0.5 quantile, used as the point forecast.
median_pred_idx = model.quantiles_to_predict.index(0.5)

for i, s in enumerate(TARGET_STEPS):
    name = f't_plus_{s}'

    # Undo the target scaling for all quantile columns at once: the scaler has
    # a single feature, so every value is transformed element-wise.
    predictions_original_scale = scalers_y[name].inverse_transform(
        test_predictions_raw[name].reshape(-1, 1)
    ).reshape(test_predictions_raw[name].shape)

    # Undo the scaling of the targets.
    true_original_scale = scalers_y[name].inverse_transform(true_values_raw[name].reshape(-1, 1)).flatten()

    median_predictions = predictions_original_scale[:, median_pred_idx]

    # Absolute errors of the median forecast.
    mae = mean_absolute_error(true_original_scale, median_predictions)
    rmse = np.sqrt(mean_squared_error(true_original_scale, median_predictions))

    # Errors normalised by the mean of the observed values.
    mean_y = np.mean(true_original_scale)
    range_y = np.max(true_original_scale) - np.min(true_original_scale)
    if mean_y != 0:
        nmae = mae / mean_y
        nrmse = rmse / mean_y
    else:
        nmae = float('inf')
        nrmse = float('inf')

    _smape = smape(true_original_scale, median_predictions)

    results[name] = {
        'MAE (kW)': mae,
        'RMSE (kW)': rmse,
        'nMAE': nmae,
        'nRMSE': nrmse,
        'SMAPE': _smape,
        '真实值 (kW)': true_original_scale,
        '中位数预测 (kW)': median_predictions,
        '所有分位数预测 (kW)': predictions_original_scale
    }

    print(f"\n--- {name} 的指标 (MIMO 策略) ---")
    print(f"MAE: {mae:.4f} kW")
    print(f"RMSE: {rmse:.4f} kW")
    print(f"nMAE: {nmae:.4f}")
    print(f"nRMSE: {nrmse:.4f}")
    print(f"SMAPE: {_smape:.2f}")


--- 步骤4: 正在测试集上评估模型 ---


测试集预测: 100%|██████████| 82/82 [00:01<00:00, 60.40it/s]


--- 正在计算评估指标 ---

--- t_plus_3 的指标 (MIMO 策略) ---
MAE: 0.0690 kW
RMSE: 0.2061 kW
nMAE: 0.0536
nRMSE: 0.1601
SMAPE: 0.09

--- t_plus_6 的指标 (MIMO 策略) ---
MAE: 0.0832 kW
RMSE: 0.2383 kW
nMAE: 0.0646
nRMSE: 0.1851
SMAPE: 0.12

--- t_plus_12 的指标 (MIMO 策略) ---
MAE: 0.1009 kW
RMSE: 0.2737 kW
nMAE: 0.0784
nRMSE: 0.2126
SMAPE: 0.15

--- t_plus_24 的指标 (MIMO 策略) ---
MAE: 0.1470 kW
RMSE: 0.3382 kW
nMAE: 0.1142
nRMSE: 0.2627
SMAPE: 0.23



