In [1]:
from kan import *
import pytz
import time
import datetime
import torch.optim as optim
from data_process import data_process_without_norm
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import copy
import pandas as pd
import torch.nn.functional as F
import math

df = data_process_without_norm()
df = df.drop("entsoe",axis=1)
loc_tz = pytz.timezone('Europe/Zurich')
split_date_train_ = loc_tz.localize(datetime.datetime(2016,1,1,0,0,0,0))
split_date_train = loc_tz.localize(datetime.datetime(2016,3,1,0,0,0,0))
split_date_test = loc_tz.localize(datetime.datetime(2016,3,15,0,0,0,0))

df_train_ = df.loc[(split_date_train_ < df.index)]
df_train = df_train_.loc[df_train_.index <= split_date_train].copy()
_temp_df = df.loc[split_date_test > df.index]
df_test = _temp_df.loc[_temp_df.index > split_date_train].copy()

# Input standardization
scaler_input = StandardScaler()
# scaler_input = MinMaxScalerr()
_temp_scaled_input_data = scaler_input.fit_transform(df_train.iloc[:,1:])
x_train_input = _temp_scaled_input_data

#Output standardization
scaler_output = StandardScaler()
# scaler_input = MinMaxScaler()
_temp_scaled_output_data = scaler_output.fit_transform(np.array(df_train.iloc[:,0]).reshape(-1,1))
x_train_label = _temp_scaled_output_data

#Test set standardization
x_test_input = scaler_input.transform(df_test.iloc[:,1:])
x_test_label = scaler_output.transform(np.array(df_test.iloc[:,0]).reshape(-1,1))

_temp_test_input = np.hstack((x_test_input[:,:8],x_test_input[:,-3:-1]))
_temp_test_label = x_test_label
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

_temp_train_input = np.hstack((x_train_input[:, :8], x_train_input[:, -3:-1]))
_temp_train_label = x_train_label

# 将数据转换为 PyTorch 张量
train_input_tensor = torch.tensor(_temp_train_input.astype(np.float32))
train_label_tensor = torch.tensor(_temp_train_label.astype(np.float32))

test_input_tensor = torch.tensor(_temp_test_input.astype(np.float32))
test_label_tensor = torch.tensor(_temp_test_label.astype(np.float32))
# 定义 k 折交叉验证
k = 5  # 例如，5折交叉验证
kf = KFold(n_splits=k, shuffle=True, random_state=42)


In [2]:
# SimpleCNN 模型
class SimpleCNN(nn.Module):
    def __init__(self, num_inputs, num_outputs, conv_out_channels=16, time_step=24):
        super(SimpleCNN, self).__init__()
        self.conv = nn.Conv1d(in_channels=num_inputs, out_channels=conv_out_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(conv_out_channels * time_step, num_outputs)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # 调整输入形状为 (batch_size, num_inputs, time_step)
        x = self.conv(x)
        x = self.relu(x)
        x = x.view(x.size(0), -1)  # 展平
        x = self.fc(x)
        return x

# SimpleLSTM 模型
class SimpleLSTM(nn.Module):
    def __init__(self, num_inputs, num_outputs, lstm_hidden_units=16):
        super(SimpleLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=num_inputs, hidden_size=lstm_hidden_units, batch_first=True)
        self.fc = nn.Linear(lstm_hidden_units, num_outputs)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # 取最后一个时间步的输出
        x = self.fc(x)
        return x

# CNNLSTM 模型
class CNNLSTM(nn.Module):
    def __init__(self, num_inputs, num_outputs, conv_out_channels=16, lstm_hidden_units=16, time_step=24):
        super(CNNLSTM, self).__init__()
        self.conv = nn.Conv1d(in_channels=num_inputs, out_channels=conv_out_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(input_size=conv_out_channels, hidden_size=lstm_hidden_units, batch_first=True)
        self.fc = nn.Linear(lstm_hidden_units, num_outputs)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # 调整输入形状为 (batch_size, num_inputs, time_step)
        x = self.conv(x)
        x = self.relu(x)
        x = x.permute(0, 2, 1)  # 调整回 LSTM 所需的输入形状 (batch_size, time_step, conv_out_channels)
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # 取最后一个时间步的输出
        x = self.fc(x)
        return x

# SimpleTransformer 模型
class SimpleTransformer(nn.Module):
    def __init__(self, num_inputs, num_outputs, d_model=16, num_heads=2, num_layers=2):
        super(SimpleTransformer, self).__init__()
        self.embedding = nn.Linear(num_inputs, d_model)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads),
            num_layers=num_layers
        )
        self.fc = nn.Linear(d_model, num_outputs)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x)
        x = self.fc(x)
        return x

# SimpleFCN1 模型
class SimpleFCN1(nn.Module):
    def __init__(self, num_inputs, num_outputs, hidden_units=16):
        super(SimpleFCN1, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(num_inputs, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_outputs)
        )

    def forward(self, x):
        return self.net(x)

# SimpleFCN2 模型
class SimpleFCN2(nn.Module):
    def __init__(self, num_inputs, num_outputs, hidden_units1=32, hidden_units2=16):
        super(SimpleFCN2, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(num_inputs, hidden_units1),
            nn.ReLU(),
            nn.Linear(hidden_units1, hidden_units2),
            nn.ReLU(),
            nn.Linear(hidden_units2, num_outputs)
        )

    def forward(self, x):
        return self.net(x)
# Informer 模型
class Informer(nn.Module):
    def __init__(self, num_inputs, num_outputs, d_model=16, n_heads=2, e_layers=2, d_layers=1, d_ff=32, 
                 dropout=0.1, activation='relu', output_attention=False, distil=True):
        super(Informer, self).__init__()
        self.enc_embedding = DataEmbedding(num_inputs, d_model, dropout)
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        ProbAttention(False, factor=5, attention_dropout=dropout, output_attention=False),
                        d_model, n_heads),
                    d_model,
                    d_ff,
                    dropout=dropout,
                    activation=activation
                ) for l in range(e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        )
        self.projection = nn.Linear(d_model, num_outputs)
        
    def forward(self, x):
        x = self.enc_embedding(x)
        enc_out, attns = self.encoder(x)
        enc_out = enc_out[:, -1, :]  # 取最后一个时间步
        output = self.projection(enc_out)
        return output

# Autoformer 模型
class Autoformer(nn.Module):
    def __init__(self, num_inputs, num_outputs, d_model=16, n_heads=2, e_layers=2, d_ff=32, 
                 moving_avg=25, dropout=0.1, activation='relu'):
        super(Autoformer, self).__init__()
        self.enc_embedding = DataEmbedding(num_inputs, d_model, dropout)
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(False, factor=3, attention_dropout=dropout, output_attention=False),
                        d_model, n_heads),
                    d_model,
                    d_ff,
                    moving_avg=moving_avg,
                    dropout=dropout,
                    activation=activation
                ) for l in range(e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        )
        self.projection = nn.Linear(d_model, num_outputs)
        
    def forward(self, x):
        x = self.enc_embedding(x)
        enc_out, attns = self.encoder(x)
        enc_out = enc_out[:, -1, :]  # 取最后一个时间步
        output = self.projection(enc_out)
        return output

# Informer 和 Autoformer 所需的组件
class DataEmbedding(nn.Module):
    def __init__(self, c_in, d_model, dropout=0.1):
        super(DataEmbedding, self).__init__()
        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        x = self.value_embedding(x)
        return self.dropout(x)

class TokenEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = 1
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 
                                  kernel_size=3, padding=padding, padding_mode='circular')
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.tokenConv(x)
        x = x.permute(0, 2, 1)
        return x

class Encoder(nn.Module):
    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        attns = []
        if self.conv_layers is not None:
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns

class EncoderLayer(nn.Module):
    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        
        # Autoformer 特有的组件
        if moving_avg > 0:
            self.decomp1 = series_decomp(moving_avg)
            self.decomp2 = series_decomp(moving_avg)
        else:
            self.decomp1 = None
            self.decomp2 = None
            
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        # 注意力机制
        new_x, attn = self.attention(x, x, x, attn_mask=attn_mask)
        
        # Autoformer 的分解机制
        if self.decomp1 is not None:
            x = x + self.dropout(new_x)
            x, _ = self.decomp1(x)
        else:
            x = x + self.dropout(new_x)

        # 前馈网络
        y = x
        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))
        
        # Autoformer 的分解机制
        if self.decomp2 is not None:
            res, _ = self.decomp2(x + y)
        else:
            res = x + y
            
        return res, attn

class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None):
        super(AttentionLayer, self).__init__()
        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask=None):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries, keys, values, attn_mask
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), attn

class ProbAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        index_sample = torch.randint(L_K, (L_Q, sample_k))
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        Q_reduce = Q[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   M_top, :]
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
        else:
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)

        context_in[torch.arange(B)[:, None, None],
        torch.arange(H)[None, :, None],
        index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask=None):
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()

        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        scale = self.scale or 1. / math.sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale

        context = self._get_initial_context(values, L_Q)
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        return context.transpose(2, 1).contiguous(), attn

class AutoCorrelation(nn.Module):
    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        super(AutoCorrelation, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def time_delay_agg_training(self, values, corr):
        batch, head, channel, length = values.shape
        # 寻找top-k的相关性
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
        weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
        # 更新聚合的值
        tmp_values = values
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(tmp_values, -int(index[i]), -1)
            delays_agg = delays_agg + pattern * \
                         (weights[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_inference(self, values, corr):
        batch, head, channel, length = values.shape
        # 寻找top-k的相关性
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        weights, delay = torch.topk(mean_value, top_k, dim=-1)
        # 更新聚合的值
        tmp_values = values
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(tmp_values, -int(delay[0, i]), -1)
            delays_agg = delays_agg + pattern * \
                         (weights[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def forward(self, queries, keys, values, attn_mask=None):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape

        # 周期性自相关
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        res = q_fft * torch.conj(k_fft)
        corr = torch.fft.irfft(res, dim=-1)

        if self.training:
            V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
        else:
            V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)

        if self.output_attention:
            return (V.contiguous(), corr.permute(0, 3, 1, 2))
        else:
            return (V.contiguous(), None)

class AutoCorrelationLayer(nn.Module):
    def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None):
        super(AutoCorrelationLayer, self).__init__()
        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = correlation
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask=None):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries, keys, values, attn_mask
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), attn

class series_decomp(nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = MovingAvg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        return res, moving_mean

class MovingAvg(nn.Module):
    def __init__(self, kernel_size, stride):
        super(MovingAvg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class ProbMask():
    def __init__(self, B, H, L, index, scores, device="cpu"):
        _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
        _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
        indicator = _mask_ex[torch.arange(B)[:, None, None],
                    torch.arange(H)[None, :, None],
                    index, :].to(device)
        self._mask = indicator.view(scores.shape).to(device)

    @property
    def mask(self):
        return self._mask

# 定义损失函数
loss_func = nn.MSELoss()

# 超参数范围
learning_rates = [0.01, 0.005, 0.001]
batch_sizes = [128, 64, 32]

# K折交叉验证
k = 3
kf = KFold(n_splits=k, shuffle=True, random_state=42)


def create_time_series_data(data, time_step):
    num_samples = data.shape[0]
    num_features = data.shape[1]
    time_series_data = []

    for i in range(num_samples - time_step + 1):
        time_series_data.append(data[i:i + time_step].clone().detach().numpy())

    return torch.tensor(np.array(time_series_data))

In [3]:
# # 超参数范围
# learning_rates = [0.01, 0.005, 0.001]
# batch_sizes = [128, 64, 32]
# num_epochs_list = [50, 100]
# hidden_units_list = [16, 32]

# def create_time_series_data(data, time_step):
#     num_samples = data.shape[0]
#     num_features = data.shape[1]
#     time_series_data = []

#     for i in range(num_samples - time_step + 1):
#         time_series_data.append(data[i:i + time_step].clone().detach().numpy())

#     return torch.tensor(np.array(time_series_data))

# def train_and_evaluate_model(model_class, num_inputs, num_outputs, train_input_tensor, train_label_tensor, time_step=24):
#     results = []
#     for lr in learning_rates:
#         for batch_size in batch_sizes:
#             for num_epochs in num_epochs_list:
#                 for hidden_units in hidden_units_list:
#                     fold_results = []
#                     for fold, (train_index, val_index) in enumerate(kf.split(train_input_tensor)):
#                         # 创建模型实例
#                         if model_class in [SimpleCNN, CNNLSTM]:
#                             model = model_class(num_inputs, num_outputs, hidden_units, time_step)
#                         else:
#                             model = model_class(num_inputs, num_outputs, hidden_units)

#                         optimizer = optim.Adam(model.parameters(), lr=lr)

#                         # 分割数据
#                         train_input_fold = train_input_tensor[train_index]
#                         train_label_fold = train_label_tensor[train_index]
#                         val_input_fold = train_input_tensor[val_index]
#                         val_label_fold = train_label_tensor[val_index]

#                         # 如果模型是CNN或LSTM，转换数据为时间序列格式
#                         if issubclass(model_class, (SimpleCNN, SimpleLSTM, CNNLSTM)):
#                             train_input_fold = create_time_series_data(train_input_fold, time_step)
#                             train_label_fold = train_label_fold[time_step - 1:]  # 对齐标签
#                             val_input_fold = create_time_series_data(val_input_fold, time_step)
#                             val_label_fold = val_label_fold[time_step - 1:]  # 对齐标签

#                         # 创建数据加载器
#                         train_loader = torch.utils.data.DataLoader(
#                             dataset=torch.utils.data.TensorDataset(train_input_fold.clone().detach(), train_label_fold.clone().detach()),
#                             batch_size=batch_size,
#                             shuffle=True
#                         )
#                         val_loader = torch.utils.data.DataLoader(
#                             dataset=torch.utils.data.TensorDataset(val_input_fold.clone().detach(), val_label_fold.clone().detach()),
#                             batch_size=batch_size,
#                             shuffle=False
#                         )

#                         # 训练模型
#                         start_time = time.time()
#                         for epoch in range(num_epochs):
#                             model.train()
#                             for step, (x, y) in enumerate(train_loader):
#                                 output = model(x)
#                                 loss = loss_func(output, y)
#                                 optimizer.zero_grad()
#                                 loss.backward()
#                                 optimizer.step()

#                         # 验证模型
#                         model.eval()
#                         val_loss = 0
#                         with torch.no_grad():
#                             for x, y in val_loader:
#                                 output = model(x)
#                                 loss = loss_func(output, y)
#                                 val_loss += loss.item()
#                         val_loss /= len(val_loader)
#                         fold_results.append(val_loss)
#                         end_time = time.time()
#                         print(f"Fold {fold + 1}, LR: {lr}, Batch Size: {batch_size}, Epochs: {num_epochs}, Hidden Units: {hidden_units}, Validation Loss: {val_loss:.4f}, Time: {end_time - start_time:.2f} seconds")

#                     # 计算当前超参数组合在所有折上的平均损失
#                     avg_val_loss = np.mean(fold_results)
#                     results.append({
#                         'learning_rate': lr,
#                         'batch_size': batch_size,
#                         'num_epochs': num_epochs,
#                         'hidden_units': hidden_units,
#                         'avg_val_loss': avg_val_loss
#                     })

#     # 转换结果为DataFrame以便分析
#     results_df = pd.DataFrame(results)

#     # 找到最佳配置
#     best_config = results_df.loc[results_df['avg_val_loss'].idxmin()]

#     print(f"\nBest Configuration for {model_class.__name__}: {best_config}")
#     return results_df
      
# # 评估每个模型
# cnn_results = train_and_evaluate_model(SimpleCNN, num_inputs=10, num_outputs=1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor)
# lstm_results = train_and_evaluate_model(SimpleLSTM, num_inputs=10, num_outputs=1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor)
# cnn_lstm_results = train_and_evaluate_model(CNNLSTM, num_inputs=10, num_outputs=1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor)
# transformer_results = train_and_evaluate_model(SimpleTransformer, num_inputs=10, num_outputs=1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor)
# fcn1_results = train_and_evaluate_model(SimpleFCN1, num_inputs=10, num_outputs=1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor)
# fcn2_results = train_and_evaluate_model(SimpleFCN2, num_inputs=10, num_outputs=1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor)

In [4]:
# results_df = pd.DataFrame(cnn_results)
# results_df.to_csv('./parameters_configuration/cnn_BaseConfig-SPRING.csv') 
# results_df = pd.DataFrame(lstm_results)
# results_df.to_csv('./parameters_configuration/lstm_BaseConfig-SPRING.csv') 
# results_df = pd.DataFrame(cnn_lstm_results)
# results_df.to_csv('./parameters_configuration/cnn_lstm_BaseConfig-SPRING.csv') 
# results_df = pd.DataFrame(transformer_results)
# results_df.to_csv('./parameters_configuration/transformer_BaseConfig-SPRING.csv') 
# results_df = pd.DataFrame(fcn1_results)
# results_df.to_csv('./parameters_configuration/fcn1_BaseConfig-SPRING.csv') 
# results_df = pd.DataFrame(fcn2_results)
# results_df.to_csv('./parameters_configuration/fcn2_BaseConfig-SPRING.csv') 

In [5]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

results_df_cnn = pd.read_csv('./parameters_configuration/cnn_BaseConfig-SPRING.csv',index_col=0)
results_df_lstm = pd.read_csv('./parameters_configuration/lstm_BaseConfig-SPRING.csv',index_col=0)
results_df_cnn_lstm = pd.read_csv('./parameters_configuration/cnn_lstm_BaseConfig-SPRING.csv',index_col=0)
results_df_transformer = pd.read_csv('./parameters_configuration/transformer_BaseConfig-SPRING.csv',index_col=0)
results_df_fcn1 = pd.read_csv('./parameters_configuration/fcn1_BaseConfig-SPRING.csv',index_col=0)
results_df_fcn2 = pd.read_csv('./parameters_configuration/fcn2_BaseConfig-SPRING.csv',index_col=0)
results_df_Informer = pd.read_csv('./parameters_configuration/Informer_BaseConfig-SPRING.csv',index_col=0)
results_df_Autoformer = pd.read_csv('./parameters_configuration/Autoformer_BaseConfig-SPRING.csv',index_col=0)

def train_and_test_model(model_class, learning_rate, batch_size, num_epochs, hidden_units, train_input_tensor, train_label_tensor, test_input_tensor, test_label_tensor, scaler_output, num_inputs=10, num_outputs=1, time_step=24):
    results = []

    # 如果模型是CNN或LSTM，转换数据为时间序列格式
    if issubclass(model_class, (SimpleCNN, SimpleLSTM, CNNLSTM, Informer, Autoformer)):
        train_input_tensor = create_time_series_data(train_input_tensor, time_step)
        train_label_tensor = train_label_tensor[time_step - 1:]  # 对齐标签
        test_input_tensor = create_time_series_data(test_input_tensor, time_step)
        test_label_tensor = test_label_tensor[time_step - 1:]  # 对齐标签
        learning_rate = learning_rate*0.1
    # 创建数据加载器
    train_loader = torch.utils.data.DataLoader(
        dataset=torch.utils.data.TensorDataset(train_input_tensor.clone().detach(), train_label_tensor.clone().detach()),
        batch_size=int(batch_size),
        shuffle=True
    )

    # 初始化模型、损失函数和优化器
    model = model_class(num_inputs, num_outputs, hidden_units)
    loss_func = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 训练模型
    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        for step, (x, y) in enumerate(train_loader):
            output = model(x)
            loss = loss_func(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # 验证模型
    model.eval()
    with torch.no_grad():
        output = model(test_input_tensor.clone().detach())
        output_inverse = scaler_output.inverse_transform(output.numpy())
        test_label_inverse = scaler_output.inverse_transform(test_label_tensor.numpy())

        # 计算评估指标
        rmse = np.sqrt(mean_squared_error(test_label_inverse, output_inverse))
        mae = mean_absolute_error(test_label_inverse, output_inverse)
        mape = np.mean(np.abs((test_label_inverse - output_inverse) / test_label_inverse)) * 100

        test_loss = loss_func(output, test_label_tensor.clone().detach()).item()

    # print(f"Test Loss: {test_loss:.4f}")

    results.append({
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape
    })

    return results

In [6]:
# 找到每个模型的最佳配置
def get_best_config(results_df):
    best_config = results_df.loc[results_df['avg_val_loss'].idxmin(), ['learning_rate', 'batch_size', 'num_epochs', 'hidden_units']]
    return {
        'learning_rate': best_config['learning_rate'],
        'batch_size': int(best_config['batch_size']),
        'num_epochs': int(best_config['num_epochs']),
        'hidden_units': int(best_config['hidden_units'])
    }

# 获取每个模型的最佳配置
best_config_cnn = get_best_config(results_df_cnn)
best_config_lstm = get_best_config(results_df_lstm)
best_config_cnn_lstm = get_best_config(results_df_cnn_lstm)
best_config_transformer = get_best_config(results_df_transformer)
best_config_fcn1 = get_best_config(results_df_fcn1)
best_config_fcn2 = get_best_config(results_df_fcn2)
best_config_Informer = get_best_config(results_df_Informer)
best_config_Autoformer = get_best_config(results_df_Autoformer)

# 假设 train_input_tensor, train_label_tensor, test_input_tensor, test_label_tensor 已经定义

# 训练和测试每个模型，并打印结果
import time
start_time = time.time()
cnn_test_results = train_and_test_model(SimpleCNN, **best_config_cnn, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("CNN Test Results:", cnn_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
lstm_test_results = train_and_test_model(SimpleLSTM, **best_config_lstm, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("LSTM Test Results:", lstm_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
cnn_lstm_test_results = train_and_test_model(CNNLSTM, **best_config_cnn_lstm, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("CNN-LSTM Test Results:", cnn_lstm_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
transformer_test_results = train_and_test_model(SimpleTransformer, **best_config_transformer, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("Transformer Test Results:", transformer_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
fcn1_test_results = train_and_test_model(SimpleFCN1, **best_config_fcn1, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("FCN1 Test Results:", fcn1_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
fcn2_test_results = train_and_test_model(SimpleFCN2, **best_config_fcn2, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("FCN2 Test Results:", fcn2_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
Informer_test_results = train_and_test_model(Informer, **best_config_Informer, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("Informer Test Results:", Informer_test_results)
end_time = time.time()
print(end_time-start_time)

start_time = time.time()
Autoformer_test_results = train_and_test_model(Autoformer, **best_config_Autoformer, train_input_tensor=train_input_tensor, train_label_tensor=train_label_tensor, test_input_tensor=test_input_tensor, test_label_tensor=test_label_tensor, scaler_output=scaler_output)
print("Autoformer Test Results:", Autoformer_test_results)
end_time = time.time()
print(end_time-start_time)

CNN Test Results: [{'RMSE': 402.27502, 'MAE': 329.6201, 'MAPE': 4.347362741827965}]
123.7829978466034
LSTM Test Results: [{'RMSE': 433.89166, 'MAE': 339.44846, 'MAPE': 4.458225145936012}]
38.21889066696167
CNN-LSTM Test Results: [{'RMSE': 415.73938, 'MAE': 344.46292, 'MAPE': 4.536542668938637}]
188.3872127532959




Transformer Test Results: [{'RMSE': 680.3065, 'MAE': 505.82855, 'MAPE': 6.341973692178726}]
554.6263198852539
FCN1 Test Results: [{'RMSE': 386.18146, 'MAE': 311.89343, 'MAPE': 4.135579243302345}]
6.648909091949463
FCN2 Test Results: [{'RMSE': 347.53235, 'MAE': 277.79965, 'MAPE': 3.656603768467903}]
8.081424951553345
Informer Test Results: [{'RMSE': 433.17636, 'MAE': 336.19913, 'MAPE': 4.484019428491592}]
706.3486950397491
Autoformer Test Results: [{'RMSE': 623.0429, 'MAE': 534.8849, 'MAPE': 7.020276784896851}]
523.3416652679443


In [7]:
result = dict()
result["cnn"] = cnn_test_results
result["lstm"] = lstm_test_results
result["cnn_lstm"] = cnn_lstm_test_results
result["transformer"] = transformer_test_results
result["fcn1"] = fcn1_test_results
result["fcn2"] = fcn2_test_results

In [8]:
result

{'cnn': [{'RMSE': 402.27502, 'MAE': 329.6201, 'MAPE': 4.347362741827965}],
 'lstm': [{'RMSE': 433.89166, 'MAE': 339.44846, 'MAPE': 4.458225145936012}],
 'cnn_lstm': [{'RMSE': 415.73938,
   'MAE': 344.46292,
   'MAPE': 4.536542668938637}],
 'transformer': [{'RMSE': 680.3065,
   'MAE': 505.82855,
   'MAPE': 6.341973692178726}],
 'fcn1': [{'RMSE': 386.18146, 'MAE': 311.89343, 'MAPE': 4.135579243302345}],
 'fcn2': [{'RMSE': 347.53235, 'MAE': 277.79965, 'MAPE': 3.656603768467903}]}