In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [2]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 读取数据
data = pd.read_excel("湖北省小时数据.xlsx")  # 替换为你的文件路径

# 特征列
features = ["type", "weather", "wind", "humidity", "barometer", "load"]
data = data[features]

# 独热编码 type 列
data = pd.get_dummies(data, columns=["type"], prefix="type")

# 获取所有列
type_columns = [col for col in data.columns if col.startswith("type_")]
numeric_columns = [col for col in data.columns if col not in type_columns]

# 对非 type 列归一化
scaler = MinMaxScaler()
data[numeric_columns] = scaler.fit_transform(data[numeric_columns])

# 保存完整的 type 数据，用于生成 Y
complete_type_data = data[type_columns].copy()


In [3]:
# 创建滑动窗口
def create_sliding_window(data_X, data_Y, window_size=24, type_columns=None):
    """
    创建滑动窗口数据集
    - X 包含完整输入数据
    - Y 包含完整的目标数据
    """
    X, Y = [], []
    for i in range(len(data_X) - window_size):
        # 输入：连续 window_size 条数据
        X.append(data_X.iloc[i:i+window_size].values)
        # 输出：连续 window_size 条数据的完整 type
        Y.append(data_Y.iloc[i:i+window_size][type_columns].values)
    return torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)

# 创建输入和目标数据
X, Y = create_sliding_window(data, complete_type_data, window_size=24, type_columns=type_columns)

# 划分训练集和测试集
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


  return torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)


In [4]:

# 引入缺失值（在训练集和测试集上分别操作）
def introduce_missing_types(X, type_columns, missing_fraction=0.1):
    """
    在滑动窗口生成的数据集内随机将一个样本的所有 type 标签置为 0
    """
    X_with_missing = X.clone()  # 避免修改原始数据
    num_samples = X_with_missing.shape[0]  # 样本数
    num_missing = int(num_samples * missing_fraction)

    # 随机选择缺失样本的索引
    missing_indices = np.random.choice(num_samples, num_missing, replace=False)

    # 将缺失样本的 type 列置为 0
    for idx in missing_indices:
        X_with_missing[idx, :, :len(type_columns)] = 0  # 假设 type 列在前
    return X_with_missing

# 在训练集和测试集上引入缺失值
X_train_with_missing = introduce_missing_types(X_train, type_columns, missing_fraction=0.1)
X_test_with_missing = introduce_missing_types(X_test, type_columns, missing_fraction=0.1)

# 输出数据形状
print("X_train_with_missing shape:", X_train_with_missing.shape)
print("Y_train shape:", Y_train.shape)

X_train_with_missing shape: torch.Size([63091, 24, 126])
Y_train shape: torch.Size([63091, 24, 121])


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

class PositionalEncoding(nn.Module):
    """
    标准的 Transformer 位置编码实现
    参考自 PyTorch 官方教程：
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    """
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        # 偶数维: sin, 奇数维: cos
        pe[:, 0::2] = torch.sin(position * div_term)  # 偶数索引
        pe[:, 1::2] = torch.cos(position * div_term)  # 奇数索引

        # 注册成缓冲区，不会作为参数训练
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        x: (batch_size, seq_len, d_model)
        """
        seq_len = x.size(1)
        # (batch_size, seq_len, d_model) + (seq_len, d_model)
        # 需要broadcast: 因此先在 batch 维度上 unsqueeze(0) 再加
        x = x + self.pe[:seq_len].unsqueeze(0)
        return x

class ConditionalVAE_Transformer(nn.Module):
    """
    将原先 LSTM 改为 Transformer 的 Conditional VAE 实现。
    形状约定：
      X: (batch_size, seq_len, x_dim)
      Y: (batch_size, seq_len, y_dim)  (one-hot)

    * use_x_in_decoder = True 表示解码器输入 (z, X_t) 拼接后，再通过 Transformer Decoder。
      这里简化为使用 TransformerEncoder 来处理整个序列（并行），不做自回归 mask。
    """
    def __init__(self,
                 seq_len,
                 x_dim,
                 y_dim,
                 hidden_dim=128,
                 latent_dim=64,
                 num_layers=2,
                 use_x_in_decoder=False,
                 nhead=4,              # transformer多头注意力的头数
                 dim_feedforward=256,  # feedforward隐藏层维度
                 dropout=0.1):
        super().__init__()

        self.seq_len = seq_len
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.num_layers = num_layers
        self.use_x_in_decoder = use_x_in_decoder

        # ========== Encoder部分 ==========
        # 1) 将输入 x_dim 投影到 embed_dim
        self.embed_dim = hidden_dim   # 这里可自行设定，比如和 hidden_dim 一样
        self.input_proj = nn.Linear(x_dim, self.embed_dim)
        self.pos_encoder = PositionalEncoding(self.embed_dim)

        # 2) TransformerEncoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True  # 若你的PyTorch版本>=1.10,可用batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )

        # 3) 从最终的序列表示里取一个向量 (比如最后时刻) => mu, logvar
        self.mu_layer = nn.Linear(self.embed_dim, latent_dim)
        self.logvar_layer = nn.Linear(self.embed_dim, latent_dim)

        # ========== Decoder部分 ==========
        # 解码器的输入维度 (z + X) 或单独 z
        if self.use_x_in_decoder:
            self.decoder_input_dim = x_dim + latent_dim
        else:
            self.decoder_input_dim = latent_dim

        self.decoder_proj = nn.Linear(self.decoder_input_dim, self.embed_dim)
        self.pos_decoder = PositionalEncoding(self.embed_dim)

        # 这里复用 TransformerEncoder 做并行解码（非自回归）
        # 如果要做自回归，可改用 nn.TransformerDecoder
        decoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_decoder = nn.TransformerEncoder(
            decoder_layer,
            num_layers=num_layers
        )

        # 输出到 y_dim（比如 121 类 one-hot）
        self.output_layer = nn.Linear(self.embed_dim, y_dim)

    def encode(self, X):
        """
        编码器：X -> (batch, seq_len, embed_dim) -> TransformerEncoder -> 取最后时刻输出 => mu, logvar
        """
        # 1) 投影
        X_embed = self.input_proj(X)  # (B, T, embed_dim)
        # 2) 加位置编码
        X_embed = self.pos_encoder(X_embed)
        # 3) TransformerEncoder
        #    若 batch_first=True, 输出形状依然 (B, T, embed_dim)
        enc_out = self.transformer_encoder(X_embed)  # (B, T, embed_dim)

        # 取最后一个 time step 的 hidden state: enc_out[:, -1, :]
        # 也可以选择平均池化或别的方式
        h_last = enc_out[:, -1, :]  # (B, embed_dim)

        mu = self.mu_layer(h_last)
        logvar = self.logvar_layer(h_last)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z, X=None):
        """
        解码器：z(可以拼接X) -> 投影 -> 位置编码 -> TransformerEncoder -> 输出层
        最终得到 Y_logits: (batch_size, seq_len, y_dim)
        """
        B = z.size(0)

        # 1) 先把 z 在时间维度上重复
        #    z shape: (B, latent_dim) => (B, seq_len, latent_dim)
        z_repeated = z.unsqueeze(1).repeat(1, self.seq_len, 1)  # (B, T, latent_dim)

        if self.use_x_in_decoder and X is not None:
            # 拼接 z 和 X: (B, T, x_dim + latent_dim)
            decoder_input = torch.cat([X, z_repeated], dim=-1)
        else:
            # 只用 z
            decoder_input = z_repeated

        # 2) 投影到 embed_dim
        dec_embed = self.decoder_proj(decoder_input)  # (B, T, embed_dim)
        # 3) 位置编码
        dec_embed = self.pos_decoder(dec_embed)
        # 4) 通过 TransformerEncoder 做并行解码
        dec_out = self.transformer_decoder(dec_embed)  # (B, T, embed_dim)
        # 5) 输出层 => (B, T, y_dim)
        Y_logits = self.output_layer(dec_out)
        return Y_logits

    def forward(self, X):
        """
        前向过程： X -> encode -> reparam -> decode
        """
        mu, logvar = self.encode(X)
        z = self.reparameterize(mu, logvar)
        Y_logits = self.decode(z, X if self.use_x_in_decoder else None)
        return Y_logits, mu, logvar


In [6]:
def cvae_loss_ce(Y_logits, Y_onehot, mu, logvar):
    """
    Y_logits: (batch_size, seq_len, 121)  -- decoder输出的 logits
    Y_onehot: (batch_size, seq_len, 121)  -- 真实的 one-hot
    mu, logvar: (batch_size, latent_dim)
    
    返回:
        total_loss = CE + KL
        ce_loss
        kld
    """
    batch_size, seq_len, num_class = Y_logits.shape

    # 1) 把 logits reshape
    Y_logits_2d = Y_logits.view(-1, num_class)  # (batch_size*seq_len, 121)
    # 2) one-hot 转 index
    Y_label = Y_onehot.argmax(dim=-1).view(-1)  # (batch_size*seq_len,)

    ce_fn = nn.CrossEntropyLoss(reduction='sum')
    ce_loss = ce_fn(Y_logits_2d, Y_label) / batch_size

    # KL 散度
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
    kld = torch.mean(kld)

    total_loss = ce_loss + kld
    return total_loss, ce_loss, kld


In [7]:
def compute_accuracy(Y_logits, Y_onehot):
    """
    对 decoder 输出的 logits (batch_size, seq_len, 121)，
    与真实 one-hot (batch_size, seq_len, 121) 计算准确率
    """
    pred_label = Y_logits.argmax(dim=-1)  # (batch_size, seq_len)
    true_label = Y_onehot.argmax(dim=-1)  # (batch_size, seq_len)

    correct = (pred_label == true_label).float().sum()
    total = pred_label.numel()  # batch_size * seq_len
    acc = correct / total
    return acc.item()


In [8]:
def evaluate_missing_accuracy(model, data_loader, x_dim, type_dim, device):
    """
    逐批统计 overall_acc、missing_acc 累加，再取平均
    返回: (avg_overall_acc, avg_missing_acc)
    """
    model.eval()
    total_overall_acc = 0.0
    total_missing_acc = 0.0
    count = 0

    with torch.no_grad():
        for Xb, Yb in data_loader:
            Xb = Xb.to(device)
            Yb = Yb.to(device)

            Y_logits, _, _ = model(Xb)  # (batch_size, seq_len, y_dim)
            overall_acc, missing_acc = compute_accuracy_both(Xb, Y_logits, Yb, x_dim, type_dim)

            total_overall_acc += overall_acc
            total_missing_acc += missing_acc
            count += 1

    avg_overall_acc = total_overall_acc / count if count > 0 else 0.0
    avg_missing_acc = total_missing_acc / count if count > 0 else 0.0

    return avg_overall_acc, avg_missing_acc


In [9]:
def compute_accuracy_both(Xb, Y_logits, Yb, x_dim, type_dim):
    """
    返回:
      overall_acc: 整个 batch_size * seq_len 的准确率
      missing_acc: 仅在 "Xb 最后 type_dim 列全部为 0" 的时间步上的准确率
    """
    # 1) argmax
    pred_label = Y_logits.argmax(dim=-1)   # (batch_size, seq_len)
    true_label = Yb.argmax(dim=-1)         # (batch_size, seq_len)

    # 2) overall
    correct_all = (pred_label == true_label).sum().float()
    total_all = pred_label.numel()
    overall_acc = correct_all / total_all if total_all > 0 else 0.0

    # 3) missing_mask: (batch_size, seq_len) = True表示type全部为0
    start_type_idx = x_dim - type_dim
    missing_mask = (Xb[..., start_type_idx:].sum(dim=-1) == 0.0)

    # 展开成 1D
    mask_flat = missing_mask.view(-1)
    pred_flat = pred_label.view(-1)
    true_flat = true_label.view(-1)

    pred_missing = pred_flat[mask_flat]
    true_missing = true_flat[mask_flat]

    correct_missing = (pred_missing == true_missing).sum().float()
    total_missing = mask_flat.sum().float()
    missing_acc = correct_missing / total_missing if total_missing > 0 else 0.0

    return overall_acc, missing_acc


In [None]:
from torch.utils.data import TensorDataset, DataLoader

# 请确保以下张量已经准备好:
#   X_train_with_missing: shape [N, 24, 126], float32
#   Y_train: shape [N, 24, 121], float32 (one-hot)
#   X_test_with_missing: shape [N_test, 24, 126]
#   Y_test: shape [N_test, 24, 121]
#
# 如果 Y_train / Y_test 里是 one-hot，就可以直接用 cvae_loss_ce()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 超参数
seq_len = 24
x_dim = 126
y_dim = 121   # one-hot 分类，共 121 类
hidden_dim = 128
latent_dim = 64
num_layers = 2

batch_size = 64
lr = 1e-3
epochs = 200

# 额外的 Transformer 参数
nhead = 4
dim_feedforward = 256
dropout = 0.1

# 构建 DataLoader
train_ds = TensorDataset(X_train_with_missing, Y_train)
test_ds  = TensorDataset(X_test_with_missing,  Y_test)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# 初始化模型
model = ConditionalVAE_Transformer(
    seq_len=seq_len,
    x_dim=x_dim,
    y_dim=y_dim,
    hidden_dim=hidden_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    use_x_in_decoder=False,  # 是否在Decoder拼接X
    nhead=nhead,
    dim_feedforward=dim_feedforward,
    dropout=dropout
).to(device)

optimizer = optim.Adam(model.parameters(), lr=lr)


for epoch in range(1, epochs+1):
    model.train()
    total_loss_sum = 0.0

    for Xb, Yb in train_loader:
        Xb = Xb.to(device)
        Yb = Yb.to(device)
        optimizer.zero_grad()

        Y_logits, mu, logvar = model(Xb)
        total_loss, ce_loss, kld_loss = cvae_loss_ce(Y_logits, Yb, mu, logvar)
        total_loss.backward()
        optimizer.step()

        total_loss_sum += total_loss.item()

    avg_train_loss = total_loss_sum / len(train_loader)

    # 验证(计算准确率)
    model.eval()
    total_acc = 0.0
    count = 0
    with torch.no_grad():
        for Xb, Yb in test_loader:
            Xb = Xb.to(device)
            Yb = Yb.to(device)

            Y_logits, mu, logvar = model(Xb)
            # 计算准确率
            acc = compute_accuracy(Y_logits, Yb)
            total_acc += acc
            count += 1

    avg_test_acc = total_acc / count
    avg_train_loss = total_loss_sum / len(train_loader)

    # ====== 2. 在整个训练集上计算 overall_acc & missing_acc ======
    train_overall_acc, train_missing_acc = evaluate_missing_accuracy(
        model, train_loader, x_dim=126, type_dim=121, device=device
    )

    # ====== 3. 在测试集上计算 overall_acc & missing_acc ======
    test_overall_acc, test_missing_acc = evaluate_missing_accuracy(
        model, test_loader, x_dim=126, type_dim=121, device=device
    )

    # ====== 4. 打印结果 ======
    print(f"Epoch {epoch}/{epochs} | "
          f"Train Loss: {avg_train_loss:.4f} | "
          f"Train Overall Acc: {train_overall_acc:.4f} | Train Missing Acc: {train_missing_acc:.4f} | "
          f"Test Overall Acc: {test_overall_acc:.4f} | Test Missing Acc: {test_missing_acc:.4f}")


Epoch 1/200 | Train Loss: 49.1291 | Train Overall Acc: 0.4188 | Train Missing Acc: 0.2727 | Test Overall Acc: 0.4195 | Test Missing Acc: 0.2739
Epoch 2/200 | Train Loss: 44.0132 | Train Overall Acc: 0.4179 | Train Missing Acc: 0.2608 | Test Overall Acc: 0.4165 | Test Missing Acc: 0.2631
Epoch 3/200 | Train Loss: 43.5449 | Train Overall Acc: 0.4222 | Train Missing Acc: 0.2060 | Test Overall Acc: 0.4217 | Test Missing Acc: 0.2157
Epoch 4/200 | Train Loss: 43.2408 | Train Overall Acc: 0.4537 | Train Missing Acc: 0.2608 | Test Overall Acc: 0.4531 | Test Missing Acc: 0.2653
Epoch 5/200 | Train Loss: 41.7163 | Train Overall Acc: 0.5224 | Train Missing Acc: 0.2739 | Test Overall Acc: 0.5231 | Test Missing Acc: 0.2785
Epoch 6/200 | Train Loss: 39.8774 | Train Overall Acc: 0.5417 | Train Missing Acc: 0.2764 | Test Overall Acc: 0.5422 | Test Missing Acc: 0.2777
Epoch 7/200 | Train Loss: 39.2262 | Train Overall Acc: 0.5503 | Train Missing Acc: 0.2749 | Test Overall Acc: 0.5506 | Test Missing Acc: