In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 读取数据
data = pd.read_excel("湖北省小时数据.xlsx")  # 替换为你的文件路径

# 特征列
features = ["type", "weather", "wind", "humidity", "barometer", "load"]
data = data[features]

# 独热编码 type 列
data = pd.get_dummies(data, columns=["type"], prefix="type")

# 获取所有列
type_columns = [col for col in data.columns if col.startswith("type_")]
numeric_columns = [col for col in data.columns if col not in type_columns]

# 对非 type 列归一化
scaler = MinMaxScaler()
data[numeric_columns] = scaler.fit_transform(data[numeric_columns])

# 保存完整的 type 数据，用于生成 Y
complete_type_data = data[type_columns].copy()


In [2]:
# 创建滑动窗口
def create_sliding_window(data_X, data_Y, window_size=24, type_columns=None):
    """
    创建滑动窗口数据集
    - X 包含完整输入数据
    - Y 包含完整的目标数据
    """
    X, Y = [], []
    for i in range(len(data_X) - window_size):
        # 输入：连续 window_size 条数据
        X.append(data_X.iloc[i:i+window_size].values)
        # 输出：连续 window_size 条数据的完整 type
        Y.append(data_Y.iloc[i:i+window_size][type_columns].values)
    return torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)

# 创建输入和目标数据
X, Y = create_sliding_window(data, complete_type_data, window_size=24, type_columns=type_columns)

# 划分训练集和测试集
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


  return torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)


In [3]:

# 引入缺失值（在训练集和测试集上分别操作）
def introduce_missing_types(X, type_columns, missing_fraction=0.1):
    """
    在滑动窗口生成的数据集内随机将一个样本的所有 type 标签置为 0
    """
    X_with_missing = X.clone()  # 避免修改原始数据
    num_samples = X_with_missing.shape[0]  # 样本数
    num_missing = int(num_samples * missing_fraction)

    # 随机选择缺失样本的索引
    missing_indices = np.random.choice(num_samples, num_missing, replace=False)

    # 将缺失样本的 type 列置为 0
    for idx in missing_indices:
        X_with_missing[idx, :, :len(type_columns)] = 0  # 假设 type 列在前
    return X_with_missing

# 在训练集和测试集上引入缺失值
X_train_with_missing = introduce_missing_types(X_train, type_columns, missing_fraction=0.1)
X_test_with_missing = introduce_missing_types(X_test, type_columns, missing_fraction=0.1)

# 输出数据形状
print("X_train_with_missing shape:", X_train_with_missing.shape)
print("Y_train shape:", Y_train.shape)

X_train_with_missing shape: torch.Size([63091, 24, 126])
Y_train shape: torch.Size([63091, 24, 121])


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim

class ConditionalVAE(nn.Module):
    """
    Conditional VAE:
      - 编码器 (encoder): LSTM, 输入 X => 得到 (mu, logvar)
      - 重参数化 (reparameterize): mu + eps*sigma => z
      - 解码器 (decoder): 以 (z, X) => 生成 Y( logits )
      
    适用于: (batch_size, seq_len, x_dim) => (batch_size, seq_len, y_dim)
    其中 y_dim = 121 (one-hot 分类)
    """
    def __init__(self, seq_len, x_dim, y_dim, hidden_dim, latent_dim, num_layers, use_x_in_decoder):
        super(ConditionalVAE, self).__init__()

        self.seq_len = seq_len
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.num_layers = num_layers
        self.use_x_in_decoder = use_x_in_decoder

        # ---------- Encoder: LSTM -> (h_n) -> mu, logvar ---------- #
        self.encoder_lstm = nn.LSTM(
            input_size = x_dim,
            hidden_size = hidden_dim,
            num_layers = num_layers,
            batch_first = True
        )
        self.mu_layer = nn.Linear(hidden_dim, latent_dim)
        self.logvar_layer = nn.Linear(hidden_dim, latent_dim)

        # ---------- Decoder: LSTM -> Y(logits) ---------- #
        # 如果 decoder 也要用 X 信息，可以在输入里拼接 z + X
        if self.use_x_in_decoder:
            self.decoder_input_dim = x_dim + latent_dim
        else:
            self.decoder_input_dim = latent_dim

        self.decoder_lstm = nn.LSTM(
            input_size = self.decoder_input_dim,
            hidden_size = hidden_dim,
            num_layers = num_layers,
            batch_first = True
        )
        # 输出层, 将 hidden_dim -> y_dim(=121), 作为分类 logits
        self.output_layer = nn.Linear(hidden_dim, y_dim)

    def encode(self, X):
        """
        X: (batch_size, seq_len, x_dim)
        返回: mu, logvar => (batch_size, latent_dim)
        """
        # LSTM 输出: out, (h_n, c_n)
        #   out: (batch_size, seq_len, hidden_dim)
        #   h_n: (num_layers, batch_size, hidden_dim)
        _, (h_n, _) = self.encoder_lstm(X)
        h_last = h_n[-1]  # 取最顶层 hidden state, shape (batch_size, hidden_dim)

        mu = self.mu_layer(h_last)
        logvar = self.logvar_layer(h_last)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z, X=None):
        """
        z: (batch_size, latent_dim)
        X: (batch_size, seq_len, x_dim), 如果 use_x_in_decoder=True, 拼到输入
        返回: Y_logits: (batch_size, seq_len, y_dim)
        """
        batch_size = z.size(0)

        # 初始化 (h_0, c_0)
        h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim, device=z.device)
        c_0 = torch.zeros_like(h_0)

        if self.use_x_in_decoder and X is not None:
            # 在每个 time step 拼接 [z, X_t]
            # 先把 z 扩展到 (batch_size, seq_len, latent_dim)
            z_repeated = z.unsqueeze(1).repeat(1, self.seq_len, 1)
            decoder_input = torch.cat([z_repeated, X], dim=-1)  # shape: (batch_size, seq_len, x_dim + latent_dim)
        else:
            # 只用 z, 不拼 X
            z_repeated = z.unsqueeze(1).repeat(1, self.seq_len, 1)
            decoder_input = z_repeated

        out, _ = self.decoder_lstm(decoder_input, (h_0, c_0))  # (batch_size, seq_len, hidden_dim)
        Y_logits = self.output_layer(out)                      # (batch_size, seq_len, y_dim)
        return Y_logits

    def forward(self, X):
        """
        前向: X => encode => reparameterize => decode => Y_logits
        """
        mu, logvar = self.encode(X)
        z = self.reparameterize(mu, logvar)
        Y_logits = self.decode(z, X if self.use_x_in_decoder else None)
        return Y_logits, mu, logvar


In [21]:
def cvae_loss_ce(Y_logits, Y_onehot, mu, logvar):
    """
    Y_logits: (batch_size, seq_len, 121)  -- decoder输出的 logits
    Y_onehot: (batch_size, seq_len, 121)  -- 真实的 one-hot
    mu, logvar: (batch_size, latent_dim)
    
    返回:
        total_loss = CE + KL
        ce_loss
        kld
    """
    batch_size, seq_len, num_class = Y_logits.shape

    # 1) 把 logits reshape
    Y_logits_2d = Y_logits.view(-1, num_class)  # (batch_size*seq_len, 121)
    # 2) one-hot 转 index
    Y_label = Y_onehot.argmax(dim=-1).view(-1)  # (batch_size*seq_len,)

    ce_fn = nn.CrossEntropyLoss(reduction='sum')
    ce_loss = ce_fn(Y_logits_2d, Y_label) / batch_size

    # KL 散度
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
    kld = torch.mean(kld)

    total_loss = ce_loss + kld
    return total_loss, ce_loss, kld


In [22]:
def compute_accuracy(Y_logits, Y_onehot):
    """
    对 decoder 输出的 logits (batch_size, seq_len, 121)，
    与真实 one-hot (batch_size, seq_len, 121) 计算准确率
    """
    pred_label = Y_logits.argmax(dim=-1)  # (batch_size, seq_len)
    true_label = Y_onehot.argmax(dim=-1)  # (batch_size, seq_len)

    correct = (pred_label == true_label).float().sum()
    total = pred_label.numel()  # batch_size * seq_len
    acc = correct / total
    return acc.item()


In [26]:
def evaluate_missing_accuracy(model, data_loader, x_dim, type_dim, device):
    """
    逐批统计 overall_acc、missing_acc 累加，再取平均
    返回: (avg_overall_acc, avg_missing_acc)
    """
    model.eval()
    total_overall_acc = 0.0
    total_missing_acc = 0.0
    count = 0

    with torch.no_grad():
        for Xb, Yb in data_loader:
            Xb = Xb.to(device)
            Yb = Yb.to(device)

            Y_logits, _, _ = model(Xb)  # (batch_size, seq_len, y_dim)
            overall_acc, missing_acc = compute_accuracy_both(Xb, Y_logits, Yb, x_dim, type_dim)

            total_overall_acc += overall_acc
            total_missing_acc += missing_acc
            count += 1

    avg_overall_acc = total_overall_acc / count if count > 0 else 0.0
    avg_missing_acc = total_missing_acc / count if count > 0 else 0.0

    return avg_overall_acc, avg_missing_acc


In [27]:
def compute_accuracy_both(Xb, Y_logits, Yb, x_dim, type_dim):
    """
    返回:
      overall_acc: 整个 batch_size * seq_len 的准确率
      missing_acc: 仅在 "Xb 最后 type_dim 列全部为 0" 的时间步上的准确率
    """
    # 1) argmax
    pred_label = Y_logits.argmax(dim=-1)   # (batch_size, seq_len)
    true_label = Yb.argmax(dim=-1)         # (batch_size, seq_len)

    # 2) overall
    correct_all = (pred_label == true_label).sum().float()
    total_all = pred_label.numel()
    overall_acc = correct_all / total_all if total_all > 0 else 0.0

    # 3) missing_mask: (batch_size, seq_len) = True表示type全部为0
    start_type_idx = x_dim - type_dim
    missing_mask = (Xb[..., start_type_idx:].sum(dim=-1) == 0.0)

    # 展开成 1D
    mask_flat = missing_mask.view(-1)
    pred_flat = pred_label.view(-1)
    true_flat = true_label.view(-1)

    pred_missing = pred_flat[mask_flat]
    true_missing = true_flat[mask_flat]

    correct_missing = (pred_missing == true_missing).sum().float()
    total_missing = mask_flat.sum().float()
    missing_acc = correct_missing / total_missing if total_missing > 0 else 0.0

    return overall_acc, missing_acc


In [None]:
from torch.utils.data import TensorDataset, DataLoader

# 请确保以下张量已经准备好:
#   X_train_with_missing: shape [N, 24, 126], float32
#   Y_train: shape [N, 24, 121], float32 (one-hot)
#   X_test_with_missing: shape [N_test, 24, 126]
#   Y_test: shape [N_test, 24, 121]
#
# 如果 Y_train / Y_test 里是 one-hot，就可以直接用 cvae_loss_ce()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 超参数
seq_len = 24
x_dim = 126
y_dim = 121   # one-hot 分类，共 121 类
hidden_dim = 128
latent_dim = 64
num_layers = 2

batch_size = 64
lr = 1e-3
epochs = 200

# 构建 DataLoader
train_ds = TensorDataset(X_train_with_missing, Y_train)
test_ds  = TensorDataset(X_test_with_missing,  Y_test)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# 初始化模型
model = ConditionalVAE(
    seq_len=seq_len,
    x_dim=x_dim,
    y_dim=y_dim,
    hidden_dim=hidden_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    use_x_in_decoder=False  # Decoder 也能看到 X
).to(device)

optimizer = optim.Adam(model.parameters(), lr=lr)

# 开始训练
for epoch in range(1, epochs+1):
    model.train()
    total_loss_sum = 0.0

    for Xb, Yb in train_loader:
        Xb = Xb.to(device)  # (batch_size, seq_len, x_dim)
        Yb = Yb.to(device)  # (batch_size, seq_len, y_dim=121)

        optimizer.zero_grad()

        # 前向
        Y_logits, mu, logvar = model(Xb)

        # 交叉熵 + KL
        total_loss, ce_loss, kld_loss = cvae_loss_ce(Y_logits, Yb, mu, logvar)

        total_loss.backward()
        optimizer.step()

        total_loss_sum += total_loss.item()

    avg_train_loss = total_loss_sum / len(train_loader)

    # 验证(计算准确率)
    model.eval()
    total_acc = 0.0
    count = 0
    with torch.no_grad():
        for Xb, Yb in test_loader:
            Xb = Xb.to(device)
            Yb = Yb.to(device)

            Y_logits, mu, logvar = model(Xb)
            # 计算准确率
            acc = compute_accuracy(Y_logits, Yb)
            total_acc += acc
            count += 1

    avg_test_acc = total_acc / count
    avg_train_loss = total_loss_sum / len(train_loader)

    # ====== 2. 在整个训练集上计算 overall_acc & missing_acc ======
    train_overall_acc, train_missing_acc = evaluate_missing_accuracy(
        model, train_loader, x_dim=126, type_dim=121, device=device
    )

    # ====== 3. 在测试集上计算 overall_acc & missing_acc ======
    test_overall_acc, test_missing_acc = evaluate_missing_accuracy(
        model, test_loader, x_dim=126, type_dim=121, device=device
    )

    # ====== 4. 打印结果 ======
    print(f"Epoch {epoch}/{epochs} | "
          f"Train Loss: {avg_train_loss:.4f} | "
          f"Train Overall Acc: {train_overall_acc:.4f} | Train Missing Acc: {train_missing_acc:.4f} | "
          f"Test Overall Acc: {test_overall_acc:.4f} | Test Missing Acc: {test_missing_acc:.4f}")


Epoch 1/200 | Train Loss: 53.8526 | Train Overall Acc: 0.3777 | Train Missing Acc: 0.2613 | Test Overall Acc: 0.3795 | Test Missing Acc: 0.2539
Epoch 2/200 | Train Loss: 45.6270 | Train Overall Acc: 0.4350 | Train Missing Acc: 0.2650 | Test Overall Acc: 0.4362 | Test Missing Acc: 0.2562
Epoch 3/200 | Train Loss: 42.4480 | Train Overall Acc: 0.5054 | Train Missing Acc: 0.2576 | Test Overall Acc: 0.5067 | Test Missing Acc: 0.2493
Epoch 4/200 | Train Loss: 39.1940 | Train Overall Acc: 0.5656 | Train Missing Acc: 0.2695 | Test Overall Acc: 0.5659 | Test Missing Acc: 0.2602
Epoch 5/200 | Train Loss: 37.4035 | Train Overall Acc: 0.5904 | Train Missing Acc: 0.2677 | Test Overall Acc: 0.5918 | Test Missing Acc: 0.2572
Epoch 6/200 | Train Loss: 36.3787 | Train Overall Acc: 0.6035 | Train Missing Acc: 0.2724 | Test Overall Acc: 0.6040 | Test Missing Acc: 0.2671
Epoch 7/200 | Train Loss: 35.8486 | Train Overall Acc: 0.6119 | Train Missing Acc: 0.2580 | Test Overall Acc: 0.6130 | Test Missing Acc: