In [5]:
import numpy as np
import torch
import random
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Fix every random number generator used in this notebook to one seed so
# runs are repeatable (Python `random`, NumPy, PyTorch CPU and all CUDA devices).
seed = 3407
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed)

# Monthly CSV files whose windows are appended to the main training windows
file_list = ["Labeled_" + file for file in ["February.csv", "April.csv", "June.csv", "August.csv", "October.csv", "December.csv"]]

# Window parameters
Seq_len = 7*24  # history length in hours (7 days)
Pre_len = 24      # forecast length in hours (24 hours)
features = ['Precipitation','Temperature', 'Wind_speed','Humidity', 'holiday', 'month', 'hour', 'Load']
# Alternative, smaller feature set:
# features = ['Temperature', 'Humidity', 'holiday', 'month', 'hour', 'Load']


def _collect_windows(values, window_len, stop):
    """Return all stride-1 windows of `window_len` rows taken from `values`.

    A window is `values[end - window_len:end]` for every exclusive end index
    `end` in `range(window_len, stop)`.
    """
    return [values[end - window_len:end] for end in range(window_len, stop)]


window_len = Seq_len + Pre_len

# Main training file
data = pd.read_csv('Labeled_Train Data.csv')
data = data.drop(columns=['DateTime'])
data_np = data.to_numpy().astype(np.float32)

# Column positions of the selected features, in the order given by `features`
features_indices = [data.columns.get_loc(f) for f in features]

# The end index stops at len(data_np) - 1, exactly as in the original loop,
# so the window ending on the very last row is not generated.
Power_Inputs_list = _collect_windows(data_np[:, features_indices], window_len, len(data_np))

# Windows from the additional monthly files
for file_name in file_list:
    data = pd.read_csv(file_name)
    data = data.drop(columns=['DateTime'])
    data_np = data.to_numpy().astype(np.float32)

    # Resolve the feature columns by name for *this* file: reusing positions
    # taken from the main file would silently pick the wrong columns if a
    # monthly CSV had a different column order.
    file_feature_indices = [data.columns.get_loc(f) for f in features]

    # Stop 24 rows early so no training window reaches the tail of the file.
    Power_Inputs_list.extend(
        _collect_windows(data_np[:, file_feature_indices], window_len, len(data_np) - 24)
    )

# Stack into one array of shape (num_samples, Seq_len + Pre_len, len(features));
# the last feature along the final axis is Load.
Power_Inputs = np.array(Power_Inputs_list)

# Scalers: one for the load series, one for the other continuous features
scaler_Load = StandardScaler()
scaler_Feature = StandardScaler()

# Positions (within `features`) of the load column and of the continuous,
# non-load columns; holiday/month/hour are left unscaled.
load_index = features.index('Load')
num_feature_indices = [features.index(f) for f in features if f not in ['holiday', 'month', 'hour', 'Load']]

# NOTE(review): both scalers are fitted on ALL windows before the split below,
# so validation/test rows contribute to the normalisation statistics. Fitting
# on Data_train only would remove that leakage, but it changes the statistics
# and therefore requires retraining any saved checkpoint - confirm before changing.

# Standardise the continuous non-load features (flatten to 2-D, scale, restore shape)
numeric_block = Power_Inputs[:, :, num_feature_indices]
Power_Inputs[:, :, num_feature_indices] = scaler_Feature.fit_transform(
    numeric_block.reshape(-1, len(num_feature_indices))
).reshape(numeric_block.shape)

# Standardise the load feature with its own scaler so predictions can be
# mapped back with scaler_Load.inverse_transform
load_block = Power_Inputs[:, :, load_index]
Power_Inputs[:, :, load_index] = scaler_Load.fit_transform(
    load_block.reshape(-1, 1)
).reshape(load_block.shape)

# Split: first 60% of windows (in order) for training; the remaining 40% is
# shuffled and halved into validation and test.
# NOTE(review): windows are stride-1 and overlap heavily, so the shuffled
# validation and test sets share most of their underlying rows.
Data_train, Data_Temp = train_test_split(Power_Inputs, test_size=0.4, shuffle=False)
Data_val, Data_test = train_test_split(Data_Temp, test_size=0.5, shuffle=True, random_state=seed)
# print(Power_Inputs.shape)
# Custom TimePower Dataset
class TimePowerDataset(Dataset):
    """Dataset over pre-built windows indexed as (sample, time step, feature).

    The last feature column is treated as the target series (Load in this
    notebook); all other columns are exogenous inputs.

    Args:
        Data: array-like of windows; converted to a float32 tensor.
        Seq_len: number of leading time steps used as history.
        Pre_len: optional number of time steps to predict. When ``None``
            (the default) every time step after ``Seq_len`` is the target and
            the exogenous block spans the whole window. When given, both are
            cut off at ``Seq_len + Pre_len``.
    """

    def __init__(self, Data, Seq_len, Pre_len=None):
        self.Data = torch.tensor(Data, dtype=torch.float32)
        self.Seq_len = Seq_len
        self.Pre_len = Pre_len
        # Exclusive end of the used part of each window; None means "to the end".
        self._window_end = None if Pre_len is None else Seq_len + Pre_len

    def __len__(self):
        """Return the number of windows."""
        return len(self.Data)

    def __getitem__(self, idx):
        """Return ``(En_X, Ex_X, En_y)`` for window ``idx``.

        En_X: target history, shape (Seq_len, 1).
        Ex_X: exogenous features over history + horizon (all columns but the last).
        En_y: target values over the horizon, shape (horizon, 1).
        """
        En_X = self.Data[idx, :self.Seq_len, -1].unsqueeze(1)
        Ex_X = self.Data[idx, :self._window_end, :-1]
        En_y = self.Data[idx, self.Seq_len:self._window_end, -1].unsqueeze(1)
        return En_X, Ex_X, En_y

# Wrap each split in a TimePowerDataset, then build the loaders
batch_size = 64
train_dataset, val_dataset, test_dataset = (
    TimePowerDataset(Data=split, Seq_len=Seq_len)
    for split in (Data_train, Data_val, Data_test)
)

# Only the training loader shuffles; the test loader yields one window at a time.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1)

split_sizes = (len(train_dataset), len(val_dataset), len(test_dataset))
print(f"训练集大小: {split_sizes[0]}, 验证集大小: {split_sizes[1]}, 测试集大小: {split_sizes[2]}")
# Debug helper: print the shapes of one sample
# for i, (En_X, Ex_X, En_y) in enumerate(train_dataset):
#     print('En_X:',En_X.shape, 'Ex_X:',Ex_X.shape, 'En_y:',En_y.shape)
#     print(scaler_Load.inverse_transform(En_y).shape)
#     break

训练集大小: 12816, 验证集大小: 4272, 测试集大小: 4272


In [6]:
from math import sqrt
import numpy as np
import pandas as pd
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import time
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from TimePower import TimePower

# Custom TimePower Dataset
class TimePowerDataset(Dataset):
    """Dataset over pre-built windows indexed as (sample, time step, feature).

    The last feature column is treated as the target series (Load in this
    notebook); all other columns are exogenous inputs.

    Args:
        Data: array-like of windows; converted to a float32 tensor.
        Seq_len: number of leading time steps used as history.
        Pre_len: optional number of time steps to predict. When ``None``
            (the default) every time step after ``Seq_len`` is the target and
            the exogenous block spans the whole window. When given, both are
            cut off at ``Seq_len + Pre_len``.
    """

    def __init__(self, Data, Seq_len, Pre_len=None):
        self.Data = torch.tensor(Data, dtype=torch.float32)
        self.Seq_len = Seq_len
        self.Pre_len = Pre_len
        # Exclusive end of the used part of each window; None means "to the end".
        self._window_end = None if Pre_len is None else Seq_len + Pre_len

    def __len__(self):
        """Return the number of windows."""
        return len(self.Data)

    def __getitem__(self, idx):
        """Return ``(En_X, Ex_X, En_y)`` for window ``idx``.

        En_X: target history, shape (Seq_len, 1).
        Ex_X: exogenous features over history + horizon (all columns but the last).
        En_y: target values over the horizon, shape (horizon, 1).
        """
        En_X = self.Data[idx, :self.Seq_len, -1].unsqueeze(1)
        Ex_X = self.Data[idx, :self._window_end, :-1]
        En_y = self.Data[idx, self.Seq_len:self._window_end, -1].unsqueeze(1)

        return (En_X, Ex_X, En_y)

# Select the compute device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

seq_len=24* 7        # length of the endogenous (history) series
pre_len=24          # length of the period to predict
d_model=512         # embedding size for endogenous patches and exogenous variables
patch_len=24        # length of a single patch
n_heads=8           # number of attention heads
d_ff=2048           # FFN hidden size, typically 4*d_model
e_layers=10          # number of encoder layers
time_embed = 5      # time-feature embedding size

# Build the TimePower model
model = TimePower(seq_len=seq_len,pre_len=pre_len,d_model=d_model,patch_len=patch_len,n_heads=n_heads,d_ff=d_ff,e_layers=e_layers,use_norm=True,time_embed=time_embed)
# Load the best saved weights onto the CPU first, then move the model to `device`
model.load_state_dict(torch.load('best_mse_model.pth', map_location=torch.device('cpu')))
model.to(device)
# Switch to inference mode: torch.no_grad() alone does not disable dropout
# or stop batch-norm layers from using batch statistics.
model.eval()
namelist = ["Labeled_February.csv", "Labeled_April.csv", "Labeled_June.csv","Labeled_August.csv", "Labeled_October.csv", "Labeled_December.csv"]

# Window parameters
Seq_len = 7*24  # history length in hours (7 days)
Pre_len = 24      # forecast length in hours (24 hours)

features = ['Precipitation','Temperature', 'Wind_speed','Humidity', 'holiday', 'month', 'hour', 'Load']

Power_Inputs_list = []

for filename in namelist:
    data = pd.read_csv(filename)
    data = data.drop(columns=['DateTime'])
    data_np = data.to_numpy().astype(np.float32)
    # Resolve feature columns from the file just read instead of relying on a
    # `data` frame left over from an earlier cell.
    features_indices = [data.columns.get_loc(f) for f in features]
    # The prediction input is the last Seq_len + Pre_len rows of each file.
    Power_Input = data_np[-Seq_len-Pre_len:, features_indices]
    Power_Inputs_list.append(Power_Input)

# Shape: (len(namelist), Seq_len + Pre_len, len(features))
Power_Inputs = np.array(Power_Inputs_list)

# Positions (within `features`) of the load column and of the continuous non-load columns
load_index = features.index('Load')
num_feature_indices = [features.index(f) for f in features if f not in ['holiday', 'month', 'hour', 'Load']]

# Apply the scalers fitted in the data-preparation cell (transform only, no refit).
# `scaler_Feature` and `scaler_Load` must already exist in the session.
numeric_block = Power_Inputs[:, :, num_feature_indices]
Power_Inputs[:, :, num_feature_indices] = scaler_Feature.transform(
    numeric_block.reshape(-1, len(num_feature_indices))
).reshape(numeric_block.shape)

# Scale the load feature with its own scaler
load_block = Power_Inputs[:, :, load_index]
Power_Inputs[:, :, load_index] = scaler_Load.transform(
    load_block.reshape(-1, 1)
).reshape(load_block.shape)


Power_inputs = TimePowerDataset(Data=Power_Inputs, Seq_len=Seq_len)
# print(Power_Inputs.shape)
# One window per batch, in file order, so predictions line up with `namelist`
Pre_data = DataLoader(Power_inputs, batch_size=1,shuffle=False)

out_list = []
with torch.no_grad():
    # En_y is produced by the dataset but not needed for prediction.
    for En_X, Ex_X, En_y in Pre_data:
        outputs = model(En_X.to(device), Ex_X.to(device))

        # Map the predictions back to the original load scale
        outputs = scaler_Load.inverse_transform(outputs.cpu().numpy().reshape(-1, 1))
        out_list.append(outputs)
# Flatten all per-file predictions into one 1-D vector
out_list = np.array(out_list).reshape(-1)

predictions_df = pd.DataFrame(out_list, columns=['Predicted_Load'])
predictions_df.to_csv('pre_answer.csv', index=False)