In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [8]:
import argparse

In [None]:
class PowerDataset(Dataset):
    def __init__(self, input_size, output_size, data_path, mode='train'):
        if mode == 'train':
            df = pd.read_csv(data_path + '/train_new.csv')
        elif mode == 'test':
            df = pd.read_csv(data_path + '/test_new.csv')

        scaler = MinMaxScaler()
        scaler_model = MinMaxScaler()
        df.replace('?', np.nan, inplace=True)
        # 删除包含 NaN 的行，确保所有数据是有效的
        df.dropna(inplace=True)
        df = df.drop(columns=['DateTime'])
        self.data = scaler_model.fit_transform(np.array(df))

        scaler.fit_transform(np.array(df[['Global_active_power', 'Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3','RR','NBJRR1','NBJRR5' ,'NBJRR10', 'NBJBROU', 'temperature_2m_mean']]).reshape(-1, 1))

        self.input_size = input_size
        self.output_size = output_size

        self.data_x = []
        self.data_yin = []
        self.data_yout = []
        self.split_data(self.input_size, self.input_size)

    def split_data(self, input_size, output_size):
        dataX = []  # 保存X
        dataY = []  # 保存Y

        # 将输入窗口的数据保存到X中，将输出窗口保存到Y中
        window_size = self.input_size + self.output_size
        for index in range(len(self.data) - window_size):
            dataX.append(self.data[index: index + self.input_size][:])
            dataY.append(self.data[index + self.input_size: index + window_size][:])

        self.data_x = np.array(dataX)
        self.data_y = np.array(dataY)
    
    def __len__(self):
        # 返回数据的总数
        return len(self.data_x)
    
    def __getitem__(self, idx):
        data = torch.tensor(self.data_x[idx])
        label = torch.tensor(self.data_y[idx])
        return data, label

In [None]:
input_size, output_size = 90, 90
data_path = "/opt/data/private/hyl/code/ml-work/data"
raw_dataset = PowerDataset(input_size, output_size, data_path)
# sampler = DistributedSampler(raw_dataset)
dataloader = DataLoader(raw_dataset, batch_size=64, drop_last=True)
for inputs, labels in dataloader:
    print(inputs.shape)
    print(labels.shape)
    break

torch.Size([64, 90, 12])
torch.Size([64, 90, 12])


In [None]:
import torch
import torch.nn as nn

class LSTMPredictor(nn.Module):
    def __init__(self, args):
        super(LSTMPredictor, self).__init__()
        self.encoder_lstm = nn.LSTM(args.input_size, args.hidden_size, batch_first=True)
        self.decoder_lstm = nn.LSTM(args.hidden_size, args.hidden_size, batch_first=True)
        self.out_fc = nn.Linear(args.hidden_size, args.output_size)
        self.out_seq_length = args.output_length
    
    def forward(self, x):
        batch_size, seq_length, feature = x.size() # 【64, 90, 12]
        encoder_outputs, (encoder_hidden, encoder_cell) = self.encoder_lstm(x) # encoder_outputs:[64, 90, hidden_size64], encoder_hidden、encoder_cell:[1,64,64],
        decoder_hidden = encoder_hidden
        decoder_cell = encoder_cell
        
        decoder_input = encoder_outputs
        decoded_output = []

        for t in range(self.out_seq_length):
            decoder_output, (decoder_hidden, decoder_cell) = self.decoder_lstm(decoder_input, (decoder_hidden, decoder_cell)) # decoder_output:[64,1,hidden_size64],decoder_hidden:[1,64,64]

            # 只取解码器的第一个时间步输出
            output = self.out_fc(decoder_output[:, 0, :].unsqueeze(1)) # [64, 1, 12]
            decoded_output.append(output)
            decoder_input = torch.cat((decoder_input[:, 1:, :], decoder_output[:, 0, :].unsqueeze(1)), dim=1)

        decoded_output = torch.cat(decoded_output, dim=1)
        return decoded_output

In [11]:
model = LSTMPredictor(args=argparse.Namespace(input_size=12, hidden_size=64, output_size=12, output_length=output_size))
for inputs, labels in dataloader:
    inputs = inputs.float()  # Ensure inputs are float32
    labels = labels.float()
    print(inputs.shape)
    print(labels.shape)
    outputs = model(inputs)

torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])
torch.Size([64, 90, 12])


KeyboardInterrupt: 