In [8]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# 自定义数据集类
class TimeSeriesDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)

# LSTM模型类
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # 只取最后一个时序点的输出
        return out

# 超参数和其他设置
input_size = 30  # 输入特征数量
hidden_size = 64  # LSTM隐藏层大小
output_size = 1  # 输出特征数量
num_layers = 2  # LSTM层数
num_epochs = 10
learning_rate = 0.001
batch_size = 4





In [14]:
shotnum=[113318,113320,113321,113322,113323,113324,113325,113326,113327,113328,113330,113331,113332,113333,113334,113335,113337,113338,113339,113341]


data = []# 输入数据，维度为20x5501x30
for i in range(20):
    filename =  f"midu{shotnum[i]}.xlsx"  # 替换为实际的文件名
    df = pd.read_excel(filename)
    df = df.iloc[2:5502, 2:31].values # 提取第2到31列的数据，2到5501行
    data.append(df)
# 创建训练数据集和验证数据集
# 创建训练数据集和验证数据集
data_tensor = torch.tensor(data, dtype=torch.float)
# 创建训练数据集和验证数据集
target = data_tensor[:, :, 0:1]  # 目标数据，维度为20x5501x1，其中预测的特征值是第一个特征值

train_data = data_tensor[:18]
valid_data = data_tensor[18:]

train_target = target[:18]
valid_target = target[18:]


train_dataset = TimeSeriesDataset(list(zip(train_data, train_target)))
valid_dataset = TimeSeriesDataset(list(zip(valid_data, valid_target)))

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False)


# 初始化模型
model = LSTMModel(input_size, hidden_size, output_size, num_layers)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)



In [16]:
# 训练和验证
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for data, target in train_loader:
        inputs = data.permute(0, 2, 1)  # 输入数据，维度调整为batch_size x 30 x 5501
        targets = target.squeeze()  # 目标数据，维度调整为batch_size x 5501

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    valid_loss = 0.0



RuntimeError: input.size(-1) must be equal to input_size. Expected 30, got 5499