In [3]:
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import os
from torchvision import transforms
from PIL import Image
import torch
import torch.optim as optim


class ImageSequenceDataset(Dataset):
    def __init__(self, root_dir, sequence_length, transform=None):
        self.root_dir = root_dir
        self.sequence_length = sequence_length
        self.transform = transform

        self.img_paths = [os.path.join(root_dir, file) for file in sorted(os.listdir(root_dir))]
        label_set = sorted(set(file.split('_')[0] for file in os.listdir(root_dir)))
        self.label_to_int = {label: idx for idx, label in enumerate(label_set)}

        self.labels = [self.label_to_int[file.split('_')[0]] for file in os.listdir(root_dir)[::sequence_length]]

    def __getitem__(self, idx):
        sequence_paths = self.img_paths[idx * self.sequence_length : (idx + 1) * self.sequence_length]
        sequence = [Image.open(img_path) for img_path in sequence_paths]

        if self.transform:
            sequence = [self.transform(img) for img in sequence]

        label = self.labels[idx]
        return torch.stack(sequence), torch.tensor(label, dtype=torch.long)

    def __len__(self):
        return len(self.img_paths) // self.sequence_length

    def _extract_label(self, img_path):
        basename = os.path.basename(img_path)
        label = basename.split('_')[0]
        return self.label_to_int[label]

transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
])

# LSTM 模型
class ImageSequenceClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(ImageSequenceClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [4]:
# 超参数
input_size = 512 * 512  # 根据你的图像尺寸和转换来设定
hidden_size = 256
num_layers = 2
num_classes = 2  # 你的类别数量

# 创建模型
model = ImageSequenceClassifier(input_size, hidden_size, num_layers, num_classes)


sequence_length = 5
dataset = ImageSequenceDataset(root_dir='miou_out/detection-results', sequence_length=sequence_length, transform=transform)

dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
test_size = dataset_size - train_size

train_dataset = torch.utils.data.Subset(dataset, range(0, train_size))
test_dataset = torch.utils.data.Subset(dataset, range(train_size, dataset_size))

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)


# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
num_epochs = 50
for epoch in range(num_epochs):
    for sequences, labels in train_loader:
        # 将序列展平为 (batch_size, sequence_length * feature_size)
        sequences = sequences.view(sequences.size(0), sequence_length, -1)

        # 前向传播
        outputs = model(sequences)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("训练完成")

IndexError: Target 2 is out of bounds.

In [None]:
# 将模型设置为评估模式
model.eval()

# 用于存储预测和真实标签
all_predictions = []
all_labels = []

with torch.no_grad():  # 在这个块中，不计算梯度
    for sequences, labels in test_loader:
        sequences = sequences.view(sequences.size(0), sequence_length, -1)
        outputs = model(sequences)
        _, predicted = torch.max(outputs, 1)

        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [None]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(all_labels, all_predictions)
print(f"测试集上的准确率: {accuracy:.4f}")

In [None]:
# 训练集上的准确率
all_predictions = []
all_labels = []

with torch.no_grad():
    for sequences, labels in train_loader:
        sequences = sequences.view(sequences.size(0), sequence_length, -1)
        outputs = model(sequences)
        _, predicted = torch.max(outputs, 1)

        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_predictions)
print(f"训练集上的准确率: {accuracy:.4f}")