In [1]:
#数据集划分

import os
import random
import shutil


def makedir(new_dir):
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)


if __name__ == '__main__':

    random.seed(1)

    dataset_dir = os.path.join("ABIDE", "ABIDE_db")
    split_dir = os.path.join("ABIDE", "ABIDE_split")
    train_dir = os.path.join(split_dir, "train")
    valid_dir = os.path.join(split_dir, "valid")
    test_dir = os.path.join(split_dir, "test")

    train_pct = 0.8
    valid_pct = 0.1
    test_pct = 0.1

    for root, dirs, files in os.walk(dataset_dir):
        for sub_dir in dirs:

            txts = os.listdir(os.path.join(root, sub_dir))
            txts = list(filter(lambda x: x.endswith('.txt'), txts))
            random.shuffle(txts)
            txt_count = len(txts)

            train_point = int(txt_count * train_pct)
            valid_point = int(txt_count * (train_pct + valid_pct))

            for i in range(txt_count):
                if i < train_point:
                    out_dir = os.path.join(train_dir, sub_dir)
                elif i < valid_point:
                    out_dir = os.path.join(valid_dir, sub_dir)
                else:
                    out_dir = os.path.join(test_dir, sub_dir)

                makedir(out_dir)

                target_path = os.path.join(out_dir, txts[i])
                src_path = os.path.join(dataset_dir, sub_dir, txts[i])

                shutil.copy(src_path, target_path)

            print('Class:{}, train:{}, valid:{}, test:{}'.format(sub_dir, train_point, valid_point-train_point,
                                                                 txt_count-valid_point))



Class:ASD, train:99, valid:12, test:13
Class:TD, train:99, valid:12, test:13


In [None]:
import numpy as np
import torch
import os
import random
from torch.utils.data import Dataset

random.seed(1)
rmb_label = {"ASD": 0, "TD": 1}      # 设置标签

class ABIDEtxtDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        """
        ABIDE_db的Dataset
        :param data_dir: str, 数据集所在路径
        :param transform: torch.transform，数据预处理
        """
        self.label_name = {"ASD": 0, "TD": 1}
        self.data_info = self.get_txt_info(data_dir)  # data_info存储所有txt路径和标签，在DataLoader中通过index读取样本
        self.transform = transform
        print('Number of samples:', len(self.data_info))
        print('Sample info:', self.data_info[0])
    def __getitem__(self, index):
        path_txt, label = self.data_info[index]
        txt_data = np.loadtxt(path_txt)
        txt = torch.tensor(txt_data)     

        if self.transform is not None:
            txt = self.transform(txt)   # 在这里做transform，转为tensor等等

        return txt, label

    def __len__(self):
        return len(self.data_info)

    @staticmethod
    def get_txt_info(data_dir):
        data_info = list()
        for root, dirs, _ in os.walk(data_dir):
            # 遍历类别
            for sub_dir in dirs:
                txt_names = os.listdir(os.path.join(root, sub_dir))
                txt_names = list(filter(lambda x: x.endswith('.txt'), txt_names))

                # 遍历txt
                for i in range(len(txt_names)):
                    txt_name = txt_names[i]
                    path_txt = os.path.join(root, sub_dir, txt_name)
                    label = rmb_label[sub_dir]
                    data_info.append((path_txt, int(label)))

        return data_info

In [2]:
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, dropout=0.5)
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # 初始化隐藏层状态
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # 前向传播 RNN
        out, _ = self.rnn(x, h0)
        out = self.dropout(out)
        # 将输出结果展平
        out = out[:, -1, :]
        # 全连接层
        out = self.fc(out)
        return out

In [3]:
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

#数据集地址
split_dir = os.path.join("ABIDE", "ABIDE_split")  
train_dir = os.path.join(split_dir, "train")
valid_dir = os.path.join(split_dir, "valid")
test_dir  = os.path.join(split_dir, "test") 
print("数据集地址over")

# 定义超参数
input_size = 111   # 根据具体数据集修改
hidden_size = 256
num_layers = 3
num_classes = 2
batch_size = 32
learning_rate = 0.001
num_epochs = 5
print("定义超参数over")

# 实例化数据集和数据加载器
train_dataset = ABIDEtxtDataset(train_dir ,transform=None)
valid_dataset = ABIDEtxtDataset(valid_dir ,transform=None)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
print("实例化数据集和数据加载器over")


数据集地址over
定义超参数over
Number of samples: 0


IndexError: list index out of range

In [None]:
# 实例化模型、损失函数和优化器
model = RNNModel(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练模型
train_losses = []
valid_losses = []
train_accs = []
valid_accs = []
for epoch in range(num_epochs):
    train_loss = 0.0
    train_acc = 0.0
    valid_loss = 0.0
    valid_acc = 0.0

    # 训练阶段
    model.train()
    for i, (data, label) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(data.float())
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        # 计算训练准确率和损失
        _, predicted = torch.max(outputs.data, 1)
        train_acc += (predicted == label).sum().item()
        train_loss += loss.item()

    # 验证阶段
    model.eval()
    with torch.no_grad():
        for i, (data, label) in enumerate(valid_loader):
            outputs = model(data.float())
            loss = criterion(outputs, label)

            # 计算验证准确率和损失
            _, predicted = torch.max(outputs.data, 1)
            valid_acc += (predicted == label).sum().item()
            valid_loss += loss.item()

    # 计算平均损失和准确率
    train_loss /= len(train_loader.dataset)
    valid_loss /= len(valid_loader.dataset)
    train_acc /= len(train_loader.dataset)
    valid_acc /= len(valid_loader.dataset)

    # 保存损失和准确率
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    train_accs.append(train_acc)
    valid_accs.append(valid_acc)

    # 打印训练结果
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

# 可视化训练结果
plt.plot(train_losses, label="train loss")
plt.plot(valid_losses, label="valid loss")
plt.legend()
plt.show()

plt.plot(train_accs, label="train acc")
plt.plot(valid_accs, label="valid acc")
plt.legend()
plt.show()

In [None]:
# 加载测试数据
test_dir  = os.path.join(split_dir, "test") 
test_dataset = ABIDEtxtDataset(test_dir, transform=None)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 将模型设置为评估模式
model.eval()

# 使用测试数据进行模型的评估
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs = inputs.double()

        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Test Accuracy: {:.2f}%'.format(accuracy))

# 将模型恢复为训练模式
model.train()
