In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
def merge_csv_files(input_dir, group_size=10):
    csv_files = [file for file in os.listdir(input_dir) if file.endswith(".csv")]

    grouped_files = [
        csv_files[i : i + group_size] for i in range(0, len(csv_files), group_size)
    ]

    data = []
    label = []

    for group_idx, group in enumerate(grouped_files):
        group_data = []

        for csv_file in group:
            file_path = os.path.join(input_dir, csv_file)
            df = pd.read_csv(file_path, header=None)
            pure_data = df.iloc[1:, 1:-1]
            group_data.append(pure_data.to_numpy(dtype=np.float64).flatten())

        group_data = np.array(group_data, dtype=np.float64)
        group_label = int(df.iloc[1, -1])
        label.append(group_label)
        data.append(group_data)

    return np.array(data), np.array(label)

In [3]:
input_directory = "../data/extract_data/train"
data_set = merge_csv_files(input_directory)

In [4]:
from torch.utils.data import Dataset
import torch

class CostumDataset(Dataset):
    def __init__(self, file_path, transform=None):
        self.data, self.labels = merge_csv_files(file_path)
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data[idx]
        label = self.labels[idx]

        if self.transform:
            sample = self.transform(sample)

        return sample, label

train_data = CostumDataset("../data/extract_data/train")
val_data = CostumDataset("../data/extract_data/val")

In [7]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

import time
import os

os.environ['TORCH_HOME'] = r'C:\Users\bugs_\PycharmProjects\eegProject\models'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_data_size = len(train_data)
val_data_size = len(val_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("验证数据集的长度为:{}".format(val_data_size))

train_dataloader = DataLoader(train_data, batch_size=1)
val_dataloader = DataLoader(val_data, batch_size=1)

训练数据集的长度为:171
验证数据集的长度为:57


In [14]:
import torch.nn.functional as F
print('==> Building model..')

class GRU(nn.Module):
    def __init__(self):
        super(GRU, self).__init__()

        self.gru_layer = nn.GRU(
            input_size=176,
            hidden_size=64,
            num_layers=2,
            bias=True,
            batch_first=True,       # input & output will has batch size as 1s dimension. e.g. (batch, segment_length, no_feature)
        )

        self.out = nn.Linear(64, 3)

    def forward(self, x):
        r_out, (h_n, h_c) = self.gru_layer(x.float(), None)
        r_out = F.dropout(r_out, 0.3)
        test_output = self.out(r_out[:, -1, :]) # choose r_out at the last time step
        return test_output

gru = GRU()
gru.to(device)

print(gru)

==> Building model..
GRU(
  (gru_layer): GRU(176, 64, num_layers=2, batch_first=True)
  (out): Linear(in_features=64, out_features=3, bias=True)
)


In [28]:
loss_fn = nn.CrossEntropyLoss()
loss_fn.to(device)

learning_rate = 1e-2
optimizer = torch.optim.Adam(gru.parameters(), lr=learning_rate, weight_decay=0.005)   # optimize all parameters


total_train_step = 0
total_val_step = 0
epoch = 100

In [36]:

for data in train_dataloader:
    features, targets = data
    for i in features[0]:
        i = torch.unsqueeze(i, 0)
print(i.shape)

torch.Size([1, 176])


In [38]:
start_time = time.time()
writer = SummaryWriter(r"C:\Users\bugs_\PycharmProjects\eegProject\loss_train")

for i in range(epoch):
    print("--------第{}轮训练开始--------".format(i + 1))

    gru.train()
    for data in train_dataloader:
        features, targets = data
        features = features.to(device)
        targets = targets.to(device)
        for i in features[0]:
            i = torch.unsqueeze(i, 0)

            outputs = gru(i)
        loss = loss_fn(outputs, targets.long())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step += 1
        # if total_train_step % 100 == 0:
        print("训练次数:{}, Loss:{}".format(total_train_step, loss.item()))
        end_time = time.time()
        print("耗时：{}s".format(end_time - start_time))
        writer.add_scalar("train_loss", loss.item(), total_train_step)

    gru.eval()
    total_val_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in val_dataloader:
            features, targets = data
            features = features.to(device)
            targets = targets.to(device)
            for i in features[0]:
                i = torch.unsqueeze(i, 0)
                outputs = gru(i)
            outputs = gru(features)
            loss = loss_fn(outputs, targets.long())
            total_val_loss = total_val_loss + loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    print("整体验证集上的Loss：{}".format(total_val_loss))
    print("整体验证集上的正确率：{}".format(total_accuracy / val_data_size))
    writer.add_scalar("val_loss", total_val_loss, total_val_step)
    writer.add_scalar("val_accuracy", total_accuracy / val_data_size, total_val_step)
    total_val_step += 1

writer.close()

--------第1轮训练开始--------


IndexError: too many indices for tensor of dimension 2