In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
def merge_csv_files(input_dir, group_size=10):
    """Load the CSV files in ``input_dir`` and stack them into fixed-size groups.

    File names ending in ``.csv`` are sorted and split into consecutive
    chunks of ``group_size``.  For every file the first row and the first and
    last columns are dropped, and the remaining cells are flattened into one
    vector; the vectors of one chunk are stacked into that group's 2-D sample.
    The group's label is read from the last column of the second row of the
    final file in the chunk.

    Sorting matters: ``os.listdir`` returns entries in arbitrary order, so
    without it the grouping (and therefore the labels) could differ between
    runs or machines.

    Args:
        input_dir: Directory that contains the ``.csv`` files.
        group_size: Number of consecutive files merged into one sample.

    Returns:
        A ``(data, labels)`` tuple.  ``data`` is a float32 array with one
        entry per group and ``labels`` is an int64 array with one label per
        group.  All groups must end up with the same shape for the final
        conversion into a single array to succeed.
    """
    csv_files = sorted(
        name for name in os.listdir(input_dir) if name.endswith(".csv")
    )

    data = []
    label = []

    for start in range(0, len(csv_files), group_size):
        group_rows = []
        last_frame = None

        for csv_file in csv_files[start : start + group_size]:
            last_frame = pd.read_csv(os.path.join(input_dir, csv_file), header=None)
            # Skip row 0 and the first/last columns; keep only the feature cells.
            group_rows.append(last_frame.iloc[1:, 1:-1].to_numpy().flatten())

        data.append(np.array(group_rows))
        # The label comes from the last file read for this group.
        label.append(last_frame.iloc[1, -1])

    return np.array(data, dtype=np.float32), np.array(label, dtype=np.int64)

In [3]:
# Directory of the training CSV files; the call returns a (data, labels) tuple.
input_directory = "../data/extract_data/train"
data_set = merge_csv_files(input_dir=input_directory)

In [7]:
# Peek at the second group's feature matrix (element 1 of the returned data array).
data_set[0][1]

array([[ 2.6083422e-01, -1.0762836e+00,  5.3636956e-01, ...,
         1.6788895e+01,  4.3849759e+00,  6.9192910e-01],
       [-2.3547415e-01,  3.1310487e-01, -4.3272758e-01, ...,
         1.2538747e+01,  3.0083663e+00,  5.1276588e-01],
       [-5.5105168e-01, -1.8840991e+00,  3.0924928e+00, ...,
         2.0077322e+01,  3.5752716e+00,  4.1684201e-01],
       ...,
       [-6.0056776e-02,  2.4289803e-01,  1.7319660e-01, ...,
         3.1696804e+01,  5.0570984e+00,  3.8467687e-01],
       [ 2.4861319e-01, -4.4812962e-01,  1.7339541e-01, ...,
         3.2655136e+01,  4.7473712e+00,  4.1566828e-01],
       [ 1.9568929e-02,  8.2545742e-02, -6.5090120e-02, ...,
         2.1349199e+01,  5.7862973e+00,  2.9648867e-01]], dtype=float32)

In [8]:
import torch
from torch.utils.data import Dataset

from utils.preprocessing import merge_csv_files


class CostumDataset(Dataset):
    """Dataset backed by the arrays that ``merge_csv_files`` builds from a directory.

    Each item is a ``(sample, label)`` pair; an optional ``transform`` callable
    is applied to the sample only.
    """

    def __init__(self, file_path, transform=None):
        """Load all samples and labels found under ``file_path``."""
        features, targets = merge_csv_files(file_path)
        self.data = features
        self.labels = targets
        self.transform = transform

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at ``idx``."""
        # Tensor indices are converted to plain Python values before indexing.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        features = self.data[index]
        target = self.labels[index]

        if not self.transform:
            return features, target
        return self.transform(features), target

# One dataset per split; each constructor call reads every CSV in its directory.
train_data = CostumDataset(file_path="../data/extract_data/train")
val_data = CostumDataset(file_path="../data/extract_data/val")

In [11]:
# Display the last (sample, label) pair of the training dataset.
train_data[-1]

(array([[-1.4947109 ,  0.16405785, -0.40233412, ..., 58.266975  ,
         13.492968  ,  1.4920999 ],
        [-0.6508876 , -0.5064079 , -0.55445975, ..., 29.467905  ,
         10.615383  ,  1.035656  ],
        [-0.25991696, -0.5302343 , -1.2361296 , ..., 35.2661    ,
          9.281973  ,  0.92583656],
        ...,
        [-0.66705567, -1.0801073 , -0.19621485, ..., 36.40521   ,
         10.298986  ,  0.81272167],
        [-0.54874223, -0.55612916, -0.5598128 , ..., 56.506382  ,
         11.18319   ,  0.86725515],
        [-0.5428575 , -0.551182  , -0.36312675, ..., 38.301186  ,
         10.731084  ,  1.051912  ]], dtype=float32),
 1)

In [5]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

import time
import os

# NOTE(review): machine-specific absolute path; this only works on the original
# author's Windows machine. Consider a project-relative or configurable location.
os.environ['TORCH_HOME'] = r'C:\Users\bugs_\PycharmProjects\eegProject\models'

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_data_size = len(train_data)
val_data_size = len(val_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("验证数据集的长度为:{}".format(val_data_size))

# Shuffle the training samples every epoch so the optimiser does not see them in
# the fixed file order; validation order is irrelevant and stays sequential.
train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=1)

训练数据集的长度为:171
验证数据集的长度为:57


In [6]:
import torch.nn.functional as F
print('==> Building model..')


class GRU(nn.Module):
    """GRU sequence classifier: stacked GRU followed by a linear read-out.

    The input is expected batch-first, i.e. ``(batch, seq_len, input_size)``;
    the output has shape ``(batch, num_classes)`` and is computed from the GRU
    output at the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
        dropout: Drop probability applied to the GRU output during training.
    """

    def __init__(self, input_size=176, hidden_size=64, num_layers=2,
                 num_classes=3, dropout=0.3):
        super().__init__()

        self.gru_layer = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bias=True,
            batch_first=True,  # tensors are (batch, seq_len, feature)
        )

        self.out = nn.Linear(hidden_size, num_classes)
        self.dropout_p = dropout

    def forward(self, x):
        """Return class logits for a batch of sequences."""
        # nn.GRU returns (output, h_n); h_n is a single tensor, not an LSTM-style
        # (h, c) pair, so it must not be unpacked into two names.
        r_out, _ = self.gru_layer(x.float(), None)
        # F.dropout defaults to training=True; tie it to the module mode so that
        # dropout is disabled after .eval().
        r_out = F.dropout(r_out, self.dropout_p, training=self.training)
        # Classify from the GRU output at the last time step.
        return self.out(r_out[:, -1, :])

# Build the network and place its parameters on the selected device
# (Module.to returns the module itself).
gru = GRU().to(device)

print(gru)

==> Building model..
GRU(
  (gru_layer): GRU(176, 64, num_layers=2, batch_first=True)
  (out): Linear(in_features=64, out_features=3, bias=True)
)


In [7]:
# Optimisation hyper-parameters.
learning_rate = 1e-2
epoch = 100

# Cross-entropy criterion, placed on the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# Adam over every model parameter, with L2 weight decay.
optimizer = torch.optim.Adam(
    gru.parameters(),
    lr=learning_rate,
    weight_decay=0.005,
)

# Running step counters used as the x-axis of the TensorBoard scalars.
total_train_step = total_val_step = 0

In [8]:

# Sanity check: print the shape of a single time step from one training batch.
# Only one batch is fetched; walking the whole loader is unnecessary for this.
features, targets = next(iter(train_dataloader))
step_features = torch.unsqueeze(features[0][-1], 0)
print(step_features.shape)

torch.Size([1, 176])


In [11]:
start_time = time.time()
writer = SummaryWriter(r"..\loss_train")

# try/finally guarantees the event file is flushed and closed even when
# training is interrupted or raises.
try:
    for i in range(epoch):
        print("--------第{}轮训练开始--------".format(i + 1))

        # ---- training pass: one optimiser step per batch ----
        gru.train()
        for data in train_dataloader:
            features, targets = data
            features = features.to(device)
            targets = targets.to(device)

            outputs = gru(features)
            loss = loss_fn(outputs, targets.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            print("训练次数:{}, Loss:{}".format(total_train_step, loss.item()))
            end_time = time.time()
            print("耗时：{}s".format(end_time - start_time))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

        # ---- validation pass: no gradients, summed loss and accuracy ----
        gru.eval()
        total_val_loss = 0
        total_accuracy = 0
        with torch.no_grad():
            for data in val_dataloader:
                features, targets = data
                features = features.to(device)
                targets = targets.to(device)
                # A single forward pass per batch is sufficient.
                outputs = gru(features)
                loss = loss_fn(outputs, targets.long())
                total_val_loss = total_val_loss + loss.item()
                # .item() keeps the running count a plain Python number rather
                # than a tensor living on the device.
                accuracy = (outputs.argmax(1) == targets).sum().item()
                total_accuracy = total_accuracy + accuracy

        val_accuracy = total_accuracy / val_data_size
        print("整体验证集上的Loss：{}".format(total_val_loss))
        print("整体验证集上的正确率：{}".format(val_accuracy))
        writer.add_scalar("val_loss", total_val_loss, total_val_step)
        writer.add_scalar("val_accuracy", val_accuracy, total_val_step)
        total_val_step += 1
finally:
    writer.close()

--------第1轮训练开始--------
训练次数:172, Loss:10.089409828186035
耗时：0.007492542266845703s
训练次数:173, Loss:7.046926498413086
耗时：0.014002323150634766s
训练次数:174, Loss:3.3846638202667236
耗时：0.020549297332763672s
训练次数:175, Loss:0.7542783617973328
耗时：0.026533126831054688s
训练次数:176, Loss:0.6432912349700928
耗时：0.03132486343383789s
训练次数:177, Loss:0.3279566168785095
耗时：0.03631162643432617s
训练次数:178, Loss:0.12108016759157181
耗时：0.04283881187438965s
训练次数:179, Loss:0.04591349512338638
耗时：0.05132317543029785s
训练次数:180, Loss:0.02801399491727352
耗时：0.0579066276550293s
训练次数:181, Loss:0.011303434148430824
耗时：0.06289911270141602s
训练次数:182, Loss:0.013378609903156757
耗时：0.06839132308959961s
训练次数:183, Loss:0.005956754554063082
耗时：0.07338118553161621s
训练次数:184, Loss:0.006278315093368292
耗时：0.07993721961975098s
训练次数:185, Loss:0.004446737933903933
耗时：0.0859229564666748s
训练次数:186, Loss:0.011853497475385666
耗时：0.09043359756469727s
训练次数:187, Loss:0.006880756001919508
耗时：0.09542083740234375s
训练次数:188, Loss:0.0020441368687