In [1]:
import pandas as pd
import numpy as np
import torch
import pathlib
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


In [2]:
class Predictor(torch.nn.Module):
    def __init__(self, input_dim: int, output_dim: int, hidden_dim: int, batch_first: bool) -> None:
        """
            層を定義

            Args:
                input_dim (int): 入力の次元
                output_dim (int): 出力の次元
                hidden_dim (int): 隠れ層の次元
                batch_first (int): 入力テンソルの形式を変更する

            Returns:
                None
        """
        super(Predictor, self).__init__()
        self.nn = torch.nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            batch_first=batch_first
        )
        self.output_layer = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, inputs, hidden0=None):
        """
            どのように計算を伝搬していくかを定義

            Args:
                inputs (array): 入力する時系列データ
        """
        h, _ = self.nn(inputs, hidden0)
        output = self.output_layer(h[:, -1])

        return output


In [3]:
class Train:
    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        hidden_dim: int,
        batch_first: bool,
        lr: float,
        benchmark: bool,
    ) -> None:
        self.labels = ["stay", "walk", "jog", "skip", "stUp", "stDown"]
        self.input_dim = input_dim

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("device：", self.device)
        self.model = Predictor(input_dim, output_dim, hidden_dim, batch_first).to(
            self.device
        )
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        # self.optimizer = torch.optim.SGD(self.model.parameters(), lr=lr)

        torch.backends.cudnn.benchmark = benchmark

    def make_dataset(self, sequence_length: int, person_id: str, path: str):
        files = [
            {
                "label": l,
                "acc": str(
                    list(
                        pathlib.Path(f"{path}/{i+1}_{l}/{person_id}").glob("*-acc.csv")
                    )[0]
                ),
                "gyro": str(
                    list(
                        pathlib.Path(f"{path}/{i+1}_{l}/{person_id}").glob("*-gyro.csv")
                    )[0]
                ),
                "pressure": str(
                    list(
                        pathlib.Path(f"{path}/{i+1}_{l}/{person_id}").glob(
                            "*-pressure.csv"
                        )
                    )[0]
                ),
            }
            for i, l in enumerate(self.labels)
        ]

        df_acc = pd.concat(
            [
                pd.read_csv(
                    f["acc"], header=None, names=["time", "x", "y", "z"]
                ).assign(label=f["label"])
                for f in files
            ]
        )
        df_gyro = pd.concat(
            [
                pd.read_csv(f["gyro"], header=None, names=["time", "pressure"]).assign(
                    label=f["label"]
                )
                for f in files
            ]
        )
        df_pressure = pd.concat(
            [
                pd.read_csv(
                    f["pressure"], header=None, names=["time", "x", "y", "z"]
                ).assign(label=f["label"])
                for f in files
            ]
        )

        dataset_inputs = []
        dataset_labels = []
        dataset_times = []

        # self.df_acc を時系列データに変換
        for i, row in enumerate(df_acc.itertuples()):
            # dataframe の長さを超える場合は終了
            if i + sequence_length > len(df_acc):
                break
            # ラベルが変わる場合はスキップ
            if i > 0 and df_acc.iloc[i - 1].label != row.label:
                continue

            dataset_inputs.append(
                df_acc.iloc[i : i + sequence_length]
                .drop(["time", "label"], axis=1)
                .values
            )
            dataset_labels.append(row.label)
            dataset_times.append(row.time)

        return dataset_inputs, dataset_labels, dataset_times

    def labels2int(self, labels: list):
        if type(labels[0]) == list:
            return [self.labels2int(label) for label in labels]
        return [self.labels.index(label) for label in labels]

    def train(
        self,
        train_inputs: list,
        train_labels: list,
        test_inputs: list,
        test_labels: list,
        epoch_num: int,
        batch_size: int,
        sequence_length: int,
    ):
        train_batch_num = len(train_inputs) // batch_size
        test_batch_num = len(test_inputs) // batch_size

        for epoch in range(epoch_num):
            print("-" * 20)
            print(f"Epoch {epoch+1}/{epoch_num}")
            train_loss = 0.0
            test_loss = 0.0
            shuffled_train_inputs, shuffled_train_labels = shuffle(
                train_inputs, train_labels
            )

            np.savetxt(
                "shuffled_train_inputs.csv",
                np.array(shuffled_train_inputs).reshape(
                    -1, np.array(shuffled_train_inputs).shape[-1]
                ),
                delimiter=",",
            )

            for batch in range(train_batch_num):
                start = batch * batch_size
                end = start + batch_size

                np_train_inputs = np.array(shuffled_train_inputs[start:end]).astype(
                    np.float64
                )
                np_train_labels = np.array(shuffled_train_labels[start:end]).astype(
                    np.int64
                )
                loss, _ = self.train_step(np_train_inputs, np_train_labels)
                train_loss += loss.item()

            for batch in range(test_batch_num):
                start = batch * batch_size
                end = start + batch_size

                loss, _ = self.train_step(
                    np.array(test_inputs[start:end]).astype(np.float64),
                    np.array(test_labels[start:end]).astype(np.int64),
                )
                test_loss += loss.item()

            print(f"loss: {train_loss / train_batch_num}")
            print(f'test_loss: {test_loss / test_batch_num}')

    def train_step(self, inputs, labels):
        inputs_tensor = torch.tensor(inputs, dtype=torch.float32).to(self.device)
        labels_tensor = torch.tensor(labels).to(self.device)

        self.model.eval()
        preds = self.model(inputs_tensor)
        loss = self.criterion(preds, labels_tensor)

        loss.backward()
        self.optimizer.step()

        return loss, preds

In [4]:
# 使用する人物のID
person_id = "Person1201"
# データが格納されているディレクトリ
path = "./data/HASC-BasicActivity"
# 1つの入力データの長さ
sequence_length = 15
# 入力データの次元
input_dim = 3
# 出力データの次元
output_dim = 6
# 隠れ層の次元
hidden_dim = 128
# テストデータの割合
test_size = 0.2
# 訓練パラメータ
lr = 0.0001
# CUDA を使用するか
benchmark = True
# epoch 数
epoch_num = 1000
# バッチサイズ(データセットを分割した数)
batch_size = 32


# データセットを作成
train = Train(input_dim, output_dim, hidden_dim, True, lr, benchmark)
inputs, labels, _ = train.make_dataset(sequence_length, person_id, path)
labels_index = train.labels2int(labels)
# 訓練データとテストデータに分割
train_inputs, test_inputs, train_labels, test_labels = train_test_split(inputs, labels_index, test_size=test_size, shuffle=False)
# 訓練する
train.train(train_inputs, train_labels, test_inputs, test_labels, epoch_num, batch_size, sequence_length)


device： cpu
--------------------
Epoch 1/1000
loss: 1.630137525629579
test_loss: 2.442890736094692
--------------------
Epoch 2/1000
loss: 1.7019225055711311
test_loss: 1.8877409361956412
--------------------
Epoch 3/1000
loss: 1.540266997458642
test_loss: 1.795748932319775
--------------------
Epoch 4/1000
loss: 1.385672136879804
test_loss: 1.7306678817983259
--------------------
Epoch 5/1000
loss: 1.1404629349708557
test_loss: 1.767101986366406
--------------------
Epoch 6/1000
loss: 1.1020972488219278
test_loss: 1.7500062919499582
--------------------
Epoch 7/1000
loss: 0.9903440485920822
test_loss: 1.7630002226745873
--------------------
Epoch 8/1000
loss: 0.9252728592408331
test_loss: 1.563580154326924
--------------------
Epoch 9/1000
loss: 0.8774599046037909
test_loss: 1.4735484447395593
--------------------
Epoch 10/1000
loss: 0.918975033127425
test_loss: 1.307259570088303
--------------------
Epoch 11/1000
loss: 0.8698261415487841
test_loss: 1.357031524181366
-----------------