# <br>[ LG전자_DX_Intensive_Course  ] 딥러닝 기반 시계열 분석 4<br><br> : Recurrent Neural Network - RNN, LSTM, GRU for Classification<br>

In [None]:
# github에서 데이터 불러오기
!git clone https://github.com/KU-DIC/LG_time_series_day11.git

In [1]:
# 모듈 불러오기
import os
import time
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

# <br>0. Hyperparameter Setting
- data_dir: 데이터가 존재하는 경로 (해당 실습에서는 train/test 시계열 데이터가 존재하는 경로를 의미함)
- batch_size: 학습 및 검증에 사용할 배치의 크기
- num_classes: 새로운 데이터의 class 개수
- num_epochs: 학습할 epoch 횟수
- window_size: input의 시간 길이 (time series data에서 도출한 subsequence의 길이)
- input_size: 변수 개수
- hidden_size: 모델의 hidden dimension
- num_layers: 모델의 layer 개수
- bidirectional: 모델의 양방향성 여부
- random_seed: reproduction을 위해 고정할 seed의 값

In [2]:
# Hyperparameter setting
data_dir = '/content/LG_time_series_day11/input/har-data'
batch_size = 32
num_classes = 6
num_epochs = 200
window_size = 50
input_size = 561
hidden_size = 64
num_layers = 2
bidirectional = True

random_seed = 42
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Detect if we have a GPU available

In [3]:
# seed 고정
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

---

# <br>__1. Data: Human Activity Recognition Data__
- 데이터 description
    - Human Activity Recognition (HAR) Data는 30명의 실험자들이 각자 스마트폰을 허리에 착용하고 6가지 활동 (Walking, Walking Upstairs, Walking Downstairs, Sitting, Standing, Laying)을 수행할 때 측정된 센서 데이터로 구성된 데이터셋이다. 해당 데이터셋은 총 561개의 변수로 이루어져 있으며, 전체 데이터 중 70%는 train 데이터이고 나머지 30%는 test 데이터이다. HAR Data를 활용한 시계열 분류 task는 다변량 시계열 데이터를 input으로 받아 이를 다음 6가지 활동 중 하나의 class로 분류하는 것을 목표로 한다: 0(Walking), 1(Walking Upstairs), 2(Walking Downstairs), 3(Sitting), 4(Standing), 5(Laying). <br><br>

- 변수 설명
    - 독립변수(X): 여러 실험자에 대하여 561개의 변수를 281 시점동안 수집한 시계열 데이터 -> shape: (#실험자, 561, 281)
    - 종속변수(Y): 시계열 데이터의 label - 0(Walking) / 1(Walking Upstairs) / 2(Walking Downstairs) / 3(Sitting) / 4(Standing) / 5(Laying) <br><br>

- 데이터 출처
    - https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones#

In [4]:
def create_classification_dataset(window_size, data_dir, batch_size):
    # data_dir에 있는 train/test 데이터 불러오기
    x = pickle.load(open(os.path.join(data_dir, 'x_train.pkl'), 'rb'))
    y = pickle.load(open(os.path.join(data_dir, 'state_train.pkl'), 'rb'))
    x_test = pickle.load(open(os.path.join(data_dir, 'x_test.pkl'), 'rb'))
    y_test = pickle.load(open(os.path.join(data_dir, 'state_test.pkl'), 'rb'))

    # train data를 시간순으로 8:2의 비율로 train/validation set으로 분할
    n_train = int(0.8 * len(x))
    n_valid = len(x) - n_train
    n_test = len(x_test)
    x_train, y_train = x[:n_train], y[:n_train]
    x_valid, y_valid = x[n_train:], y[n_train:]

    # train/validation/test 데이터를 window_size 시점 길이로 분할
    datasets = []
    for set in [(x_train, y_train, n_train), (x_valid, y_valid, n_valid), (x_test, y_test, n_test)]:
        T = set[0].shape[-1]
        windows = np.split(set[0][:, :, :window_size * (T // window_size)], (T // window_size), -1)
        windows = np.concatenate(windows, 0)
        labels = np.split(set[1][:, :window_size * (T // window_size)], (T // window_size), -1)
        labels = np.round(np.mean(np.concatenate(labels, 0), -1))
        datasets.append(torch.utils.data.TensorDataset(torch.Tensor(windows), torch.Tensor(labels)))

    # train/validation/test DataLoader 구축
    trainset, validset, testset = datasets[0], datasets[1], datasets[2]
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(validset, batch_size=batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

    return train_loader, valid_loader, test_loader

In [5]:
# Dataloader 구축
# data shape: (batch_size x input_size x seq_len)
train_loader, valid_loader, test_loader = create_classification_dataset(window_size, data_dir, batch_size)

---

# <br>__2. Model: Vanilla RNN, LSTM, GRU__

- RNN/LSTM/GRU layer 설명 - **torch.nn.RNN() / torch.nn.LSTM() / torch.nn.GRU()**
    - input_size: input feature의 크기 (시계열 데이터의 변수 개수)
    - hidden_size: hidden state의 feature의 크기
    - num_layers: recurrent layer의 개수
    - batch_first: input의 shape에서 첫번째가 batch_size인지의 여부 (True - shape of (batch, seq, feature) / False - shape of (seq, batch, feature))
    - bidirectional: 모델의 양방향성 여부 <br><br>
    
- 모델 ouputs
    - output: last layer의 모든 시점의 hidden state
        - output: tensor of shape (batch, seq, D * hidden_size) with batch_first=True / tensor of shape (seq, batch, D * hidden_size) with batch_first=False
    - h_n: 모든 layer의 final hidden state
        - h_n: tensor of shape (D * num_layers, batch, hidden_size)
    - c_n: 모든 layer의 final cell state **(only LSTM)**
        - c_n: tensor of shape (D * num_layers, batch, hidden_size)

In [6]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, bidirectional, rnn_type='rnn'):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn_type = rnn_type
        self.num_directions = 2 if bidirectional == True else 1
        
        # rnn_type에 따른 recurrent layer 설정
        if self.rnn_type == 'rnn':
            self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
        elif self.rnn_type == 'lstm':
            self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
        elif self.rnn_type == 'gru':
            self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
        
        # bidirectional에 따른 fc layer 구축
        # bidirectional 여부에 따라 hidden state의 shape가 달라짐 (True: 2 * hidden_size, False: hidden_size)
        self.fc = nn.Linear(self.num_directions * hidden_size, num_classes)

    def forward(self, x):
        # data dimension: (batch_size x input_size x seq_len) -> (batch_size x seq_len x input_size)로 변환
        x = torch.transpose(x, 1, 2)
        
        # initial hidden states 설정
        h0 = torch.zeros(self.num_directions * self.num_layers, x.size(0), self.hidden_size).to(device)
        
        # 선택한 rnn_type의 RNN으로부터 output 도출
        if self.rnn_type in ['rnn', 'gru']:
            out, _ = self.rnn(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        else:
            # initial cell states 설정
            c0 = torch.zeros(self.num_directions * self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.rnn(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        out = self.fc(out[:, -1, :])
        return out

In [7]:
# Vanilla RNN 모델 구축
rnn = RNN(input_size, hidden_size, num_layers, num_classes, bidirectional, rnn_type='rnn')
rnn = rnn.to(device)
print(rnn)

RNN(
  (rnn): RNN(561, 64, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=128, out_features=6, bias=True)
)


In [8]:
# LSTM 모델 구축
lstm = RNN(input_size, hidden_size, num_layers, num_classes, bidirectional, rnn_type='lstm')
lstm = lstm.to(device)
print(lstm)

RNN(
  (rnn): LSTM(561, 64, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=128, out_features=6, bias=True)
)


In [9]:
# GRU 모델 구축
gru = RNN(input_size, hidden_size, num_layers, num_classes, bidirectional, rnn_type='gru')
gru = gru.to(device)
print(gru)

RNN(
  (rnn): GRU(561, 64, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=128, out_features=6, bias=True)
)


---

# <br>__3. Training__

In [10]:
def train_model(model, dataloaders, criterion, num_epochs, optimizer):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # 각 epoch마다 순서대로 training과 validation을 진행
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # 모델을 training mode로 설정
            else:
                model.eval()   # 모델을 validation mode로 설정

            running_loss = 0.0
            running_corrects = 0
            running_total = 0

            # training과 validation 단계에 맞는 dataloader에 대하여 학습/검증 진행
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device, dtype=torch.long)

                # parameter gradients를 0으로 설정
                optimizer.zero_grad()

                # forward
                # training 단계에서만 gradient 업데이트 수행
                with torch.set_grad_enabled(phase == 'train'):
                    # input을 model에 넣어 output을 도출한 후, loss를 계산함
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # output 중 최댓값의 위치에 해당하는 class로 예측을 수행
                    _, preds = torch.max(outputs, 1)

                    # backward (optimize): training 단계에서만 수행
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # batch별 loss를 축적함
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                running_total += labels.size(0)

            # epoch의 loss 및 accuracy 도출
            epoch_loss = running_loss / running_total
            epoch_acc = running_corrects.double() / running_total

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # validation 단계에서 validation loss가 감소할 때마다 best model 가중치를 업데이트함
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    # 전체 학습 시간 계산
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # validation loss가 가장 낮았을 때의 best model 가중치를 불러와 best model을 구축함
    model.load_state_dict(best_model_wts)
    
    # best model 가중치 저장
    # torch.save(best_model_wts, '../output/best_model.pt')
    return model, val_acc_history

In [11]:
# trining 단계에서 사용할 Dataloader dictionary 생성
dataloaders_dict = {
    'train': train_loader,
    'val': valid_loader
}

In [12]:
# loss function 설정
criterion = nn.CrossEntropyLoss()

In [13]:
# Vanilla RNN 모델 학습
rnn, rnn_val_acc_history = train_model(rnn, dataloaders_dict, criterion, num_epochs,
                                       optimizer=optim.Adam(rnn.parameters(), lr=0.0001))

Epoch 1/200
----------
train Loss: 1.7269 Acc: 0.1625
val Loss: 1.5952 Acc: 0.3600

Epoch 2/200
----------
train Loss: 1.5923 Acc: 0.3625
val Loss: 1.4803 Acc: 0.5200

Epoch 3/200
----------
train Loss: 1.5066 Acc: 0.4250
val Loss: 1.3984 Acc: 0.5200

Epoch 4/200
----------
train Loss: 1.4346 Acc: 0.4250
val Loss: 1.3447 Acc: 0.5200

Epoch 5/200
----------
train Loss: 1.3826 Acc: 0.4250
val Loss: 1.3092 Acc: 0.5200

Epoch 6/200
----------
train Loss: 1.3376 Acc: 0.4250
val Loss: 1.2846 Acc: 0.5200

Epoch 7/200
----------
train Loss: 1.3015 Acc: 0.4375
val Loss: 1.2660 Acc: 0.5200

Epoch 8/200
----------
train Loss: 1.2709 Acc: 0.4875
val Loss: 1.2510 Acc: 0.5200

Epoch 9/200
----------
train Loss: 1.2469 Acc: 0.5000
val Loss: 1.2446 Acc: 0.5200

Epoch 10/200
----------
train Loss: 1.2242 Acc: 0.5375
val Loss: 1.2393 Acc: 0.5600

Epoch 11/200
----------
train Loss: 1.2062 Acc: 0.5375
val Loss: 1.2287 Acc: 0.5600

Epoch 12/200
----------
train Loss: 1.1907 Acc: 0.5500
val Loss: 1.2197 Ac

In [14]:
# LSTM 모델 학습
lstm, lstm_val_acc_history = train_model(lstm, dataloaders_dict, criterion, num_epochs,
                                         optimizer=optim.Adam(lstm.parameters(), lr=0.0001))

Epoch 1/200
----------
train Loss: 1.8070 Acc: 0.0500
val Loss: 1.7599 Acc: 0.5600

Epoch 2/200
----------
train Loss: 1.7755 Acc: 0.4000
val Loss: 1.7251 Acc: 0.6400

Epoch 3/200
----------
train Loss: 1.7458 Acc: 0.4750
val Loss: 1.6905 Acc: 0.5600

Epoch 4/200
----------
train Loss: 1.7183 Acc: 0.4500
val Loss: 1.6567 Acc: 0.5600

Epoch 5/200
----------
train Loss: 1.6904 Acc: 0.4500
val Loss: 1.6229 Acc: 0.5600

Epoch 6/200
----------
train Loss: 1.6631 Acc: 0.4375
val Loss: 1.5883 Acc: 0.5600

Epoch 7/200
----------
train Loss: 1.6350 Acc: 0.4375
val Loss: 1.5528 Acc: 0.5600

Epoch 8/200
----------
train Loss: 1.6061 Acc: 0.4500
val Loss: 1.5158 Acc: 0.5600

Epoch 9/200
----------
train Loss: 1.5770 Acc: 0.4500
val Loss: 1.4787 Acc: 0.5600

Epoch 10/200
----------
train Loss: 1.5452 Acc: 0.4500
val Loss: 1.4423 Acc: 0.5600

Epoch 11/200
----------
train Loss: 1.5161 Acc: 0.4500
val Loss: 1.4069 Acc: 0.6000

Epoch 12/200
----------
train Loss: 1.4866 Acc: 0.4875
val Loss: 1.3729 Ac

In [15]:
# GRU 모델 학습
gru, gru_val_acc_history = train_model(gru, dataloaders_dict, criterion, num_epochs,
                                       optimizer=optim.Adam(gru.parameters(), lr=0.0001))

Epoch 1/200
----------
train Loss: 1.7324 Acc: 0.3750
val Loss: 1.6347 Acc: 0.5200

Epoch 2/200
----------
train Loss: 1.6366 Acc: 0.4500
val Loss: 1.5424 Acc: 0.5200

Epoch 3/200
----------
train Loss: 1.5529 Acc: 0.4375
val Loss: 1.4709 Acc: 0.5200

Epoch 4/200
----------
train Loss: 1.4876 Acc: 0.4375
val Loss: 1.4117 Acc: 0.5200

Epoch 5/200
----------
train Loss: 1.4310 Acc: 0.4375
val Loss: 1.3663 Acc: 0.5200

Epoch 6/200
----------
train Loss: 1.3864 Acc: 0.4500
val Loss: 1.3303 Acc: 0.5200

Epoch 7/200
----------
train Loss: 1.3492 Acc: 0.4500
val Loss: 1.2997 Acc: 0.5200

Epoch 8/200
----------
train Loss: 1.3191 Acc: 0.4500
val Loss: 1.2753 Acc: 0.5200

Epoch 9/200
----------
train Loss: 1.2923 Acc: 0.4625
val Loss: 1.2574 Acc: 0.5200

Epoch 10/200
----------
train Loss: 1.2670 Acc: 0.5125
val Loss: 1.2389 Acc: 0.5600

Epoch 11/200
----------
train Loss: 1.2449 Acc: 0.5625
val Loss: 1.2190 Acc: 0.6000

Epoch 12/200
----------
train Loss: 1.2255 Acc: 0.5375
val Loss: 1.2013 Ac

---

# <br>__4. Testing__

In [16]:
def test_model(model, test_loader):
    model.eval()   # 모델을 validation mode로 설정
    
    # test_loader에 대하여 검증 진행 (gradient update 방지)
    with torch.no_grad():
        corrects = 0
        total = 0
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device, dtype=torch.long)

            # forward
            # input을 model에 넣어 output을 도출
            outputs = model(inputs)

            # output 중 최댓값의 위치에 해당하는 class로 예측을 수행
            _, preds = torch.max(outputs, 1)

            # batch별 정답 개수를 축적함
            corrects += torch.sum(preds == labels.data)
            total += labels.size(0)

    # accuracy를 도출함
    test_acc = corrects.double() / total
    print('Testing Acc: {:.4f}'.format(test_acc))

In [17]:
# Vanilla RNN 모델 검증 (Acc: 0.6889)
test_model(rnn, test_loader)

Testing Acc: 0.6889


In [18]:
# LSTM 모델 검증 (Acc: 0.8222)
# Vanilla RNN / LSTM / GRU 중 LSTM이 가장 높은 성능을 도출함
test_model(lstm, test_loader)

Testing Acc: 0.7333


In [19]:
# GRU 모델 검증 (Acc: 0.7556)
test_model(gru, test_loader)

Testing Acc: 0.8000


---