In [1]:
import pandas as pd
import numpy as np
import main_classificaiton as mc
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
import os
import random

In [2]:
# Fix the seed of every random number generator used in this notebook.
random_seed = 42

# Python's built-in generator and NumPy's global generator.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (default generator and the CUDA one).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN: disable benchmark mode and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. Without data representation & RNN model
config1 = {
        'with_representation': False,  # True if a representation is used for classification, False otherwise
        'model': 'RNN',  # classification algorithm, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 128,  # length (size) of the windows the input time series is cut into
            'num_layers': 2,  # number of recurrent layers, Default: 1
            'hidden_size': 64,  # dimensionality of the hidden state vector
            'attention': False,  # adds an attention layer when True
            'dropout': 0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
            'bidirectional': True,  # whether the model is bidirectional
            'batch_size': 64,  # batch size
            # Training device, one of ["cuda", "cpu"]; falls back to "cpu" when
            # no CUDA device is available so the notebook also runs without a GPU.
            'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            'num_epochs': 150  # number of epochs used for training
            }
}

# Case 2. Without data representation & LSTM model
config2 = {
        'with_representation': False,  # True if a representation is used for classification, False otherwise
        'model': 'LSTM',  # classification algorithm, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 128,  # length (size) of the windows the input time series is cut into
            'num_layers': 2,  # number of recurrent layers, Default: 1
            'hidden_size': 64,  # dimensionality of the hidden state vector
            'attention': False,  # adds an attention layer when True
            'dropout': 0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
            'bidirectional': True,  # whether the model is bidirectional
            'batch_size': 64,  # batch size
            # Training device, one of ["cuda", "cpu"]; falls back to "cpu" when
            # no CUDA device is available so the notebook also runs without a GPU.
            'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            'num_epochs': 150  # number of epochs used for training
            }
}

# Case 3. Without data representation & GRU model
config3 = {
        'with_representation': False,  # True if a representation is used for classification, False otherwise
        'model': 'GRU',  # classification algorithm, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 128,  # length (size) of the windows the input time series is cut into
            'num_layers': 2,  # number of recurrent layers, Default: 1
            'hidden_size': 64,  # dimensionality of the hidden state vector
            'attention': False,  # adds an attention layer when True
            'dropout': 0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
            'bidirectional': True,  # whether the model is bidirectional
            'batch_size': 64,  # batch size
            # Training device, one of ["cuda", "cpu"]; falls back to "cpu" when
            # no CUDA device is available so the notebook also runs without a GPU.
            'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            'num_epochs': 150  # number of epochs used for training
            }
}

# Case 4. Without data representation & CNN_1D model
config4 = {
        'with_representation': False,  # True if a representation is used for classification, False otherwise
        'model': 'CNN_1D',  # classification algorithm, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 128,  # length (size) of the windows the input time series is cut into
            'output_channels': 64,  # convolution channel size of output
            # Dropout probability. NOTE(review): the key is spelled 'drop_out' here but
            # 'dropout' in the recurrent configs; presumably the CNN model reads this
            # spelling -- confirm against main_classificaiton before renaming.
            'drop_out': 0.2,
            'kernel_size': 3,  # convolutional filter size
            'stride': 1,  # stride of the convolution. Default = 1
            'padding': 0,  # padding added to both sides of the input. Default = 0
            'batch_size': 64,  # batch size
            # Training device, one of ["cuda", "cpu"]; falls back to "cpu" when
            # no CUDA device is available so the notebook also runs without a GPU.
            'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            'num_epochs': 150  # number of epochs used for training
            }
}

# Case 5. With data representation & fully-connected layers
# Assumes the feature (representation) vector size is 20.
config5 = {
        'with_representation': True,  # True if a representation is used for classification, False otherwise
        'model': 'FC',  # classification algorithm, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 128,  # length (size) of the windows the input time series is cut into
            # Placeholder. A representation array should be supplied here, e.g. of shape
            # (80, 20): 80 = number of observations given the window length,
            # 20 = representation feature dimension. For now the loader fills in
            # random values instead.
            'input_representation': 0,
            # Dropout probability. NOTE(review): the key is spelled 'drop_out' here but
            # 'dropout' in the recurrent configs; presumably the FC model reads this
            # spelling -- confirm against main_classificaiton before renaming.
            'drop_out': 0.2,
            'batch_size': 64,  # batch size
            'bias': True,  # bias [True, False]
            # Training device, one of ["cuda", "cpu"]; falls back to "cpu" when
            # no CUDA device is available so the notebook also runs without a GPU.
            'device': 'cuda' if torch.cuda.is_available() else 'cpu',
            'num_epochs': 150  # number of epochs used for training
            }
}

In [4]:
data_dir = './data'


def _load_pickle(directory, file_name):
    """Return the object pickled in ``directory/file_name``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    loading finishes (the previous ``pickle.load(open(...))`` form left all
    four handles open).
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(os.path.join(directory, file_name), 'rb') as handle:
        return pickle.load(handle)


train_x = _load_pickle(data_dir, 'X_train.pkl')
train_y = _load_pickle(data_dir, 'y_train.pkl')
test_x = _load_pickle(data_dir, 'X_test.pkl')
test_y = _load_pickle(data_dir, 'y_test.pkl')

# Inputs and labels are bundled per split for mc.Classification.
train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  # shape : (num_of_instance, input_dims, window_size) = (7352, 9, 128)
print(train_y.shape)  # shape : (num_of_instance,) = (7352,)
print(test_x.shape)  # shape : (num_of_instance, input_dims, window_size) = (2947, 9, 128)
print(test_y.shape)  # shape : (num_of_instance,) = (2947,)

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [5]:
# Case 1: no data representation, RNN classifier.
config = config1
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Per the original note the test loader is built with shuffle=False.
# Only the first five rows are shown; the full arrays are expected to have
# shapes (2947,) for pred and (2947, 6) for prob.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.6574 Acc: 0.3578
val Loss: 1.4503 Acc: 0.3848

Epoch 6/150
train Loss: 1.0065 Acc: 0.5565
val Loss: 1.0774 Acc: 0.5391

Epoch 11/150
train Loss: 0.9592 Acc: 0.5877
val Loss: 1.0034 Acc: 0.5670

Epoch 16/150
train Loss: 0.9334 Acc: 0.6057
val Loss: 1.0058 Acc: 0.5778

Epoch 21/150
train Loss: 0.8719 Acc: 0.6354
val Loss: 0.9964 Acc: 0.6213

Epoch 26/150
train Loss: 0.6879 Acc: 0.7171
val Loss: 0.7186 Acc: 0.7458

Epoch 31/150
train Loss: 0.5052 Acc: 0.8101
val Loss: 0.5891 Acc: 0.8212

Epoch 36/150
train Loss: 0.4008 Acc: 0.8599
val Loss: 0.5279 Acc: 0.8484

Epoch 41/150
train Loss: 0.3466 Acc: 0.8808
val Loss: 0.4705 Acc: 0.8627

Epoch 46/150
train Loss: 0.3060 Acc: 0.8973
val Loss: 0.3717 Acc: 0.8906

Epoch 51/150
train Loss: 0.2752 Acc: 0.9112
val Loss: 0.3496 Acc: 0.9130

Epoch 56/150
train Loss: 0.2896 Acc: 0.8980
val Loss: 0.2873 Acc: 0.9211

Epoch 61/150
train Loss: 0.2468 Acc: 0.9223
val Loss: 0.3164 Acc: 0.9103

Epoch 66/150
train Loss: 0.2346 Acc: 0.

In [6]:
# Case 2: no data representation, LSTM classifier.
config = config2
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Per the original note the test loader is built with shuffle=False.
# Only the first five rows are shown; the full arrays are expected to have
# shapes (2947,) for pred and (2947, 6) for prob.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.7734 Acc: 0.2931
val Loss: 1.7412 Acc: 0.3018

Epoch 6/150
train Loss: 1.0326 Acc: 0.5496
val Loss: 1.0919 Acc: 0.5534

Epoch 11/150
train Loss: 0.9024 Acc: 0.6183
val Loss: 0.9183 Acc: 0.6649

Epoch 16/150
train Loss: 0.9062 Acc: 0.6169
val Loss: 0.8528 Acc: 0.6880

Epoch 21/150
train Loss: 0.7794 Acc: 0.6883
val Loss: 0.8394 Acc: 0.7124

Epoch 26/150
train Loss: 0.7871 Acc: 0.6541
val Loss: 0.9775 Acc: 0.5846

Epoch 31/150
train Loss: 0.7142 Acc: 0.6864
val Loss: 0.7888 Acc: 0.6886

Epoch 36/150
train Loss: 0.6371 Acc: 0.7419
val Loss: 0.6810 Acc: 0.7444

Epoch 41/150
train Loss: 0.6682 Acc: 0.7421
val Loss: 0.7087 Acc: 0.7315

Epoch 46/150
train Loss: 0.5376 Acc: 0.7832
val Loss: 0.6536 Acc: 0.7322

Epoch 51/150
train Loss: 0.5593 Acc: 0.7653
val Loss: 0.5823 Acc: 0.7587

Epoch 56/150
train Loss: 0.4992 Acc: 0.7890
val Loss: 0.5864 Acc: 0.7723

Epoch 61/150
train Loss: 0.4589 Acc: 0.8147
val Loss: 0.5428 Acc: 0.7899

Epoch 66/150
train Loss: 0.5841 Acc: 0.

In [7]:
# Case 3: no data representation, GRU classifier.
config = config3
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Per the original note the test loader is built with shuffle=False.
# Only the first five rows are shown; the full arrays are expected to have
# shapes (2947,) for pred and (2947, 6) for prob.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.7511 Acc: 0.3561
val Loss: 1.6960 Acc: 0.4018

Epoch 6/150
train Loss: 0.9861 Acc: 0.5676
val Loss: 1.1365 Acc: 0.5350

Epoch 11/150
train Loss: 0.8708 Acc: 0.6320
val Loss: 0.9467 Acc: 0.6383

Epoch 16/150
train Loss: 0.6686 Acc: 0.7653
val Loss: 0.6364 Acc: 0.8144

Epoch 21/150
train Loss: 0.3027 Acc: 0.9106
val Loss: 0.3224 Acc: 0.9300

Epoch 26/150
train Loss: 0.1912 Acc: 0.9401
val Loss: 0.2573 Acc: 0.9395

Epoch 31/150
train Loss: 0.1561 Acc: 0.9471
val Loss: 0.2446 Acc: 0.9388

Epoch 36/150
train Loss: 0.1395 Acc: 0.9480
val Loss: 0.2414 Acc: 0.9402

Epoch 41/150
train Loss: 0.1350 Acc: 0.9536
val Loss: 0.2387 Acc: 0.9341

Epoch 46/150
train Loss: 0.1310 Acc: 0.9480
val Loss: 0.2515 Acc: 0.9402

Epoch 51/150
train Loss: 0.1219 Acc: 0.9534
val Loss: 0.2522 Acc: 0.9395

Epoch 56/150
train Loss: 0.1228 Acc: 0.9539
val Loss: 0.2619 Acc: 0.9368

Epoch 61/150
train Loss: 0.1180 Acc: 0.9539
val Loss: 0.2590 Acc: 0.9375

Epoch 66/150
train Loss: 0.1070 Acc: 0.

In [8]:
# Case 4: no data representation, 1-D CNN classifier.
config = config4
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Per the original note the test loader is built with shuffle=False.
# Only the first five rows are shown; the full arrays are expected to have
# shapes (2947,) for pred and (2947, 6) for prob.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.6331 Acc: 0.4045
val Loss: 1.3934 Acc: 0.5398

Epoch 6/150
train Loss: 0.5212 Acc: 0.7987
val Loss: 0.6720 Acc: 0.8056

Epoch 11/150
train Loss: 0.3854 Acc: 0.8604
val Loss: 0.5499 Acc: 0.8368

Epoch 16/150
train Loss: 0.3060 Acc: 0.8927
val Loss: 0.4675 Acc: 0.8532

Epoch 21/150
train Loss: 0.2494 Acc: 0.9141
val Loss: 0.4058 Acc: 0.8783

Epoch 26/150
train Loss: 0.2069 Acc: 0.9306
val Loss: 0.3791 Acc: 0.8865

Epoch 31/150
train Loss: 0.1786 Acc: 0.9386
val Loss: 0.3540 Acc: 0.8967

Epoch 36/150
train Loss: 0.1568 Acc: 0.9466
val Loss: 0.3402 Acc: 0.8994

Epoch 41/150
train Loss: 0.1403 Acc: 0.9522
val Loss: 0.3284 Acc: 0.9055

Epoch 46/150
train Loss: 0.1274 Acc: 0.9558
val Loss: 0.3223 Acc: 0.9103

Epoch 51/150
train Loss: 0.1179 Acc: 0.9578
val Loss: 0.2973 Acc: 0.9205

Epoch 56/150
train Loss: 0.1107 Acc: 0.9595
val Loss: 0.2925 Acc: 0.9218

Epoch 61/150
train Loss: 0.1022 Acc: 0.9606
val Loss: 0.2865 Acc: 0.9211

Epoch 66/150
train Loss: 0.0969 Acc: 0.

In [9]:
# Case 5: with data representation, fully-connected classifier.
# NOTE(review): the recorded log for this cell stays at roughly 0.18 accuracy;
# this is presumably because config5 still carries a placeholder
# 'input_representation' -- confirm once a real representation is supplied.
config = config5
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Per the original note the test loader is built with shuffle=False.
# Only the first five rows are shown; the full arrays are expected to have
# shapes (2947,) for pred and (2947, 6) for prob.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.8034 Acc: 0.1852
val Loss: 1.7961 Acc: 0.1869

Epoch 6/150
train Loss: 1.7933 Acc: 0.1818
val Loss: 1.7908 Acc: 0.1788

Epoch 11/150
train Loss: 1.7917 Acc: 0.1823
val Loss: 1.7904 Acc: 0.1788

Epoch 16/150
train Loss: 1.7921 Acc: 0.1777
val Loss: 1.7905 Acc: 0.1754

Epoch 21/150
train Loss: 1.7912 Acc: 0.1877
val Loss: 1.7905 Acc: 0.1727

Epoch 26/150
train Loss: 1.7917 Acc: 0.1758
val Loss: 1.7906 Acc: 0.1740

Epoch 31/150
train Loss: 1.7906 Acc: 0.1785
val Loss: 1.7909 Acc: 0.1761

Epoch 36/150
train Loss: 1.7915 Acc: 0.1816
val Loss: 1.7910 Acc: 0.1768

Epoch 41/150
train Loss: 1.7904 Acc: 0.1787
val Loss: 1.7911 Acc: 0.1774

Epoch 46/150
train Loss: 1.7907 Acc: 0.1809
val Loss: 1.7911 Acc: 0.1768

Epoch 51/150
train Loss: 1.7910 Acc: 0.1760
val Loss: 1.7911 Acc: 0.1768

Epoch 56/150
train Loss: 1.7909 Acc: 0.1738
val Loss: 1.7912 Acc: 0.1774

Epoch 61/150
train Loss: 1.7903 Acc: 0.1753
val Loss: 1.7912 Acc: 0.1774

Epoch 66/150
train Loss: 1.7906 Acc: 0.