In [1]:
import pandas as pd
import numpy as np
import main_classificaiton as mc
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
import os
import random

In [2]:
# Pin every random number generator to a single seed so reruns are repeatable.
random_seed = 42

# Python's built-in generator and NumPy's legacy global generator.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators.
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN: turn off the benchmark-based algorithm selection and request
# deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Training device shared by every config below: use the GPU when PyTorch can
# see one, otherwise fall back to the CPU so the notebook still runs.
_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


def _recurrent_config(model_name):
    """Build the config for a recurrent classifier trained on raw windows.

    The RNN, LSTM and GRU cases use identical hyper-parameters and differ
    only in the 'model' entry, so they share this builder.

    Args:
        model_name: value stored under 'model' ('RNN', 'LSTM' or 'GRU').

    Returns:
        A new config dict; nothing is shared between calls, so editing one
        case's parameters does not affect the others.
    """
    return {
        'with_representation': False,  # True if a precomputed representation is the input, False for raw data
        'model': model_name,  # classification algorithm, one of 'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'

        'parameter': {
            'window_size': 128,  # length of each window cut from the input time series
            'num_layers': 2,  # number of recurrent layers (default: 1)
            'hidden_size': 64,  # dimension of the hidden state vector
            'attention': False,  # True adds an attention layer
            'dropout': 0.2,  # if non-zero, dropout applied on the outputs of each recurrent layer except the last (default: 0)
            'bidirectional': True,  # whether the model is bidirectional
            'batch_size': 64,  # batch size
            'device': _DEVICE,  # training device, 'cuda' or 'cpu'
            'num_epochs': 150  # number of training epochs
        }
    }


# Case 1. w/o data representation & RNN model
config1 = _recurrent_config('RNN')

# Case 2. w/o data representation & LSTM model
config2 = _recurrent_config('LSTM')

# Case 3. w/o data representation & GRU model
config3 = _recurrent_config('GRU')

# Case 4. w/o data representation & CNN_1D model
config4 = {
    'with_representation': False,  # True if a precomputed representation is the input, False for raw data
    'model': 'CNN_1D',  # classification algorithm, one of 'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'

    'parameter': {
        'window_size': 128,  # length of each window cut from the input time series
        'output_channels': 64,  # number of output channels of the convolution
        'drop_out': 0.2,  # dropout probability (note: this key is 'drop_out', unlike 'dropout' in the recurrent configs)
        'kernel_size': 3,  # convolutional filter size
        'stride': 1,  # stride of the convolution (default: 1)
        'padding': 0,  # padding added to both sides of the input (default: 0)
        'batch_size': 64,  # batch size
        'device': _DEVICE,  # training device, 'cuda' or 'cpu'
        'num_epochs': 150  # number of training epochs
    }
}

# Case 5. w/ data representation & fully-connected layers
# The feature vector size is assumed to be 20.
config5 = {
    'with_representation': True,  # True if a precomputed representation is the input, False for raw data
    'model': 'FC',  # classification algorithm, one of 'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'

    'parameter': {
        'window_size': 128,  # length of each window cut from the input time series
        # Placeholder: a representation array should be supplied here, e.g. shape (80, 20)
        # (80 = number of observations given the window length, 20 = feature dimension).
        # For now the loader fills in random values.
        'input_representation': 0,
        'drop_out': 0.2,  # dropout probability (note: this key is 'drop_out', unlike 'dropout' in the recurrent configs)
        'batch_size': 64,  # batch size
        'bias': True,  # whether to use a bias term, True or False
        'device': _DEVICE,  # training device, 'cuda' or 'cpu'
        'num_epochs': 150  # number of training epochs
    }
}

In [4]:
data_dir = './data'


def _load_pickle(file_name):
    """Unpickle `file_name` from `data_dir`, closing the file afterwards.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    with open(os.path.join(data_dir, file_name), 'rb') as handle:
        return pickle.load(handle)


train_x = _load_pickle('X_train.pkl')
train_y = _load_pickle('y_train.pkl')
test_x = _load_pickle('X_test.pkl')
test_y = _load_pickle('y_test.pkl')

# Inputs and labels bundled per split, as passed to mc.Classification below.
train_data = {'x' : train_x, 'y' : train_y}
test_data = {'x' : test_x, 'y' : test_y}

print(train_x.shape)  # (num_of_instance, input_dims, window_size) = (7352, 9, 128)
print(train_y.shape)  # (num_of_instance,) = (7352,)
print(test_x.shape)  # (num_of_instance, input_dims, window_size) = (2947, 9, 128)
print(test_y.shape)  # (num_of_instance,) = (2947,)

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [5]:
# Case 1: raw input (no data representation), RNN classifier.
config = config1
data_classification = mc.Classification(config, train_data, test_data)
result = data_classification.getResult()

# Original note: the test loader uses shuffle=False.
# Expected shapes: preds (2947,), probs (2947, 6).
preds, probs = result['preds'], result['probs']


Epoch 1/150
train Loss: 1.6580 Acc: 0.3719
val Loss: 1.4532 Acc: 0.3678

Epoch 6/150
train Loss: 1.0048 Acc: 0.5598
val Loss: 1.1178 Acc: 0.5146

Epoch 11/150
train Loss: 0.9582 Acc: 0.5775
val Loss: 1.0061 Acc: 0.5622

Epoch 16/150
train Loss: 0.9304 Acc: 0.6009
val Loss: 1.0273 Acc: 0.5751

Epoch 21/150
train Loss: 0.8618 Acc: 0.6368
val Loss: 1.0039 Acc: 0.6451

Epoch 26/150
train Loss: 0.6365 Acc: 0.7465
val Loss: 0.6684 Acc: 0.7539

Epoch 31/150
train Loss: 0.5134 Acc: 0.7936
val Loss: 0.5338 Acc: 0.7974

Epoch 36/150
train Loss: 0.4148 Acc: 0.8461
val Loss: 0.4598 Acc: 0.8273

Epoch 41/150
train Loss: 0.3641 Acc: 0.8738
val Loss: 0.4156 Acc: 0.8450

Epoch 46/150
train Loss: 0.3200 Acc: 0.8993
val Loss: 0.4285 Acc: 0.8980

Epoch 51/150
train Loss: 0.2832 Acc: 0.9109
val Loss: 0.4048 Acc: 0.8939

Epoch 56/150
train Loss: 0.2760 Acc: 0.9102
val Loss: 0.3839 Acc: 0.8899

Epoch 61/150
train Loss: 0.2971 Acc: 0.8985
val Loss: 0.3319 Acc: 0.8939

Epoch 66/150
train Loss: 0.2518 Acc: 0.

In [6]:
# Case 2: raw input (no data representation), LSTM classifier.
config = config2
data_classification = mc.Classification(config, train_data, test_data)
result = data_classification.getResult()

# Original note: the test loader uses shuffle=False.
# Expected shapes: preds (2947,), probs (2947, 6).
preds, probs = result['preds'], result['probs']


Epoch 1/150
train Loss: 1.7742 Acc: 0.3477
val Loss: 1.7438 Acc: 0.3698

Epoch 6/150
train Loss: 0.9785 Acc: 0.6058
val Loss: 1.0983 Acc: 0.5846

Epoch 11/150
train Loss: 0.7324 Acc: 0.6924
val Loss: 0.8869 Acc: 0.7131

Epoch 16/150
train Loss: 0.6090 Acc: 0.7344
val Loss: 0.8187 Acc: 0.6900

Epoch 21/150
train Loss: 0.5944 Acc: 0.7301
val Loss: 0.7354 Acc: 0.7383

Epoch 26/150
train Loss: 0.5000 Acc: 0.7803
val Loss: 0.6883 Acc: 0.7451

Epoch 31/150
train Loss: 0.3777 Acc: 0.8461
val Loss: 0.6054 Acc: 0.7886

Epoch 36/150
train Loss: 0.3114 Acc: 0.9032
val Loss: 0.5671 Acc: 0.8613

Epoch 41/150
train Loss: 0.4221 Acc: 0.8526
val Loss: 0.5538 Acc: 0.8538

Epoch 46/150
train Loss: 0.2506 Acc: 0.9242
val Loss: 0.4324 Acc: 0.8749

Epoch 51/150
train Loss: 0.2291 Acc: 0.9298
val Loss: 0.3808 Acc: 0.8885

Epoch 56/150
train Loss: 0.2571 Acc: 0.9136
val Loss: 0.3531 Acc: 0.8919

Epoch 61/150
train Loss: 0.1895 Acc: 0.9396
val Loss: 0.3331 Acc: 0.8933

Epoch 66/150
train Loss: 0.4456 Acc: 0.

In [7]:
# Case 3: raw input (no data representation), GRU classifier.
config = config3
data_classification = mc.Classification(config, train_data, test_data)
result = data_classification.getResult()

# Original note: the test loader uses shuffle=False.
# Expected shapes: preds (2947,), probs (2947, 6).
preds, probs = result['preds'], result['probs']


Epoch 1/150
train Loss: 1.7494 Acc: 0.2773
val Loss: 1.6919 Acc: 0.4385

Epoch 6/150
train Loss: 0.9816 Acc: 0.5771
val Loss: 1.1222 Acc: 0.5676

Epoch 11/150
train Loss: 0.8454 Acc: 0.6388
val Loss: 0.8501 Acc: 0.7022

Epoch 16/150
train Loss: 0.6690 Acc: 0.7356
val Loss: 0.6263 Acc: 0.7607

Epoch 21/150
train Loss: 0.4714 Acc: 0.8198
val Loss: 0.4925 Acc: 0.8056

Epoch 26/150
train Loss: 0.3678 Acc: 0.8939
val Loss: 0.3986 Acc: 0.8783

Epoch 31/150
train Loss: 0.2674 Acc: 0.9230
val Loss: 0.3501 Acc: 0.8729

Epoch 36/150
train Loss: 0.2195 Acc: 0.9306
val Loss: 0.3100 Acc: 0.8967

Epoch 41/150
train Loss: 0.2101 Acc: 0.9282
val Loss: 0.3474 Acc: 0.8776

Epoch 46/150
train Loss: 0.2173 Acc: 0.9282
val Loss: 0.3525 Acc: 0.8817

Epoch 51/150
train Loss: 0.1955 Acc: 0.9316
val Loss: 0.3506 Acc: 0.8634

Epoch 56/150
train Loss: 0.1797 Acc: 0.9361
val Loss: 0.2783 Acc: 0.9021

Epoch 61/150
train Loss: 0.1914 Acc: 0.9323
val Loss: 0.2588 Acc: 0.9205

Epoch 66/150
train Loss: 0.1760 Acc: 0.

In [8]:
# Case 4: raw input (no data representation), 1-D CNN classifier.
config = config4
data_classification = mc.Classification(config, train_data, test_data)
result = data_classification.getResult()

# Original note: the test loader uses shuffle=False.
# Expected shapes: preds (2947,), probs (2947, 6).
preds, probs = result['preds'], result['probs']


Epoch 1/150
train Loss: 1.6523 Acc: 0.4560
val Loss: 1.4561 Acc: 0.6166

Epoch 6/150
train Loss: 0.4959 Acc: 0.8111
val Loss: 0.6206 Acc: 0.7886

Epoch 11/150
train Loss: 0.3488 Acc: 0.8786
val Loss: 0.4856 Acc: 0.8382

Epoch 16/150
train Loss: 0.2673 Acc: 0.9087
val Loss: 0.4041 Acc: 0.8878

Epoch 21/150
train Loss: 0.2140 Acc: 0.9276
val Loss: 0.3621 Acc: 0.8946

Epoch 26/150
train Loss: 0.1831 Acc: 0.9364
val Loss: 0.3286 Acc: 0.9137

Epoch 31/150
train Loss: 0.1601 Acc: 0.9415
val Loss: 0.3232 Acc: 0.9103

Epoch 36/150
train Loss: 0.1469 Acc: 0.9456
val Loss: 0.3089 Acc: 0.9164

Epoch 41/150
train Loss: 0.1319 Acc: 0.9507
val Loss: 0.3083 Acc: 0.9184

Epoch 46/150
train Loss: 0.1224 Acc: 0.9524
val Loss: 0.3063 Acc: 0.9137

Epoch 51/150
train Loss: 0.1168 Acc: 0.9541
val Loss: 0.2974 Acc: 0.9177

Epoch 56/150
train Loss: 0.1102 Acc: 0.9575
val Loss: 0.2907 Acc: 0.9198

Epoch 61/150
train Loss: 0.1039 Acc: 0.9568
val Loss: 0.2901 Acc: 0.9150

Epoch 66/150
train Loss: 0.0980 Acc: 0.

In [9]:
# Case 5: data representation as input, fully-connected classifier.
# NOTE(review): in the recorded run the accuracy stays around 0.17-0.19 —
# presumably because, per the note in config5, the loader currently feeds
# random representation values. Confirm before reading anything into it.
config = config5
data_classification = mc.Classification(config, train_data, test_data)
result = data_classification.getResult()

# Original note: the test loader uses shuffle=False.
# Expected shapes: preds (2947,), probs (2947, 6).
preds, probs = result['preds'], result['probs']


Epoch 1/150
train Loss: 1.8125 Acc: 0.1697
val Loss: 1.8002 Acc: 0.1720

Epoch 6/150
train Loss: 1.7975 Acc: 0.1722
val Loss: 1.7939 Acc: 0.1693

Epoch 11/150
train Loss: 1.7943 Acc: 0.1823
val Loss: 1.7934 Acc: 0.1788

Epoch 16/150
train Loss: 1.7921 Acc: 0.1777
val Loss: 1.7937 Acc: 0.1822

Epoch 21/150
train Loss: 1.7915 Acc: 0.1835
val Loss: 1.7936 Acc: 0.1774

Epoch 26/150
train Loss: 1.7908 Acc: 0.1842
val Loss: 1.7933 Acc: 0.1734

Epoch 31/150
train Loss: 1.7905 Acc: 0.1838
val Loss: 1.7931 Acc: 0.1768

Epoch 36/150
train Loss: 1.7909 Acc: 0.1842
val Loss: 1.7928 Acc: 0.1808

Epoch 41/150
train Loss: 1.7905 Acc: 0.1825
val Loss: 1.7928 Acc: 0.1781

Epoch 46/150
train Loss: 1.7905 Acc: 0.1831
val Loss: 1.7926 Acc: 0.1740

Epoch 51/150
train Loss: 1.7900 Acc: 0.1860
val Loss: 1.7926 Acc: 0.1754

Epoch 56/150
train Loss: 1.7911 Acc: 0.1796
val Loss: 1.7926 Acc: 0.1720

Epoch 61/150
train Loss: 1.7907 Acc: 0.1826
val Loss: 1.7926 Acc: 0.1700

Epoch 66/150
train Loss: 1.7905 Acc: 0.