In [1]:
import pandas as pd
import numpy as np
import main_classificaiton as mc
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
import os
import random

In [2]:
# Fix every random seed so repeated runs of the notebook are comparable
random_seed = 42

# Python's built-in generator and NumPy's global generator
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (default generator plus the CUDA one)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN: turn off the benchmark autotuner and request deterministic algorithms
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. Without data representation, RNN model
config1 = dict(
    with_representation=False,  # True when a representation is used for classification, False otherwise
    # Algorithm used for classification: one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC_layer'}
    # NOTE(review): config5 in this cell passes 'FC', not 'FC_layer' — confirm the accepted name
    model='RNN',
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        num_layers=2,  # number of recurrent layers, Default: 1
        hidden_size=64,  # dimension of the hidden state vector
        attention=False,  # if True, an attention layer is added
        dropout=0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
        bidirectional=True,  # whether the model is bidirectional
        batch_size=64,  # batch size
        device='cuda',  # training environment, one of ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 2. Without data representation, LSTM model
config2 = dict(
    with_representation=False,  # True when a representation is used for classification, False otherwise
    # Algorithm used for classification: one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC_layer'}
    # NOTE(review): config5 in this cell passes 'FC', not 'FC_layer' — confirm the accepted name
    model='LSTM',
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        num_layers=2,  # number of recurrent layers, Default: 1
        hidden_size=64,  # dimension of the hidden state vector
        attention=False,  # if True, an attention layer is added
        dropout=0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
        bidirectional=True,  # whether the model is bidirectional
        batch_size=64,  # batch size
        device='cuda',  # training environment, one of ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 3. Without data representation, GRU model
config3 = dict(
    with_representation=False,  # True when a representation is used for classification, False otherwise
    # Algorithm used for classification: one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC_layer'}
    # NOTE(review): config5 in this cell passes 'FC', not 'FC_layer' — confirm the accepted name
    model='GRU',
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        num_layers=2,  # number of recurrent layers, Default: 1
        hidden_size=64,  # dimension of the hidden state vector
        attention=False,  # if True, an attention layer is added
        dropout=0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
        bidirectional=True,  # whether the model is bidirectional
        batch_size=64,  # batch size
        device='cuda',  # training environment, one of ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 4. Without data representation, CNN_1D model
config4 = dict(
    with_representation=False,  # True when a representation is used for classification, False otherwise
    # Algorithm used for classification: one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC_layer'}
    # NOTE(review): config5 in this cell passes 'FC', not 'FC_layer' — confirm the accepted name
    model='CNN_1D',
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        output_channels=64,  # convolution channel size of output
        # Dropout probability. NOTE(review): the original note here was the RNN dropout
        # description; where dropout is applied in the CNN is not visible here — confirm.
        drop_out=0.2,
        kernel_size=3,  # convolutional filter size
        stride=1,  # stride of the convolution. Default = 1
        padding=0,  # padding added to both sides of the input. Default = 0
        batch_size=64,  # batch size
        device='cuda',  # training environment, one of ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 5. With data representation, fully-connected layers
# The size of the feature vector is assumed to be 20
config5 = dict(
    with_representation=True,  # True when a representation is used for classification, False otherwise
    model='FC',  # algorithm used for classification: one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        # Per the original note: a vector of e.g. shape (80, 20) should be supplied here
        # (80 = number of observations for the window length, 20 = dimension of the
        # representation feature vector); for now the loader fills in random values.
        input_representation=0,
        # Dropout probability. NOTE(review): the original note here was the RNN dropout
        # description; where dropout is applied in the FC model is not visible here — confirm.
        drop_out=0.2,
        batch_size=64,  # batch size
        bias=True,  # bias [True, False]
        device='cuda',  # training environment, one of ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

In [4]:
data_dir = './data'


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as loading finishes, even if unpickling raises.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


train_x = _load_pickle(os.path.join(data_dir, 'X_train.pkl'))
train_y = _load_pickle(os.path.join(data_dir, 'y_train.pkl'))
test_x = _load_pickle(os.path.join(data_dir, 'X_test.pkl'))
test_y = _load_pickle(os.path.join(data_dir, 'y_test.pkl'))

# Inputs and labels bundled as {'x': ..., 'y': ...}; these dicts are what the
# later cells pass to mc.Classification.
train_data = {'x' : train_x, 'y' : train_y}
test_data = {'x' : test_x, 'y' : test_y}

print(train_x.shape)  # shape : (num_of_instance x input_dims x window_size) = (7352, 9, 128)
print(train_y.shape)  # shape : (num_of_instance, ) = (7352, )
print(test_x.shape)  # shape : (num_of_instance x input_dims x window_size) = (2947, 9, 128)
print(test_y.shape)  # shape : (num_of_instance, ) = (2947, )

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [5]:
# Case 1: no data representation, RNN classifier (settings from config1)
config = config1
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Preview the first five entries of each result, one per line.
# Per the original notes: the test loader uses shuffle = False, and the full
# results have shape (2947, ) for pred and (2947, 6) for prob — TODO confirm
# against main_classificaiton.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.6584 Acc: 0.3578
val Loss: 1.4538 Acc: 0.3759

Epoch 6/150
train Loss: 1.0077 Acc: 0.5560
val Loss: 1.0960 Acc: 0.5289

Epoch 11/150
train Loss: 0.9814 Acc: 0.5594
val Loss: 1.0072 Acc: 0.5595

Epoch 16/150
train Loss: 0.9413 Acc: 0.5902
val Loss: 0.9829 Acc: 0.5887

Epoch 21/150
train Loss: 0.9057 Acc: 0.6235
val Loss: 0.9704 Acc: 0.6159

Epoch 26/150
train Loss: 0.6936 Acc: 0.7210
val Loss: 0.7325 Acc: 0.7410

Epoch 31/150
train Loss: 0.5424 Acc: 0.7854
val Loss: 0.5766 Acc: 0.7954

Epoch 36/150
train Loss: 0.4317 Acc: 0.8369
val Loss: 0.4270 Acc: 0.8402

Epoch 41/150
train Loss: 0.3776 Acc: 0.8636
val Loss: 0.4722 Acc: 0.8600

Epoch 46/150
train Loss: 0.3262 Acc: 0.8919
val Loss: 0.4379 Acc: 0.8702

Epoch 51/150
train Loss: 0.2836 Acc: 0.9102
val Loss: 0.4246 Acc: 0.8872

Epoch 56/150
train Loss: 0.2483 Acc: 0.9182
val Loss: 0.3875 Acc: 0.9021

Epoch 61/150
train Loss: 0.2417 Acc: 0.9197
val Loss: 0.3927 Acc: 0.9035

Epoch 66/150
train Loss: 0.2165 Acc: 0.

In [6]:
# Case 2: no data representation, LSTM classifier (settings from config2)
config = config2
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Preview the first five entries of each result, one per line.
# Per the original notes: the test loader uses shuffle = False, and the full
# results have shape (2947, ) for pred and (2947, 6) for prob — TODO confirm
# against main_classificaiton.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.7821 Acc: 0.1978
val Loss: 1.7576 Acc: 0.3297

Epoch 6/150
train Loss: 1.0517 Acc: 0.5593
val Loss: 1.1752 Acc: 0.5724

Epoch 11/150
train Loss: 0.7946 Acc: 0.6781
val Loss: 0.9321 Acc: 0.7247

Epoch 16/150
train Loss: 0.7382 Acc: 0.7223
val Loss: 0.8463 Acc: 0.7424

Epoch 21/150
train Loss: 0.5938 Acc: 0.7769
val Loss: 0.7406 Acc: 0.7587

Epoch 26/150
train Loss: 0.5457 Acc: 0.7830
val Loss: 0.6777 Acc: 0.7559

Epoch 31/150
train Loss: 0.4712 Acc: 0.8152
val Loss: 0.6655 Acc: 0.7791

Epoch 36/150
train Loss: 0.4101 Acc: 0.8456
val Loss: 0.6466 Acc: 0.7933

Epoch 41/150
train Loss: 0.3734 Acc: 0.8546
val Loss: 0.5965 Acc: 0.8226

Epoch 46/150
train Loss: 0.3122 Acc: 0.8898
val Loss: 0.5677 Acc: 0.8457

Epoch 51/150
train Loss: 0.3138 Acc: 0.8919
val Loss: 0.5175 Acc: 0.8654

Epoch 56/150
train Loss: 0.2393 Acc: 0.9216
val Loss: 0.4102 Acc: 0.8973

Epoch 61/150
train Loss: 0.2519 Acc: 0.9192
val Loss: 0.3028 Acc: 0.8973

Epoch 66/150
train Loss: 0.2013 Acc: 0.

In [7]:
# Case 3: no data representation, GRU classifier (settings from config3)
config = config3
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Preview the first five entries of each result, one per line.
# Per the original notes: the test loader uses shuffle = False, and the full
# results have shape (2947, ) for pred and (2947, 6) for prob — TODO confirm
# against main_classificaiton.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.7403 Acc: 0.3537
val Loss: 1.6700 Acc: 0.4555

Epoch 6/150
train Loss: 0.9985 Acc: 0.5676
val Loss: 1.0738 Acc: 0.5540

Epoch 11/150
train Loss: 0.8697 Acc: 0.6382
val Loss: 0.8905 Acc: 0.6540

Epoch 16/150
train Loss: 0.4917 Acc: 0.8536
val Loss: 0.4752 Acc: 0.8858

Epoch 21/150
train Loss: 0.2645 Acc: 0.9277
val Loss: 0.3380 Acc: 0.9021

Epoch 26/150
train Loss: 0.1948 Acc: 0.9408
val Loss: 0.2771 Acc: 0.9245

Epoch 31/150
train Loss: 0.1731 Acc: 0.9388
val Loss: 0.2624 Acc: 0.9293

Epoch 36/150
train Loss: 0.1524 Acc: 0.9456
val Loss: 0.2625 Acc: 0.9300

Epoch 41/150
train Loss: 0.1442 Acc: 0.9464
val Loss: 0.2558 Acc: 0.9307

Epoch 46/150
train Loss: 0.1337 Acc: 0.9488
val Loss: 0.2497 Acc: 0.9361

Epoch 51/150
train Loss: 0.1266 Acc: 0.9519
val Loss: 0.2436 Acc: 0.9341

Epoch 56/150
train Loss: 0.1228 Acc: 0.9527
val Loss: 0.2503 Acc: 0.9320

Epoch 61/150
train Loss: 0.1244 Acc: 0.9512
val Loss: 0.2425 Acc: 0.9341

Epoch 66/150
train Loss: 0.1145 Acc: 0.

In [8]:
# Case 4: no data representation, CNN_1D classifier (settings from config4)
config = config4
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Preview the first five entries of each result, one per line.
# Per the original notes: the test loader uses shuffle = False, and the full
# results have shape (2947, ) for pred and (2947, 6) for prob — TODO confirm
# against main_classificaiton.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.5987 Acc: 0.5013
val Loss: 1.3670 Acc: 0.6084

Epoch 6/150
train Loss: 0.5261 Acc: 0.8019
val Loss: 0.6510 Acc: 0.7797

Epoch 11/150
train Loss: 0.3782 Acc: 0.8662
val Loss: 0.5126 Acc: 0.8239

Epoch 16/150
train Loss: 0.2891 Acc: 0.9051
val Loss: 0.4063 Acc: 0.8817

Epoch 21/150
train Loss: 0.2326 Acc: 0.9254
val Loss: 0.3486 Acc: 0.8926

Epoch 26/150
train Loss: 0.1978 Acc: 0.9345
val Loss: 0.3156 Acc: 0.8967

Epoch 31/150
train Loss: 0.1759 Acc: 0.9415
val Loss: 0.2921 Acc: 0.9014

Epoch 36/150
train Loss: 0.1598 Acc: 0.9452
val Loss: 0.2803 Acc: 0.9055

Epoch 41/150
train Loss: 0.1453 Acc: 0.9495
val Loss: 0.2741 Acc: 0.9007

Epoch 46/150
train Loss: 0.1322 Acc: 0.9527
val Loss: 0.2675 Acc: 0.9123

Epoch 51/150
train Loss: 0.1259 Acc: 0.9539
val Loss: 0.2670 Acc: 0.9198

Epoch 56/150
train Loss: 0.1179 Acc: 0.9560
val Loss: 0.2752 Acc: 0.9225

Epoch 61/150
train Loss: 0.1124 Acc: 0.9544
val Loss: 0.2663 Acc: 0.9218

Epoch 66/150
train Loss: 0.1077 Acc: 0.

In [9]:
# Case 5: with data representation, fully-connected classifier (settings from config5)
config = config5
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# Preview the first five entries of each result, one per line.
# Per the original notes: the test loader uses shuffle = False, and the full
# results have shape (2947, ) for pred and (2947, 6) for prob — TODO confirm
# against main_classificaiton.
print(pred[:5], prob[:5], sep='\n')


Epoch 1/150
train Loss: 1.8038 Acc: 0.1717
val Loss: 1.7895 Acc: 0.1937

Epoch 6/150
train Loss: 1.7943 Acc: 0.1777
val Loss: 1.7870 Acc: 0.1971

Epoch 11/150
train Loss: 1.7936 Acc: 0.1750
val Loss: 1.7873 Acc: 0.2039

Epoch 16/150
train Loss: 1.7923 Acc: 0.1753
val Loss: 1.7874 Acc: 0.2060

Epoch 21/150
train Loss: 1.7918 Acc: 0.1785
val Loss: 1.7874 Acc: 0.2060

Epoch 26/150
train Loss: 1.7925 Acc: 0.1755
val Loss: 1.7873 Acc: 0.2026

Epoch 31/150
train Loss: 1.7925 Acc: 0.1741
val Loss: 1.7873 Acc: 0.2053

Epoch 36/150
train Loss: 1.7912 Acc: 0.1831
val Loss: 1.7873 Acc: 0.2046

Epoch 41/150
train Loss: 1.7910 Acc: 0.1738
val Loss: 1.7873 Acc: 0.2046

Epoch 46/150
train Loss: 1.7912 Acc: 0.1728
val Loss: 1.7873 Acc: 0.2039

Epoch 51/150
train Loss: 1.7908 Acc: 0.1811
val Loss: 1.7872 Acc: 0.2005

Epoch 56/150
train Loss: 1.7913 Acc: 0.1799
val Loss: 1.7873 Acc: 0.2053

Epoch 61/150
train Loss: 1.7910 Acc: 0.1794
val Loss: 1.7874 Acc: 0.2012

Epoch 66/150
train Loss: 1.7903 Acc: 0.