In [1]:
import pandas as pd
import numpy as np
import main_classificaiton as mc
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
import os
import random
from sklearn.metrics import classification_report

In [2]:
# Fix the random seeds
random_seed = 42

# Python and NumPy generators
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (default and CUDA)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN flags: benchmark mode off, deterministic mode on
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. Without data representation, LSTM model
config1 = dict(
    with_representation=False,  # True if a representation is used for classification, False otherwise
    model='LSTM',  # algorithm used for classification, pick one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC_layer'}
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        num_layers=2,  # number of recurrent layers, Default : 1
        hidden_size=64,  # dimensionality of the hidden state vector
        attention=False,  # if True, an attention layer is added
        dropout=0.1,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
        bidirectional=True,  # whether the model is bidirectional
        batch_size=64,  # batch size
        device='cuda',  # training environment, choose from ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 2. Without data representation, GRU model
config2 = dict(
    with_representation=False,  # True if a representation is used for classification, False otherwise
    model='GRU',  # algorithm used for classification, pick one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC_layer'}
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        num_layers=2,  # number of recurrent layers, Default : 1
        hidden_size=64,  # dimensionality of the hidden state vector
        attention=False,  # if True, an attention layer is added
        dropout=0.1,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
        bidirectional=True,  # whether the model is bidirectional
        batch_size=64,  # batch size
        device='cuda',  # training environment, choose from ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 3. Without data representation, CNN_1D model
config3 = dict(
    with_representation=False,  # True if a representation is used for classification, False otherwise
    model='CNN_1D',  # algorithm used for classification, pick one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC_layer'}
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        output_channels=64,  # convolution channel size of output
        drop_out=0.1,  # dropout probability; NOTE(review): where it is applied is decided in main_classificaiton - confirm there
        kernel_size=3,  # convolutional filter size
        stride=1,  # stride of the convolution. Default = 1
        padding=0,  # padding added to both sides of the input. Default = 0
        batch_size=64,  # batch size
        device='cuda',  # training environment, choose from ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 4. Without data representation, LSTM_FCNs model
config4 = dict(
    with_representation=False,  # True if a representation is used for classification, False otherwise
    model='LSTM_FCNs',  # algorithm used for classification, pick one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC_layer'}
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        num_layers=1,  # number of recurrent layers, Default : 1
        lstm_drop_out=0.4,  # dropout probability, presumably for the LSTM part - TODO confirm in main_classificaiton
        fc_drop_out=0.1,  # dropout probability, presumably for the FC part - TODO confirm in main_classificaiton
        batch_size=256,  # batch size
        device='cuda',  # training environment, choose from ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

# Case 5. With data representation, fully-connected layers
config5 = dict(
    with_representation=True,  # True if a representation is used for classification, False otherwise
    # algorithm used for classification, pick one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}
    # NOTE(review): the other configs list this option as 'FC_layer' - confirm which name mc.Classification expects
    model='FC',
    parameter=dict(
        window_size=128,  # length (size) of the windows the input time series is cut into
        drop_out=0.1,  # dropout probability; NOTE(review): where it is applied is decided in main_classificaiton - confirm there
        batch_size=64,  # batch size
        bias=True,  # bias [True, False]
        device='cuda',  # training environment, choose from ["cuda", "cpu"]
        num_epochs=150,  # number of epochs used for training
    ),
)

In [4]:
# Raw data
data_dir = './data'


def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    loading finishes (the bare `pickle.load(open(...))` form never closes it).
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


train_x = _load_pickle(os.path.join(data_dir, 'X_train.pkl'))
train_y = _load_pickle(os.path.join(data_dir, 'y_train.pkl'))
test_x = _load_pickle(os.path.join(data_dir, 'X_test.pkl'))
test_y = _load_pickle(os.path.join(data_dir, 'y_test.pkl'))

# Bundle inputs and labels in the {'x': ..., 'y': ...} form passed to mc.Classification
train_data = {'x' : train_x, 'y' : train_y}
test_data = {'x' : test_x, 'y' : test_y}

print(train_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (7352, 9, 128)
print(train_y.shape) #shape : (num_of_instance) = (7352, )
print(test_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (2947, 9, 128)
print(test_y.shape)  #shape : (num_of_instance) = (2947, )

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [5]:
# Case 1. Without data representation, LSTM
config = config1
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# test_loader : shuffle = False
# Preview the first five entries; noted shapes are pred (2947, ) and prob (2947, 6)
for preview in (pred[:5], prob[:5]):
    print(preview)

# Per-class metrics as a table
report = classification_report(test_data['y'], pred, output_dict=True)
print(pd.DataFrame(report))


Epoch 1/150
train Loss: 1.7789 Acc: 0.2663
val Loss: 1.7555 Acc: 0.3569

Epoch 6/150
train Loss: 1.0515 Acc: 0.5273
val Loss: 1.1120 Acc: 0.5574

Epoch 11/150
train Loss: 0.9011 Acc: 0.6179
val Loss: 0.9829 Acc: 0.6071

Epoch 16/150
train Loss: 0.7906 Acc: 0.6473
val Loss: 0.7324 Acc: 0.6717

Epoch 21/150
train Loss: 0.6235 Acc: 0.7009
val Loss: 0.6675 Acc: 0.6540

Epoch 26/150
train Loss: 0.5685 Acc: 0.7247
val Loss: 0.6062 Acc: 0.7077

Epoch 31/150
train Loss: 0.5548 Acc: 0.7473
val Loss: 0.6014 Acc: 0.7077

Epoch 36/150
train Loss: 0.5036 Acc: 0.7701
val Loss: 0.5988 Acc: 0.7315

Epoch 41/150
train Loss: 0.4671 Acc: 0.7961
val Loss: 0.5340 Acc: 0.7668

Epoch 46/150
train Loss: 0.4766 Acc: 0.7903
val Loss: 0.5355 Acc: 0.7648

Epoch 51/150
train Loss: 0.4500 Acc: 0.7966
val Loss: 0.5546 Acc: 0.7709

Epoch 56/150
train Loss: 0.4335 Acc: 0.8104
val Loss: 0.5610 Acc: 0.7757

Epoch 61/150
train Loss: 0.4243 Acc: 0.8150
val Loss: 0.6111 Acc: 0.7736

Epoch 66/150
train Loss: 0.4101 Acc: 0.

In [6]:
# Case 2. Without data representation, GRU
config = config2
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# test_loader : shuffle = False
# Preview the first five entries; noted shapes are pred (2947, ) and prob (2947, 6)
for preview in (pred[:5], prob[:5]):
    print(preview)

# Per-class metrics as a table
report = classification_report(test_data['y'], pred, output_dict=True)
print(pd.DataFrame(report))


Epoch 1/150
train Loss: 1.7381 Acc: 0.2835
val Loss: 1.6610 Acc: 0.3725

Epoch 6/150
train Loss: 1.0349 Acc: 0.5535
val Loss: 1.1303 Acc: 0.5670

Epoch 11/150
train Loss: 0.7380 Acc: 0.7028
val Loss: 0.7783 Acc: 0.6988

Epoch 16/150
train Loss: 0.4793 Acc: 0.7851
val Loss: 0.5477 Acc: 0.7879

Epoch 21/150
train Loss: 0.4056 Acc: 0.8165
val Loss: 0.5346 Acc: 0.7865

Epoch 26/150
train Loss: 0.3649 Acc: 0.8427
val Loss: 0.5201 Acc: 0.8239

Epoch 31/150
train Loss: 0.3154 Acc: 0.8842
val Loss: 0.4668 Acc: 0.8804

Epoch 36/150
train Loss: 0.2150 Acc: 0.9359
val Loss: 0.2604 Acc: 0.9354

Epoch 41/150
train Loss: 0.1714 Acc: 0.9444
val Loss: 0.2349 Acc: 0.9381

Epoch 46/150
train Loss: 0.1570 Acc: 0.9434
val Loss: 0.2270 Acc: 0.9395

Epoch 51/150
train Loss: 0.1436 Acc: 0.9447
val Loss: 0.2231 Acc: 0.9381

Epoch 56/150
train Loss: 0.1376 Acc: 0.9463
val Loss: 0.2210 Acc: 0.9388

Epoch 61/150
train Loss: 0.1270 Acc: 0.9497
val Loss: 0.2103 Acc: 0.9415

Epoch 66/150
train Loss: 0.1179 Acc: 0.

In [7]:
# Case 3. Without data representation, CNN_1D
config = config3
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# test_loader : shuffle = False
# Preview the first five entries; noted shapes are pred (2947, ) and prob (2947, 6)
for preview in (pred[:5], prob[:5]):
    print(preview)

# Per-class metrics as a table
report = classification_report(test_data['y'], pred, output_dict=True)
print(pd.DataFrame(report))


Epoch 1/150
train Loss: 1.6921 Acc: 0.4606
val Loss: 1.5258 Acc: 0.6220

Epoch 6/150
train Loss: 0.5038 Acc: 0.8153
val Loss: 0.6735 Acc: 0.8035

Epoch 11/150
train Loss: 0.3418 Acc: 0.8772
val Loss: 0.6024 Acc: 0.8389

Epoch 16/150
train Loss: 0.2777 Acc: 0.8997
val Loss: 0.5746 Acc: 0.8559

Epoch 21/150
train Loss: 0.2316 Acc: 0.9199
val Loss: 0.5430 Acc: 0.8749

Epoch 26/150
train Loss: 0.1975 Acc: 0.9306
val Loss: 0.5114 Acc: 0.8939

Epoch 31/150
train Loss: 0.1694 Acc: 0.9381
val Loss: 0.4864 Acc: 0.9028

Epoch 36/150
train Loss: 0.1462 Acc: 0.9486
val Loss: 0.4714 Acc: 0.9028

Epoch 41/150
train Loss: 0.1299 Acc: 0.9534
val Loss: 0.4587 Acc: 0.9082

Epoch 46/150
train Loss: 0.1189 Acc: 0.9560
val Loss: 0.4640 Acc: 0.9055

Epoch 51/150
train Loss: 0.1088 Acc: 0.9575
val Loss: 0.4569 Acc: 0.9096

Epoch 56/150
train Loss: 0.1012 Acc: 0.9585
val Loss: 0.4401 Acc: 0.9116

Epoch 61/150
train Loss: 0.0946 Acc: 0.9597
val Loss: 0.4293 Acc: 0.9109

Epoch 66/150
train Loss: 0.0899 Acc: 0.

In [8]:
# Case 4. Without data representation, LSTM_FCNs
config = config4
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# test_loader : shuffle = False
# Preview the first five entries; noted shapes are pred (2947, ) and prob (2947, 6)
for preview in (pred[:5], prob[:5]):
    print(preview)

# Per-class metrics as a table
report = classification_report(test_data['y'], pred, output_dict=True)
print(pd.DataFrame(report))


Epoch 1/150
train Loss: 1.6852 Acc: 0.4542
val Loss: 1.7355 Acc: 0.5656

Epoch 6/150
train Loss: 1.3337 Acc: 0.6819
val Loss: 1.3285 Acc: 0.6567

Epoch 11/150
train Loss: 0.9891 Acc: 0.8062
val Loss: 0.9974 Acc: 0.7927

Epoch 16/150
train Loss: 0.8252 Acc: 0.9223
val Loss: 0.8648 Acc: 0.8953

Epoch 21/150
train Loss: 0.6896 Acc: 0.9415
val Loss: 0.7679 Acc: 0.9007

Epoch 26/150
train Loss: 0.5904 Acc: 0.9546
val Loss: 0.6917 Acc: 0.9028

Epoch 31/150
train Loss: 0.5156 Acc: 0.9594
val Loss: 0.6275 Acc: 0.9041

Epoch 36/150
train Loss: 0.4533 Acc: 0.9619
val Loss: 0.5726 Acc: 0.9048

Epoch 41/150
train Loss: 0.3985 Acc: 0.9636
val Loss: 0.5319 Acc: 0.9048

Epoch 46/150
train Loss: 0.3537 Acc: 0.9638
val Loss: 0.4924 Acc: 0.9055

Epoch 51/150
train Loss: 0.3157 Acc: 0.9653
val Loss: 0.4623 Acc: 0.9041

Epoch 56/150
train Loss: 0.2811 Acc: 0.9662
val Loss: 0.4415 Acc: 0.9048

Epoch 61/150
train Loss: 0.2518 Acc: 0.9657
val Loss: 0.4205 Acc: 0.9109

Epoch 66/150
train Loss: 0.2277 Acc: 0.

--------------------------------------------------

In [9]:
# Representation data
data_dir = './data'

# Inputs come from CSV files; labels are the same pickle files used for the raw data.
# Each pickle is opened in a `with` block so its file handle is closed after loading.
# NOTE: unpickling can execute arbitrary code - only load trusted files.
train_x = pd.read_csv(os.path.join(data_dir, 'ts2vec_repr_train.csv'))
with open(os.path.join(data_dir, 'y_train.pkl'), 'rb') as label_file:
    train_y = pickle.load(label_file)

test_x = pd.read_csv(os.path.join(data_dir, 'ts2vec_repr_test.csv'))
with open(os.path.join(data_dir, 'y_test.pkl'), 'rb') as label_file:
    test_y = pickle.load(label_file)

# Bundle inputs and labels in the {'x': ..., 'y': ...} form passed to mc.Classification
train_data = {'x' : train_x, 'y' : train_y}
test_data = {'x' : test_x, 'y' : test_y}

print(train_x.shape)  #shape : (num_of_instance x representation_dims) = (7352, 128)
print(train_y.shape) #shape : (num_of_instance) = (7352, )
print(test_x.shape)  #shape : (num_of_instance x representation_dims) = (2947, 128)
print(test_y.shape)  #shape : (num_of_instance) = (2947, )

(7352, 128)
(7352,)
(2947, 128)
(2947,)


In [10]:
# Case 5. With data representation, fully-connected layers
config = config5
data_classification = mc.Classification(config, train_data, test_data)
pred, prob = data_classification.getResult()

# test_loader : shuffle = False
# Preview the first five entries; noted shapes are pred (2947, ) and prob (2947, 6)
for preview in (pred[:5], prob[:5]):
    print(preview)

# Per-class metrics as a table
report = classification_report(test_data['y'], pred, output_dict=True)
print(pd.DataFrame(report))


Epoch 1/150
train Loss: 1.6923 Acc: 0.3856
val Loss: 1.6072 Acc: 0.5350

Epoch 6/150
train Loss: 0.9423 Acc: 0.6948
val Loss: 0.8945 Acc: 0.7410

Epoch 11/150
train Loss: 0.6747 Acc: 0.7614
val Loss: 0.6682 Acc: 0.7791

Epoch 16/150
train Loss: 0.5527 Acc: 0.8062
val Loss: 0.5651 Acc: 0.8178

Epoch 21/150
train Loss: 0.4724 Acc: 0.8357
val Loss: 0.4980 Acc: 0.8321

Epoch 26/150
train Loss: 0.4103 Acc: 0.8563
val Loss: 0.4440 Acc: 0.8484

Epoch 31/150
train Loss: 0.3651 Acc: 0.8786
val Loss: 0.3966 Acc: 0.8790

Epoch 36/150
train Loss: 0.3195 Acc: 0.8941
val Loss: 0.3584 Acc: 0.8960

Epoch 41/150
train Loss: 0.2863 Acc: 0.9044
val Loss: 0.3313 Acc: 0.8939

Epoch 46/150
train Loss: 0.2542 Acc: 0.9182
val Loss: 0.3054 Acc: 0.9041

Epoch 51/150
train Loss: 0.2301 Acc: 0.9281
val Loss: 0.2858 Acc: 0.9123

Epoch 56/150
train Loss: 0.2122 Acc: 0.9320
val Loss: 0.2721 Acc: 0.9164

Epoch 61/150
train Loss: 0.1963 Acc: 0.9398
val Loss: 0.2596 Acc: 0.9184

Epoch 66/150
train Loss: 0.1876 Acc: 0.