In [1]:
import pandas as pd
import numpy as np
import main_classificaiton as mc
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
import os
import random

In [2]:
# Fix the seed of every random number generator used in this notebook.
random_seed = 42

# Python's built-in generator and NumPy's legacy global generator.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (the explicit CUDA call is kept from the original cell).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN flags: disable benchmark mode and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [12]:
# Training device shared by every config below. Use the GPU when PyTorch can see one,
# otherwise fall back to the CPU so the notebook also runs on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


def _recurrent_config(model_name):
    """Build the config for one of the recurrent models (Cases 1-3).

    Cases 1-3 share every hyperparameter and differ only in the model name, so they
    are produced by this single factory instead of three copy-pasted literals.

    Args:
        model_name: algorithm used for classification; 'RNN', 'LSTM' or 'GRU'.

    Returns:
        A new config dict with its own nested 'parameter' dict, so that changing one
        case never affects another.
    """
    return {
        'with_representation': False,  # True if a representation is fed to the classifier, False otherwise
        'model': model_name,  # algorithm used for classification, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 50,  # length (size) of the windows the input time series is cut into
            'num_layers': 2,  # number of recurrent layers, Default : 1
            'hidden_size': 64,  # dimensionality of the hidden state vector
            'attention': False,  # if True, an attention layer is added
            'dropout': 0.2,  # If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
            'bidirectional': True,  # whether the model is bidirectional
            'batch_size': 64,  # batch size
            'device': DEVICE,  # training device, "cuda" or "cpu"
            'num_epochs': 100  # number of epochs used for training
            }
    }


# Case 1. w/o data representation & RNN model
config1 = _recurrent_config('RNN')

# Case 2. w/o data representation & LSTM model
config2 = _recurrent_config('LSTM')

# Case 3. w/o data representation & GRU model
config3 = _recurrent_config('GRU')

# Case 4. w/o data representation & CNN_1D model
config4 = {
        'with_representation': False,  # True if a representation is fed to the classifier, False otherwise
        'model': 'CNN_1D',  # algorithm used for classification, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 50,  # length (size) of the windows the input time series is cut into
            'output_channels': 64,  # convolution channel size of output
            # NOTE(review): the original comment on this key was copied from the RNN configs;
            # where the CNN applies dropout is decided inside main_classificaiton - confirm there.
            # Note the key is spelled 'drop_out' here but 'dropout' in the recurrent configs.
            'drop_out': 0.2,  # dropout probability
            'kernel_size': 3,  # convolutional filter size
            'stride': 1,  # stride of the convolution. Default = 1
            'padding': 0,  # padding added to both sides of the input. Default = 0
            'batch_size': 64,  # batch size
            'device': DEVICE,  # training device, "cuda" or "cpu"
            'num_epochs': 100  # number of epochs used for training
            }
}

# Case 5. w/ data representation & fully-connected layers
# The feature vector size is assumed to be 20.
config5 = {
        'with_representation': True,  # True if a representation is fed to the classifier, False otherwise
        # Earlier comments listed this option as 'FC_layer'; 'FC' is the value actually passed here.
        'model': 'FC',  # algorithm used for classification, one of {'RNN', 'LSTM', 'GRU', 'CNN_1D', 'FC'}

        'parameter': {
            'window_size': 50,  # length (size) of the windows the input time series is cut into
            # Author's note: this should be e.g. an (80, 20) array (80 = number of observations
            # given the window length, 20 = representation feature dimension). For now it is a
            # placeholder and the loader fills in random values.
            'input_representation': 0,
            # NOTE(review): original comment was copied from the RNN configs; confirm how the
            # fully-connected model applies dropout in main_classificaiton.
            'drop_out': 0.2,  # dropout probability
            'batch_size': 64,  # batch size
            'bias': True,  # bias [True, False]
            'device': DEVICE,  # training device, "cuda" or "cpu"
            'num_epochs': 100  # number of epochs used for training
            }
}

In [8]:
# Scratch inspection of a copied training file ('복사본' in the file name is Korean for "copy").
# data_dir is set here so this cell runs on a fresh kernel; originally it was only
# defined by a cell further down the notebook.
data_dir = './data'

# NOTE: pickle.load can execute arbitrary code - only open files from a trusted source.
# The `with` block closes the file handle as soon as the object has been unpickled.
with open(os.path.join(data_dir, 'x_train - 복사본.pkl'), 'rb') as f:
    temp_x = pickle.load(f)

In [9]:
# Show the dimensions of the scratch array; the recorded output below is (21, 561, 281).
temp_x.shape

(21, 561, 281)

In [11]:
# Render the first entry of temp_x as a table (281 columns in the recorded output below).
pd.DataFrame(temp_x[0])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,271,272,273,274,275,276,277,278,279,280
0,0.288585,0.278419,0.279653,0.279174,0.276629,0.277199,0.279454,0.277432,0.277293,0.280586,...,0.398249,0.328590,0.240039,0.278460,0.266821,0.286568,0.305840,0.310360,0.287725,0.220035
1,-0.020294,-0.016411,-0.019467,-0.026201,-0.016570,-0.010098,-0.019641,-0.030488,-0.021751,-0.009960,...,0.027318,-0.023303,-0.017442,-0.023398,-0.018607,0.020619,-0.002192,-0.054635,-0.038922,-0.007873
2,-0.132905,-0.123520,-0.113462,-0.123283,-0.115362,-0.105137,-0.110022,-0.125360,-0.120751,-0.106065,...,-0.062956,-0.081836,-0.085923,-0.130455,-0.129805,-0.092536,-0.124971,-0.139121,-0.121019,-0.105861
3,-0.995279,-0.998245,-0.995380,-0.996091,-0.998139,-0.997335,-0.996921,-0.996559,-0.997328,-0.994803,...,-0.285904,-0.269353,-0.271956,-0.277102,-0.332375,-0.358399,-0.309300,-0.299196,-0.345199,-0.325851
4,-0.983111,-0.975300,-0.967187,-0.983403,-0.980817,-0.990487,-0.967186,-0.966728,-0.961245,-0.972758,...,0.162323,0.267064,0.166483,0.122801,0.188234,0.184799,0.191434,0.131415,0.065631,0.069517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
556,-0.464761,-0.732626,0.100699,0.640011,0.693578,0.275041,0.014637,-0.561871,-0.234313,-0.482871,...,0.677174,0.774611,0.293660,-0.186212,-0.309551,-0.886370,-0.208520,0.323233,0.196858,0.445547
557,-0.018446,0.703511,0.808529,-0.485366,-0.615971,-0.368224,-0.189512,0.467383,0.117797,-0.070670,...,-0.105358,0.085763,-0.116361,-0.283945,0.178750,0.500656,0.072863,0.009463,0.435944,-0.740208
558,-0.841247,-0.844788,-0.848933,-0.848649,-0.847865,-0.849632,-0.852150,-0.851017,-0.847971,-0.848294,...,-0.656642,-0.673466,-0.681290,-0.696572,-0.694272,-0.687830,-0.700868,-0.711755,-0.705702,-0.697422
559,0.179941,0.180289,0.180637,0.181935,0.185151,0.184823,0.182170,0.183779,0.188982,0.190310,...,0.317010,0.305444,0.300935,0.292482,0.294402,0.299898,0.292534,0.285024,0.289208,0.294479


In [3]:
data_dir = './data'

# Load the train/test inputs and labels. Each file is opened in a `with` block so the
# handle is closed right after unpickling instead of being left to the garbage collector.
# NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
with open(os.path.join(data_dir, 'X_train.pkl'), 'rb') as f:
    train_x = pickle.load(f)
with open(os.path.join(data_dir, 'y_train.pkl'), 'rb') as f:
    train_y = pickle.load(f)
with open(os.path.join(data_dir, 'X_test.pkl'), 'rb') as f:
    test_x = pickle.load(f)
with open(os.path.join(data_dir, 'y_test.pkl'), 'rb') as f:
    test_y = pickle.load(f)

# Left disabled by the author: the dicts handed to the classifier are not built here.
# train_data = {'x' : train_x, 'y' : train_y}
# test_data = {'x' : test_x, 'y' : test_y}

In [14]:
# Print the shape of each loaded split; the values in the comments match the recorded output below.
print(train_x.shape)  # shape : (num_of_instance x input_dims x window_size) = (7352, 9, 128)
print(train_y.shape) # shape : (num_of_instance,) = (7352,) - one label per instance
print(test_x.shape)  # shape : (num_of_instance x input_dims x window_size) = (2947, 9, 128)
print(test_y.shape)  # shape : (num_of_instance,) = (2947,) - one label per instance

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [12]:
# Show the dimensions of the training labels; the recorded output below is (7352,).
train_y.shape

(7352,)

In [7]:
# Show the dimensions of the test inputs; the recorded output below is (2947, 9, 128).
test_x.shape

(2947, 9, 128)

In [6]:
# Render the first training instance as a table (9 rows x 128 columns in the recorded output below).
pd.DataFrame(train_x[0])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.000181,0.010139,0.009276,0.005066,0.01081,0.004045,0.004757,0.006214,0.003307,0.007572,...,0.001412,-0.001509,6e-05,0.000435,-0.000819,0.000228,-0.0003,-0.001147,-0.000222,0.001576
1,0.010767,0.006579,0.008929,0.007489,0.006141,0.006944,0.003552,0.002537,0.004085,0.005118,...,0.000172,0.001756,-7.6e-05,-0.004053,-0.004295,-0.002929,-0.002023,0.000171,0.001574,0.00307
2,0.055561,0.055125,0.048405,0.049775,0.043013,0.044729,0.043719,0.035379,0.031188,0.023567,...,0.000339,-0.000842,-0.005926,-0.010068,-0.008023,-0.003412,0.000359,0.002648,0.002381,-0.00227
3,0.030191,0.043711,0.035688,0.040402,0.047097,0.050185,0.050545,0.044992,0.047686,0.046812,...,0.012667,0.010475,0.011098,0.013411,0.018454,0.025197,0.032328,0.039852,0.037449,0.028818
4,0.066014,0.042699,0.07485,0.05732,0.052343,0.069174,0.049867,0.056751,0.058189,0.043199,...,-0.002878,-0.002945,-0.003434,-0.005051,-0.004988,-0.005166,-0.001298,0.001909,-8e-05,-3.8e-05
5,0.022859,0.010316,0.01325,0.017751,0.002553,0.007725,0.004325,0.010617,0.017189,0.010511,...,-0.006042,-0.006891,-0.004903,0.001354,0.008033,0.007355,0.002669,-0.00217,-0.005643,-0.001446
6,1.012817,1.022833,1.022028,1.017877,1.02368,1.016974,1.017746,1.019263,1.016417,1.020745,...,1.020981,1.018065,1.019638,1.020017,1.018766,1.019815,1.01929,1.018445,1.019372,1.021171
7,-0.123217,-0.126876,-0.124004,-0.124928,-0.125767,-0.124462,-0.127361,-0.127891,-0.125868,-0.124368,...,-0.123493,-0.121995,-0.12391,-0.12797,-0.128295,-0.12701,-0.126185,-0.12407,-0.122745,-0.121326
8,0.102934,0.105687,0.102102,0.106553,0.102814,0.107493,0.109386,0.103886,0.102473,0.097566,...,0.100058,0.098564,0.093177,0.088742,0.090505,0.094843,0.09835,0.100385,0.099874,0.094987


In [13]:
data_dir = './data'

# Load the inputs and the state labels used by the Case cells below.
# This rebinds train_x / train_y / test_x / test_y, replacing whatever an earlier cell
# loaded from 'X_train.pkl' / 'y_train.pkl' / 'X_test.pkl' / 'y_test.pkl'.
# Each file is opened in a `with` block so the handle is closed right after unpickling.
# NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
with open(os.path.join(data_dir, 'x_train.pkl'), 'rb') as f:
    train_x = pickle.load(f)
with open(os.path.join(data_dir, 'state_train.pkl'), 'rb') as f:
    train_y = pickle.load(f)
with open(os.path.join(data_dir, 'x_test.pkl'), 'rb') as f:
    test_x = pickle.load(f)
with open(os.path.join(data_dir, 'state_test.pkl'), 'rb') as f:
    test_y = pickle.load(f)

# Bundle inputs ('x') and labels ('y') into the dicts passed to mc.Classification.
train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

In [14]:
# Case 1. w/o data representation & RNN
# Select the RNN config, build the classifier from the train/test dicts and keep what
# getResult() returns.
# NOTE(review): `config`, `data_classification` and `test_accuracy` are rebound by every
# Case cell, so only the most recently run case's result stays available.
# Presumably getResult() runs the training logged below and returns the test accuracy -
# confirm in main_classificaiton.
config = config1
data_classification = mc.Classification(config, train_data, test_data)
test_accuracy = data_classification.getResult()

Epoch 1/100
----------
train Loss: 1.7534 Acc: 0.1250
val Loss: 1.6363 Acc: 0.2400

Epoch 2/100
----------
train Loss: 1.6556 Acc: 0.2125
val Loss: 1.5519 Acc: 0.4400

Epoch 3/100
----------
train Loss: 1.5757 Acc: 0.4125
val Loss: 1.4809 Acc: 0.5200

Epoch 4/100
----------
train Loss: 1.5100 Acc: 0.4250
val Loss: 1.4213 Acc: 0.5200

Epoch 5/100
----------
train Loss: 1.4546 Acc: 0.4375
val Loss: 1.3730 Acc: 0.5200

Epoch 6/100
----------
train Loss: 1.4105 Acc: 0.4125
val Loss: 1.3356 Acc: 0.5200

Epoch 7/100
----------
train Loss: 1.3754 Acc: 0.4250
val Loss: 1.3068 Acc: 0.5200

Epoch 8/100
----------
train Loss: 1.3448 Acc: 0.4250
val Loss: 1.2837 Acc: 0.5200

Epoch 9/100
----------
train Loss: 1.3204 Acc: 0.4250
val Loss: 1.2664 Acc: 0.5200

Epoch 10/100
----------
train Loss: 1.2967 Acc: 0.4500
val Loss: 1.2545 Acc: 0.5200

Epoch 11/100
----------
train Loss: 1.2747 Acc: 0.4625
val Loss: 1.2456 Acc: 0.5200

Epoch 12/100
----------
train Loss: 1.2561 Acc: 0.5000
val Loss: 1.2390 Ac

In [15]:
# Case 2. w/o data representation & LSTM
# Select the LSTM config, build the classifier from the train/test dicts and keep what
# getResult() returns.
# NOTE(review): `config`, `data_classification` and `test_accuracy` are rebound by every
# Case cell, so only the most recently run case's result stays available.
# Presumably getResult() runs the training logged below and returns the test accuracy -
# confirm in main_classificaiton.
config = config2
data_classification = mc.Classification(config, train_data, test_data)
test_accuracy = data_classification.getResult()

Epoch 1/100
----------
train Loss: 1.8122 Acc: 0.0125
val Loss: 1.7687 Acc: 0.4800

Epoch 2/100
----------
train Loss: 1.7889 Acc: 0.3500
val Loss: 1.7440 Acc: 0.6800

Epoch 3/100
----------
train Loss: 1.7664 Acc: 0.5125
val Loss: 1.7192 Acc: 0.6400

Epoch 4/100
----------
train Loss: 1.7452 Acc: 0.5125
val Loss: 1.6954 Acc: 0.6000

Epoch 5/100
----------
train Loss: 1.7235 Acc: 0.5125
val Loss: 1.6731 Acc: 0.6000

Epoch 6/100
----------
train Loss: 1.7014 Acc: 0.5125
val Loss: 1.6515 Acc: 0.6000

Epoch 7/100
----------
train Loss: 1.6814 Acc: 0.5000
val Loss: 1.6300 Acc: 0.6000

Epoch 8/100
----------
train Loss: 1.6604 Acc: 0.5000
val Loss: 1.6089 Acc: 0.6000

Epoch 9/100
----------
train Loss: 1.6393 Acc: 0.5000
val Loss: 1.5880 Acc: 0.6000

Epoch 10/100
----------
train Loss: 1.6196 Acc: 0.5000
val Loss: 1.5670 Acc: 0.6000

Epoch 11/100
----------
train Loss: 1.5990 Acc: 0.5000
val Loss: 1.5464 Acc: 0.6000

Epoch 12/100
----------
train Loss: 1.5786 Acc: 0.5125
val Loss: 1.5257 Ac

In [16]:
# Case 3. w/o data representation & GRU
# Select the GRU config, build the classifier from the train/test dicts and keep what
# getResult() returns.
# NOTE(review): `config`, `data_classification` and `test_accuracy` are rebound by every
# Case cell, so only the most recently run case's result stays available.
# Presumably getResult() runs the training logged below and returns the test accuracy -
# confirm in main_classificaiton.
config = config3
data_classification = mc.Classification(config, train_data, test_data)
test_accuracy = data_classification.getResult()

Epoch 1/100
----------
train Loss: 1.8700 Acc: 0.0000
val Loss: 1.8108 Acc: 0.0400

Epoch 2/100
----------
train Loss: 1.8000 Acc: 0.1000
val Loss: 1.7409 Acc: 0.2400

Epoch 3/100
----------
train Loss: 1.7355 Acc: 0.2875
val Loss: 1.6773 Acc: 0.4800

Epoch 4/100
----------
train Loss: 1.6768 Acc: 0.4000
val Loss: 1.6191 Acc: 0.5200

Epoch 5/100
----------
train Loss: 1.6251 Acc: 0.4125
val Loss: 1.5679 Acc: 0.5200

Epoch 6/100
----------
train Loss: 1.5756 Acc: 0.4250
val Loss: 1.5228 Acc: 0.5200

Epoch 7/100
----------
train Loss: 1.5337 Acc: 0.4250
val Loss: 1.4808 Acc: 0.5200

Epoch 8/100
----------
train Loss: 1.4958 Acc: 0.4250
val Loss: 1.4427 Acc: 0.5200

Epoch 9/100
----------
train Loss: 1.4605 Acc: 0.4250
val Loss: 1.4082 Acc: 0.5200

Epoch 10/100
----------
train Loss: 1.4308 Acc: 0.4250
val Loss: 1.3761 Acc: 0.5200

Epoch 11/100
----------
train Loss: 1.4047 Acc: 0.4250
val Loss: 1.3470 Acc: 0.5200

Epoch 12/100
----------
train Loss: 1.3812 Acc: 0.4250
val Loss: 1.3215 Ac

In [17]:
# Case 4. w/o data representation & CNN_1D
# Select the CNN_1D config, build the classifier from the train/test dicts and keep what
# getResult() returns.
# NOTE(review): `config`, `data_classification` and `test_accuracy` are rebound by every
# Case cell, so only the most recently run case's result stays available.
# Presumably getResult() runs the training logged below and returns the test accuracy -
# confirm in main_classificaiton.
config = config4
data_classification = mc.Classification(config, train_data, test_data)
test_accuracy = data_classification.getResult()

Epoch 1/100
----------
train Loss: 1.7814 Acc: 0.2500
val Loss: 1.7544 Acc: 0.1200

Epoch 2/100
----------
train Loss: 1.7095 Acc: 0.2875
val Loss: 1.6926 Acc: 0.1200

Epoch 3/100
----------
train Loss: 1.6575 Acc: 0.3125
val Loss: 1.6356 Acc: 0.1200

Epoch 4/100
----------
train Loss: 1.5843 Acc: 0.3875
val Loss: 1.5825 Acc: 0.2400

Epoch 5/100
----------
train Loss: 1.5424 Acc: 0.4125
val Loss: 1.5268 Acc: 0.5600

Epoch 6/100
----------
train Loss: 1.4902 Acc: 0.3500
val Loss: 1.4675 Acc: 0.5600

Epoch 7/100
----------
train Loss: 1.4291 Acc: 0.4625
val Loss: 1.4075 Acc: 0.5600

Epoch 8/100
----------
train Loss: 1.3764 Acc: 0.4375
val Loss: 1.3494 Acc: 0.5200

Epoch 9/100
----------
train Loss: 1.3315 Acc: 0.4250
val Loss: 1.3014 Acc: 0.5200

Epoch 10/100
----------
train Loss: 1.2848 Acc: 0.4250
val Loss: 1.2664 Acc: 0.5200

Epoch 11/100
----------
train Loss: 1.2473 Acc: 0.4125
val Loss: 1.2448 Acc: 0.5200

Epoch 12/100
----------
train Loss: 1.1952 Acc: 0.4375
val Loss: 1.2327 Ac

In [18]:
# Case 5. w/ data representation & fully-connected layers
# Select the fully-connected config, build the classifier from the train/test dicts and
# keep what getResult() returns.
# NOTE(review): `config`, `data_classification` and `test_accuracy` are rebound by every
# Case cell, so only the most recently run case's result stays available.
# Presumably getResult() runs the training logged below and returns the test accuracy -
# confirm in main_classificaiton.
config = config5
data_classification = mc.Classification(config, train_data, test_data)
test_accuracy = data_classification.getResult()

Epoch 1/100
----------
train Loss: 1.7696 Acc: 0.2750
val Loss: 1.8004 Acc: 0.2800

Epoch 2/100
----------
train Loss: 1.7765 Acc: 0.2625
val Loss: 1.8002 Acc: 0.2800

Epoch 3/100
----------
train Loss: 1.7764 Acc: 0.2875
val Loss: 1.7999 Acc: 0.2800

Epoch 4/100
----------
train Loss: 1.7738 Acc: 0.2250
val Loss: 1.7997 Acc: 0.2800

Epoch 5/100
----------
train Loss: 1.7655 Acc: 0.2750
val Loss: 1.7994 Acc: 0.2800

Epoch 6/100
----------
train Loss: 1.7693 Acc: 0.2500
val Loss: 1.7991 Acc: 0.2800

Epoch 7/100
----------
train Loss: 1.7608 Acc: 0.2500
val Loss: 1.7989 Acc: 0.2800

Epoch 8/100
----------
train Loss: 1.7650 Acc: 0.2750
val Loss: 1.7987 Acc: 0.2800

Epoch 9/100
----------
train Loss: 1.7627 Acc: 0.3000
val Loss: 1.7985 Acc: 0.2800

Epoch 10/100
----------
train Loss: 1.7691 Acc: 0.2500
val Loss: 1.7982 Acc: 0.2800

Epoch 11/100
----------
train Loss: 1.7615 Acc: 0.2875
val Loss: 1.7980 Acc: 0.2800

Epoch 12/100
----------
train Loss: 1.7613 Acc: 0.3250
val Loss: 1.7978 Ac