In [1]:
import torch
import pickle
import random
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import main_classificaiton as mc

In [2]:
# Fix the seed of every random number generator used in this notebook
random_seed = 42

# Python standard library and NumPy generators
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (default and CUDA)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN flags: deterministic mode on, benchmark mode off
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. LSTM model (w/o data representation)
config1 = {
    'model': 'LSTM',  # algorithm used for classification; one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    'training': True,  # whether to train; set to False when a trained model has already been saved
    'best_model_path': './ckpt/lstm.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 9,  # number of variables in the data, int
        'num_classes': 6,  # number of classes to predict, int
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # hidden state dimension, int (default: 64, range: >= 1)
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        'num_epochs': 150,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        # learning rate, float (range: <= 0.1)
        # NOTE(review): the documented default was 0.001 but 0.0001 is used here — confirm
        'lr': 0.0001,
        # training device, "cuda" or "cpu"; falls back to CPU when CUDA is not available
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    },
}

# Case 2. GRU model (w/o data representation)
config2 = {
    'model': 'GRU',  # algorithm used for classification; one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    'training': True,  # whether to train; set to False when a trained model has already been saved
    'best_model_path': './ckpt/gru.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 9,  # number of variables in the data, int
        'num_classes': 6,  # number of classes to predict, int
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # hidden state dimension, int (default: 64, range: >= 1)
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        'num_epochs': 150,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        # learning rate, float (range: <= 0.1)
        # NOTE(review): the documented default was 0.001 but 0.0001 is used here — confirm
        'lr': 0.0001,
        # training device, "cuda" or "cpu"; falls back to CPU when CUDA is not available
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    },
}

# Case 3. CNN_1D model (w/o data representation)
config3 = {
    'model': 'CNN_1D',  # algorithm used for classification; one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    'training': True,  # whether to train; set to False when a trained model has already been saved
    'best_model_path': './ckpt/cnn_1d.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 9,  # number of variables in the data, int
        'num_classes': 6,  # number of classes to predict, int
        'seq_len': 128,  # time length of the data, int
        'output_channels': 64,  # output channels of the convolution layer, int (default: 64, range: >= 1, powers of 2 recommended)
        'kernel_size': 3,  # filter size of the convolutional layer, int (default: 3, range: >= 3, odd values recommended)
        'stride': 1,  # stride of the convolution layer, int (default: 1, range: >= 1)
        'padding': 0,  # padding size, int (default: 0, range: >= 0)
        'drop_out': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'num_epochs': 150,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        'lr': 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # training device, "cuda" or "cpu"; falls back to CPU when CUDA is not available
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    },
}

# Case 4. LSTM_FCNs model (w/o data representation)
config4 = {
    'model': 'LSTM_FCNs',  # algorithm used for classification; one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    'training': True,  # whether to train; set to False when a trained model has already been saved
    'best_model_path': './ckpt/lstm_fcn.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 9,  # number of variables in the data, int
        'num_classes': 6,  # number of classes to predict, int
        'num_layers': 1,  # number of recurrent layers, int (default: 1, range: >= 1)
        'lstm_drop_out': 0.4,  # LSTM dropout probability, float (default: 0.4, range: 0 to 1)
        'fc_drop_out': 0.1,  # FC dropout probability, float (default: 0.1, range: 0 to 1)
        'num_epochs': 150,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        'lr': 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # training device, "cuda" or "cpu"; falls back to CPU when CUDA is not available
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    },
}

# Case 5. fully-connected layers (w/ data representation)
config5 = {
    'model': 'FC',  # algorithm used for classification; one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    'training': True,  # whether to train; set to False when a trained model has already been saved
    'best_model_path': './ckpt/fc.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 64,  # number of variables in the data (representation dimension), int
        'num_classes': 6,  # number of classes to predict, int
        'drop_out': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bias': True,  # whether to use a bias term, bool (default: True)
        'num_epochs': 150,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        'lr': 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # training device, "cuda" or "cpu"; falls back to CPU when CUDA is not available
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    },
}

In [4]:
# raw time series data
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# Each file is opened in a `with` block so its handle is closed right after loading.
with open('./data/x_train.pkl', 'rb') as f:
    train_x = pickle.load(f)
with open('./data/y_train.pkl', 'rb') as f:
    train_y = pickle.load(f)
with open('./data/x_test.pkl', 'rb') as f:
    test_x = pickle.load(f)
with open('./data/y_test.pkl', 'rb') as f:
    test_y = pickle.load(f)

print(train_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (7352, 9, 128)
print(train_y.shape) #shape : (num_of_instance) = (7352, )
print(test_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (2947, 9, 128)
print(test_y.shape)  #shape : (num_of_instance) = (2947, )

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [5]:
# scaling time series data
# The scaler is fitted on the training data only and then applied to both splits.
scaler = MinMaxScaler()

if len(train_x.shape) < 3:
    # 2-D input: fit directly on the array
    scaler = scaler.fit(train_x)
else:
    # 3-D input: move the variable axis last and flatten to
    # (instances * window_size, input_dims) so each variable is one column
    origin_shape = train_x.shape
    scaler = scaler.fit(np.transpose(train_x, (0, 2, 1)).reshape(-1, origin_shape[1]))

scaled_x_data = []
for x_data in [train_x, test_x]:
    # Branch on the array being transformed, not on train_x
    if len(x_data.shape) < 3:
        x_data = scaler.transform(x_data)
    else:
        # Use this array's own dimensions when flattening and restoring the layout
        _, n_dims, n_steps = x_data.shape
        flat = np.transpose(x_data, (0, 2, 1)).reshape(-1, n_dims)
        flat = scaler.transform(flat)
        x_data = np.transpose(flat.reshape(-1, n_steps, n_dims), (0, 2, 1))

    scaled_x_data.append(x_data)
train_x, test_x = scaled_x_data

print(train_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (7352, 9, 128)
print(test_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (2947, 9, 128)

(7352, 9, 128)
(2947, 9, 128)


In [6]:
# Bundle inputs ('x') and labels ('y') for each split
train_data = dict(x=train_x, y=train_y)
test_data = dict(x=test_x, y=test_y)

In [7]:
# Case 1. LSTM model (w/o data representation)
config = config1
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()  # build the model

ckpt_path = config["best_model_path"]

# Train and save only when training is enabled in the config
if config["training"]:
    best_model = data_cls.train_model(model)  # train the model
    data_cls.save_model(best_model, best_model_path=ckpt_path)  # save the model

# class prediction
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7866 Acc: 0.2522
val Loss: 1.7781 Acc: 0.3562

Epoch 10/150
train Loss: 1.0309 Acc: 0.5445
val Loss: 1.1062 Acc: 0.5269

Epoch 20/150
train Loss: 0.9873 Acc: 0.5570
val Loss: 1.0418 Acc: 0.4908

Epoch 30/150
train Loss: 0.9661 Acc: 0.5678
val Loss: 1.0172 Acc: 0.5058

Epoch 40/150
train Loss: 0.9543 Acc: 0.5718
val Loss: 1.0311 Acc: 0.5105

Epoch 50/150
train Loss: 0.9646 Acc: 0.5705
val Loss: 1.0260 Acc: 0.5017

Epoch 60/150
train Loss: 0.9402 Acc: 0.5798
val Loss: 0.9992 Acc: 0.5432

Epoch 70/150
train Loss: 0.9203 Acc: 0.5911
val Loss: 0.9986 Acc: 0.5221

Epoch 80/150
train Loss: 0.9095 Acc: 0.6101
val Loss: 1.0059 Acc: 0.5622

Epoch 90/150
train Loss: 0.8829 Acc: 0.6300
val Loss: 0.9972 Acc: 0.5568

Epoch 100/150
train Loss: 0.8737 Acc: 0.6189
val Loss: 0.9597 Acc: 0.5636

Epoch 110/150
train Loss: 0.6764 Acc: 0.7315
val Loss: 0.7555 Acc: 0.6805

Epoch 120/150
train Loss: 0.4947 Acc: 0.8260
val Loss: 0.5623 Acc: 0.7818

Epoch 130/150


In [8]:
# Show the accuracy, then the shapes of `pred` and `prob`
print(acc)
result_shapes = (pred.shape, prob.shape)
print(*result_shapes)

0.7987784187309128
(2947,) (2947, 6)


In [9]:
# Case 2. GRU (w/o data representation)
config = config2
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()  # build the model

ckpt_path = config["best_model_path"]

# Train and save only when training is enabled in the config
if config["training"]:
    best_model = data_cls.train_model(model)  # train the model
    data_cls.save_model(best_model, best_model_path=ckpt_path)  # save the model

# class prediction
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7808 Acc: 0.1845
val Loss: 1.7641 Acc: 0.1992

Epoch 10/150
train Loss: 1.0581 Acc: 0.5329
val Loss: 1.0990 Acc: 0.4854

Epoch 20/150
train Loss: 0.9882 Acc: 0.5638
val Loss: 1.1267 Acc: 0.5010

Epoch 30/150
train Loss: 0.6256 Acc: 0.7725
val Loss: 0.6747 Acc: 0.6676

Epoch 40/150
train Loss: 0.4949 Acc: 0.7954
val Loss: 0.5328 Acc: 0.7825

Epoch 50/150
train Loss: 0.4421 Acc: 0.8051
val Loss: 0.4884 Acc: 0.7906

Epoch 60/150
train Loss: 0.4072 Acc: 0.8262
val Loss: 0.4703 Acc: 0.7961

Epoch 70/150
train Loss: 0.3840 Acc: 0.8339
val Loss: 0.4865 Acc: 0.7743

Epoch 80/150
train Loss: 0.3570 Acc: 0.8538
val Loss: 0.4210 Acc: 0.8396

Epoch 90/150
train Loss: 0.3635 Acc: 0.8519
val Loss: 0.4114 Acc: 0.8423

Epoch 100/150
train Loss: 0.3189 Acc: 0.8738
val Loss: 0.3833 Acc: 0.8729

Epoch 110/150
train Loss: 0.3139 Acc: 0.8779
val Loss: 0.3782 Acc: 0.8919

Epoch 120/150
train Loss: 0.2614 Acc: 0.9009
val Loss: 0.3622 Acc: 0.8912

Epoch 130/150


In [10]:
# Show the accuracy, then the shapes of `pred` and `prob`
print(acc)
result_shapes = (pred.shape, prob.shape)
print(*result_shapes)

0.827281981676281
(2947,) (2947, 6)


In [11]:
# Case 3. CNN_1D (w/o data representation)
config = config3
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()  # build the model

ckpt_path = config["best_model_path"]

# Train and save only when training is enabled in the config
if config["training"]:
    best_model = data_cls.train_model(model)  # train the model
    data_cls.save_model(best_model, best_model_path=ckpt_path)  # save the model

# class prediction
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7267 Acc: 0.3486
val Loss: 1.6268 Acc: 0.3936

Epoch 10/150
train Loss: 0.8740 Acc: 0.7089
val Loss: 0.9253 Acc: 0.7233

Epoch 20/150
train Loss: 0.5772 Acc: 0.8079
val Loss: 0.6615 Acc: 0.8097

Epoch 30/150
train Loss: 0.4651 Acc: 0.8257
val Loss: 0.5759 Acc: 0.7899

Epoch 40/150
train Loss: 0.4086 Acc: 0.8402
val Loss: 0.5197 Acc: 0.8117

Epoch 50/150
train Loss: 0.3792 Acc: 0.8493
val Loss: 0.4863 Acc: 0.8300

Epoch 60/150
train Loss: 0.3586 Acc: 0.8572
val Loss: 0.4723 Acc: 0.8090

Epoch 70/150
train Loss: 0.3409 Acc: 0.8609
val Loss: 0.4487 Acc: 0.8321

Epoch 80/150
train Loss: 0.3283 Acc: 0.8655
val Loss: 0.4280 Acc: 0.8423

Epoch 90/150
train Loss: 0.3107 Acc: 0.8737
val Loss: 0.4115 Acc: 0.8545

Epoch 100/150
train Loss: 0.3023 Acc: 0.8774
val Loss: 0.4176 Acc: 0.8260

Epoch 110/150
train Loss: 0.2908 Acc: 0.8837
val Loss: 0.4070 Acc: 0.8341

Epoch 120/150
train Loss: 0.2804 Acc: 0.8895
val Loss: 0.3843 Acc: 0.8518

Epoch 130/150


In [12]:
# Show the accuracy, then the shapes of `pred` and `prob`
print(acc)
result_shapes = (pred.shape, prob.shape)
print(*result_shapes)

0.8092975907702749
(2947,) (2947, 6)


In [13]:
# Case 4. LSTM_FCNs (w/o data representation)
config = config4
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()  # build the model

ckpt_path = config["best_model_path"]

# Train and save only when training is enabled in the config
if config["training"]:
    best_model = data_cls.train_model(model)  # train the model
    data_cls.save_model(best_model, best_model_path=ckpt_path)  # save the model

# class prediction
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.5169 Acc: 0.6674
val Loss: 1.4140 Acc: 0.8899

Epoch 10/150
train Loss: 0.5008 Acc: 0.9475
val Loss: 0.5601 Acc: 0.8946

Epoch 20/150
train Loss: 0.2327 Acc: 0.9568
val Loss: 0.3736 Acc: 0.8987

Epoch 30/150
train Loss: 0.1450 Acc: 0.9628
val Loss: 0.3249 Acc: 0.9007

Epoch 40/150
train Loss: 0.1045 Acc: 0.9672
val Loss: 0.3851 Acc: 0.8885

Epoch 50/150
train Loss: 0.0925 Acc: 0.9662
val Loss: 0.2981 Acc: 0.9062

Epoch 60/150
train Loss: 0.0761 Acc: 0.9694
val Loss: 0.3331 Acc: 0.9211

Epoch 70/150
train Loss: 0.0746 Acc: 0.9680
val Loss: 0.3098 Acc: 0.9177

Epoch 80/150
train Loss: 0.0654 Acc: 0.9742
val Loss: 0.3247 Acc: 0.9266

Epoch 90/150
train Loss: 0.0639 Acc: 0.9730
val Loss: 0.2965 Acc: 0.9286

Epoch 100/150
train Loss: 0.0627 Acc: 0.9713
val Loss: 0.3549 Acc: 0.8939

Epoch 110/150
train Loss: 0.0554 Acc: 0.9733
val Loss: 0.3783 Acc: 0.9218

Epoch 120/150
train Loss: 0.0531 Acc: 0.9752
val Loss: 0.4274 Acc: 0.9245

Epoch 130/150


In [14]:
# Show the accuracy, then the shapes of `pred` and `prob`
print(acc)
result_shapes = (pred.shape, prob.shape)
print(*result_shapes)

0.9195792331184256
(2947,) (2947, 6)


--------------------------------------------------

In [15]:
# representation data
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# Label files are opened in `with` blocks so their handles are closed right after loading.
train_x = pd.read_csv('./data/ts2vec_repr_train.csv')
with open('./data/y_train.pkl', 'rb') as f:
    train_y = pickle.load(f)

test_x = pd.read_csv('./data/ts2vec_repr_test.csv')
with open('./data/y_test.pkl', 'rb') as f:
    test_y = pickle.load(f)

train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x representation_dims) = (7352, 64)
print(train_y.shape) #shape : (num_of_instance) = (7352, )
print(test_x.shape)  #shape : (num_of_instance x representation_dims) = (2947, 64)
print(test_y.shape)  #shape : (num_of_instance) = (2947, )

(7352, 64)
(7352,)
(2947, 64)
(2947,)


In [16]:
# Case 5. fully-connected layers (w/ data representation)
config = config5
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()  # build the model

ckpt_path = config["best_model_path"]

# Train and save only when training is enabled in the config
if config["training"]:
    best_model = data_cls.train_model(model)  # train the model
    data_cls.save_model(best_model, best_model_path=ckpt_path)  # save the model

# class prediction
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7738 Acc: 0.2080
val Loss: 1.7019 Acc: 0.2570

Epoch 10/150
train Loss: 0.7111 Acc: 0.7990
val Loss: 0.7149 Acc: 0.8097

Epoch 20/150
train Loss: 0.4213 Acc: 0.8907
val Loss: 0.4824 Acc: 0.8708

Epoch 30/150
train Loss: 0.2872 Acc: 0.9243
val Loss: 0.3738 Acc: 0.8926

Epoch 40/150
train Loss: 0.2214 Acc: 0.9349
val Loss: 0.3160 Acc: 0.9021

Epoch 50/150
train Loss: 0.1850 Acc: 0.9418
val Loss: 0.2853 Acc: 0.9089

Epoch 60/150
train Loss: 0.1560 Acc: 0.9493
val Loss: 0.2693 Acc: 0.9116

Epoch 70/150
train Loss: 0.1415 Acc: 0.9483
val Loss: 0.2598 Acc: 0.9184

Epoch 80/150
train Loss: 0.1320 Acc: 0.9515
val Loss: 0.2543 Acc: 0.9239

Epoch 90/150
train Loss: 0.1207 Acc: 0.9536
val Loss: 0.2525 Acc: 0.9266

Epoch 100/150
train Loss: 0.1144 Acc: 0.9565
val Loss: 0.2493 Acc: 0.9266

Epoch 110/150
train Loss: 0.1110 Acc: 0.9582
val Loss: 0.2451 Acc: 0.9279

Epoch 120/150
train Loss: 0.1040 Acc: 0.9597
val Loss: 0.2459 Acc: 0.9286

Epoch 130/150


In [17]:
# Show the accuracy, then the shapes of `pred` and `prob`
print(acc)
result_shapes = (pred.shape, prob.shape)
print(*result_shapes)

0.9172039362063116
(2947,) (2947, 6)
