In [1]:
import torch
import pickle
import random
import pandas as pd
import numpy as np

import main_classificaiton as mc

In [2]:
# Fix every random number generator used by this notebook so runs are repeatable.
random_seed = 42

# Python and NumPy generators
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (CPU and the current CUDA device)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. LSTM model (w/o data representation)
config1 = dict(
    model='LSTM',  # classification algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    training=True,  # whether to train; set to False when a trained model is already saved
    best_model_path='./ckpt/lstm.pt',  # path where the trained model is stored
    parameter=dict(
        input_size=9,  # number of variables in the data, int
        num_classes=6,  # number of classes to predict, int
        num_layers=2,  # number of recurrent layers, int (default: 2, range: >= 1)
        hidden_size=64,  # dimension of the hidden state, int (default: 64, range: >= 1)
        dropout=0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        bidirectional=True,  # whether the model is bidirectional, bool (default: True)
        num_epochs=150,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        # learning rate, float (range: <= 0.1)
        # NOTE(review): the original comment listed the default as 0.001 while the
        # value used here is 0.0001 - confirm which one is intended.
        lr=0.0001,
        device='cuda',  # training device, one of ["cuda", "cpu"]
    ),
)

# Case 2. GRU model (w/o data representation)
config2 = dict(
    model='GRU',  # classification algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    training=True,  # whether to train; set to False when a trained model is already saved
    best_model_path='./ckpt/gru.pt',  # path where the trained model is stored
    parameter=dict(
        input_size=9,  # number of variables in the data, int
        num_classes=6,  # number of classes to predict, int
        num_layers=2,  # number of recurrent layers, int (default: 2, range: >= 1)
        hidden_size=64,  # dimension of the hidden state, int (default: 64, range: >= 1)
        dropout=0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        bidirectional=True,  # whether the model is bidirectional, bool (default: True)
        num_epochs=150,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        # learning rate, float (range: <= 0.1)
        # NOTE(review): the original comment listed the default as 0.001 while the
        # value used here is 0.0001 - confirm which one is intended.
        lr=0.0001,
        device='cuda',  # training device, one of ["cuda", "cpu"]
    ),
)

# Case 3. CNN_1D model (w/o data representation)
config3 = dict(
    model='CNN_1D',  # classification algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    training=True,  # whether to train; set to False when a trained model is already saved
    best_model_path='./ckpt/cnn_1d.pt',  # path where the trained model is stored
    parameter=dict(
        input_size=9,  # number of variables in the data, int
        num_classes=6,  # number of classes to predict, int
        seq_len=128,  # length of the data along the time axis, int
        output_channels=64,  # conv layer output channels, int (default: 64, range: >= 1, power of 2 recommended)
        kernel_size=3,  # conv layer filter size, int (default: 3, range: >= 3, odd value recommended)
        stride=1,  # conv layer stride, int (default: 1, range: >= 1)
        padding=0,  # padding size, int (default: 0, range: >= 0)
        drop_out=0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        num_epochs=150,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        lr=0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        device='cuda',  # training device, one of ["cuda", "cpu"]
    ),
)

# Case 4. LSTM_FCNs model (w/o data representation)
config4 = dict(
    model='LSTM_FCNs',  # classification algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    training=True,  # whether to train; set to False when a trained model is already saved
    best_model_path='./ckpt/lstm_fcn.pt',  # path where the trained model is stored
    parameter=dict(
        input_size=9,  # number of variables in the data, int
        num_classes=6,  # number of classes to predict, int
        num_layers=1,  # number of recurrent layers, int (default: 1, range: >= 1)
        lstm_drop_out=0.4,  # LSTM dropout probability, float (default: 0.4, range: 0 to 1)
        fc_drop_out=0.1,  # FC dropout probability, float (default: 0.1, range: 0 to 1)
        num_epochs=150,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        lr=0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        device='cuda',  # training device, one of ["cuda", "cpu"]
    ),
)

# Case 5. fully-connected layers (w/ data representation)
config5 = dict(
    model='FC',  # classification algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
    training=True,  # whether to train; set to False when a trained model is already saved
    best_model_path='./ckpt/fc.pt',  # path where the trained model is stored
    parameter=dict(
        input_size=64,  # number of variables in the data (representation dimension), int
        num_classes=6,  # number of classes to predict, int
        drop_out=0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        bias=True,  # whether to use a bias term, bool (default: True)
        num_epochs=150,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=64,  # batch size, int (default: 64, range: >= 1; choose to fit your hardware)
        lr=0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        device='cuda',  # training device, one of ["cuda", "cpu"]
    ),
)

In [4]:
# raw time series data
# Each file is opened in a `with` block so its handle is closed right after
# loading (the previous `pickle.load(open(...))` form never closed the files).
# NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
with open('./data/x_train.pkl', 'rb') as f:
    train_x = pickle.load(f)
with open('./data/y_train.pkl', 'rb') as f:
    train_y = pickle.load(f)
with open('./data/x_test.pkl', 'rb') as f:
    test_x = pickle.load(f)
with open('./data/y_test.pkl', 'rb') as f:
    test_y = pickle.load(f)

# Bundle inputs and labels in the {'x': ..., 'y': ...} layout passed to mc.Classification.
train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (7352, 9, 128)
print(train_y.shape) #shape : (num_of_instance) = (7352, )
print(test_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (2947, 9, 128)
print(test_y.shape)  #shape : (num_of_instance) = (2947, )

(7352, 9, 128)
(7352,)
(2947, 9, 128)
(2947,)


In [5]:
# Case 1. LSTM model (w/o data representation)
config = config1

# Build the classifier wrapper and the untrained network.
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()

# Checkpoint location shared by the save and predict steps.
ckpt_path = config['best_model_path']

if config['training']:
    # Train, then persist the model returned by training.
    best_model = data_cls.train_model(model)
    data_cls.save_model(best_model, best_model_path=ckpt_path)

# Predict classes using the checkpoint at ckpt_path.
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7789 Acc: 0.2663
val Loss: 1.7555 Acc: 0.3569

Epoch 10/150
train Loss: 0.9261 Acc: 0.6154
val Loss: 1.0606 Acc: 0.5561

Epoch 20/150
train Loss: 0.6527 Acc: 0.7092
val Loss: 0.6196 Acc: 0.7260

Epoch 30/150
train Loss: 0.5330 Acc: 0.7543
val Loss: 0.6024 Acc: 0.7138

Epoch 40/150
train Loss: 0.4484 Acc: 0.8147
val Loss: 0.5414 Acc: 0.7675

Epoch 50/150
train Loss: 0.4056 Acc: 0.8371
val Loss: 0.5199 Acc: 0.8124

Epoch 60/150
train Loss: 0.3437 Acc: 0.8662
val Loss: 0.5137 Acc: 0.8341

Epoch 70/150
train Loss: 0.2697 Acc: 0.8927
val Loss: 0.4364 Acc: 0.8668

Epoch 80/150
train Loss: 0.2342 Acc: 0.9094
val Loss: 0.4039 Acc: 0.8736

Epoch 90/150
train Loss: 0.1545 Acc: 0.9391
val Loss: 0.3710 Acc: 0.8946

Epoch 100/150
train Loss: 0.1312 Acc: 0.9495
val Loss: 0.2584 Acc: 0.9313

Epoch 110/150
train Loss: 0.1397 Acc: 0.9469
val Loss: 0.3855 Acc: 0.9069

Epoch 120/150
train Loss: 0.1120 Acc: 0.9577
val Loss: 0.3268 Acc: 0.9191

Epoch 130/150


In [6]:
# Accuracy returned by pred_data for the case run above.
print(acc)
# Shapes of the predicted labels and of the per-class probabilities.
print(*(out.shape for out in (pred, prob)))

0.8591788259246692
(2947,) (2947, 6)


In [7]:
# Case 2. GRU (w/o data representation)
config = config2

# Build the classifier wrapper and the untrained network.
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()

# Checkpoint location shared by the save and predict steps.
ckpt_path = config['best_model_path']

if config['training']:
    # Train, then persist the model returned by training.
    best_model = data_cls.train_model(model)
    data_cls.save_model(best_model, best_model_path=ckpt_path)

# Predict classes using the checkpoint at ckpt_path.
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7381 Acc: 0.2835
val Loss: 1.6610 Acc: 0.3725

Epoch 10/150
train Loss: 0.8096 Acc: 0.6638
val Loss: 0.8608 Acc: 0.6818

Epoch 20/150
train Loss: 0.4127 Acc: 0.8085
val Loss: 0.5333 Acc: 0.8056

Epoch 30/150
train Loss: 0.3282 Acc: 0.8713
val Loss: 0.4935 Acc: 0.8654

Epoch 40/150
train Loss: 0.1819 Acc: 0.9415
val Loss: 0.2424 Acc: 0.9354

Epoch 50/150
train Loss: 0.1502 Acc: 0.9420
val Loss: 0.2254 Acc: 0.9354

Epoch 60/150
train Loss: 0.1279 Acc: 0.9492
val Loss: 0.2140 Acc: 0.9402

Epoch 70/150
train Loss: 0.1152 Acc: 0.9517
val Loss: 0.2132 Acc: 0.9381

Epoch 80/150
train Loss: 0.1092 Acc: 0.9537
val Loss: 0.2147 Acc: 0.9388

Epoch 90/150
train Loss: 0.1040 Acc: 0.9566
val Loss: 0.2135 Acc: 0.9368

Epoch 100/150
train Loss: 0.1051 Acc: 0.9565
val Loss: 0.2109 Acc: 0.9388

Epoch 110/150
train Loss: 0.0990 Acc: 0.9570
val Loss: 0.2156 Acc: 0.9266

Epoch 120/150
train Loss: 0.0927 Acc: 0.9621
val Loss: 0.2335 Acc: 0.9259

Epoch 130/150


In [8]:
# Accuracy returned by pred_data for the case run above.
print(acc)
# Shapes of the predicted labels and of the per-class probabilities.
print(*(out.shape for out in (pred, prob)))

0.8853070919579233
(2947,) (2947, 6)


In [9]:
# Case 3. CNN_1D (w/o data representation)
config = config3

# Build the classifier wrapper and the untrained network.
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()

# Checkpoint location shared by the save and predict steps.
ckpt_path = config['best_model_path']

if config['training']:
    # Train, then persist the model returned by training.
    best_model = data_cls.train_model(model)
    data_cls.save_model(best_model, best_model_path=ckpt_path)

# Predict classes using the checkpoint at ckpt_path.
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.6921 Acc: 0.4606
val Loss: 1.5258 Acc: 0.6220

Epoch 10/150
train Loss: 0.3617 Acc: 0.8696
val Loss: 0.6173 Acc: 0.8205

Epoch 20/150
train Loss: 0.2393 Acc: 0.9169
val Loss: 0.5575 Acc: 0.8695

Epoch 30/150
train Loss: 0.1748 Acc: 0.9379
val Loss: 0.4917 Acc: 0.9001

Epoch 40/150
train Loss: 0.1335 Acc: 0.9515
val Loss: 0.4622 Acc: 0.9075

Epoch 50/150
train Loss: 0.1101 Acc: 0.9575
val Loss: 0.4530 Acc: 0.9103

Epoch 60/150
train Loss: 0.0967 Acc: 0.9587
val Loss: 0.4298 Acc: 0.9075

Epoch 70/150
train Loss: 0.0870 Acc: 0.9617
val Loss: 0.4059 Acc: 0.9123

Epoch 80/150
train Loss: 0.0807 Acc: 0.9638
val Loss: 0.3856 Acc: 0.9130

Epoch 90/150
train Loss: 0.0757 Acc: 0.9645
val Loss: 0.3565 Acc: 0.9164

Epoch 100/150
train Loss: 0.0728 Acc: 0.9641
val Loss: 0.3594 Acc: 0.9252

Epoch 110/150
train Loss: 0.0675 Acc: 0.9684
val Loss: 0.3703 Acc: 0.9252

Epoch 120/150
train Loss: 0.0638 Acc: 0.9687
val Loss: 0.3622 Acc: 0.9252

Epoch 130/150


In [10]:
# Accuracy returned by pred_data for the case run above.
print(acc)
# Shapes of the predicted labels and of the per-class probabilities.
print(*(out.shape for out in (pred, prob)))

0.9066847641669495
(2947,) (2947, 6)


In [11]:
# Case 4. LSTM_FCNs (w/o data representation)
config = config4

# Build the classifier wrapper and the untrained network.
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()

# Checkpoint location shared by the save and predict steps.
ckpt_path = config['best_model_path']

if config['training']:
    # Train, then persist the model returned by training.
    best_model = data_cls.train_model(model)
    data_cls.save_model(best_model, best_model_path=ckpt_path)

# Predict classes using the checkpoint at ckpt_path.
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.5299 Acc: 0.5790
val Loss: 1.4081 Acc: 0.6234

Epoch 10/150
train Loss: 0.5186 Acc: 0.9386
val Loss: 0.6188 Acc: 0.9150

Epoch 20/150
train Loss: 0.2323 Acc: 0.9604
val Loss: 0.4559 Acc: 0.9055

Epoch 30/150
train Loss: 0.1432 Acc: 0.9640
val Loss: 0.4200 Acc: 0.9184

Epoch 40/150
train Loss: 0.1077 Acc: 0.9653
val Loss: 0.4246 Acc: 0.9137

Epoch 50/150
train Loss: 0.0802 Acc: 0.9725
val Loss: 0.4375 Acc: 0.9239

Epoch 60/150
train Loss: 0.0826 Acc: 0.9663
val Loss: 0.4266 Acc: 0.9137

Epoch 70/150
train Loss: 0.0617 Acc: 0.9745
val Loss: 0.4401 Acc: 0.9021

Epoch 80/150
train Loss: 0.0583 Acc: 0.9750
val Loss: 0.4615 Acc: 0.9191

Epoch 90/150
train Loss: 0.0635 Acc: 0.9723
val Loss: 0.4184 Acc: 0.9109

Epoch 100/150
train Loss: 0.0496 Acc: 0.9779
val Loss: 0.4195 Acc: 0.9218

Epoch 110/150
train Loss: 0.0555 Acc: 0.9764
val Loss: 0.3962 Acc: 0.9211

Epoch 120/150
train Loss: 0.0485 Acc: 0.9794
val Loss: 0.3860 Acc: 0.9198

Epoch 130/150


In [12]:
# Accuracy returned by pred_data for the case run above.
print(acc)
# Shapes of the predicted labels and of the per-class probabilities.
print(*(out.shape for out in (pred, prob)))

0.9216152019002376
(2947,) (2947, 6)


--------------------------------------------------

In [13]:
# representation data
# Inputs are read from the ts2vec representation CSVs; labels come from the
# same pickle files as the raw-data experiments.
# The label files are opened in `with` blocks so their handles are closed right
# after loading (the previous `pickle.load(open(...))` form never closed them).
# NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
train_x = pd.read_csv('./data/ts2vec_repr_train.csv')
with open('./data/y_train.pkl', 'rb') as f:
    train_y = pickle.load(f)

test_x = pd.read_csv('./data/ts2vec_repr_test.csv')
with open('./data/y_test.pkl', 'rb') as f:
    test_y = pickle.load(f)

# Rebinds train_data / test_data: cells below this point use the representation inputs.
train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x representation_dims) = (7352, 64)
print(train_y.shape) #shape : (num_of_instance) = (7352, )
print(test_x.shape)  #shape : (num_of_instance x representation_dims) = (2947, 64)
print(test_y.shape)  #shape : (num_of_instance) = (2947, )

(7352, 64)
(7352,)
(2947, 64)
(2947,)


In [14]:
# Case 5. fully-connected layers (w/ data representation)
config = config5

# Build the classifier wrapper and the untrained network.
data_cls = mc.Classification(config, train_data, test_data)
model = data_cls.build_model()

# Checkpoint location shared by the save and predict steps.
ckpt_path = config['best_model_path']

if config['training']:
    # Train, then persist the model returned by training.
    best_model = data_cls.train_model(model)
    data_cls.save_model(best_model, best_model_path=ckpt_path)

# Predict classes using the checkpoint at ckpt_path.
pred, prob, acc = data_cls.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/150
train Loss: 1.7088 Acc: 0.3428
val Loss: 1.6546 Acc: 0.4099

Epoch 10/150
train Loss: 0.7219 Acc: 0.7761
val Loss: 0.7332 Acc: 0.7702

Epoch 20/150
train Loss: 0.4102 Acc: 0.8922
val Loss: 0.4728 Acc: 0.8872

Epoch 30/150
train Loss: 0.2763 Acc: 0.9265
val Loss: 0.3520 Acc: 0.9001

Epoch 40/150
train Loss: 0.2140 Acc: 0.9371
val Loss: 0.2944 Acc: 0.9069

Epoch 50/150
train Loss: 0.1795 Acc: 0.9437
val Loss: 0.2656 Acc: 0.9075

Epoch 60/150
train Loss: 0.1578 Acc: 0.9459
val Loss: 0.2516 Acc: 0.9089

Epoch 70/150
train Loss: 0.1423 Acc: 0.9510
val Loss: 0.2446 Acc: 0.9130

Epoch 80/150
train Loss: 0.1332 Acc: 0.9507
val Loss: 0.2379 Acc: 0.9205

Epoch 90/150
train Loss: 0.1259 Acc: 0.9517
val Loss: 0.2365 Acc: 0.9232

Epoch 100/150
train Loss: 0.1187 Acc: 0.9531
val Loss: 0.2364 Acc: 0.9252

Epoch 110/150
train Loss: 0.1139 Acc: 0.9553
val Loss: 0.2359 Acc: 0.9286

Epoch 120/150
train Loss: 0.1082 Acc: 0.9590
val Loss: 0.2345 Acc: 0.9334

Epoch 130/150


In [15]:
# Accuracy returned by pred_data for the case run above.
print(acc)
# Shapes of the predicted labels and of the per-class probabilities.
print(*(out.shape for out in (pred, prob)))

0.9144893111638955
(2947,) (2947, 6)
