In [1]:
import pickle
import random
from pathlib import Path

import numpy as np
import pandas as pd
import torch

import main_regression as mr

In [2]:
# seed 고정
random_seed = 42

torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [3]:
# Case 1. LSTM model (w/o data representation)
config1 = {
        'model': 'LSTM', # classification에 활용할 알고리즘 정의, {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'} 중 택 1
        'training': True,  # 학습 여부, 저장된 학습 완료 모델 존재시 False로 설정
        'best_model_path': './ckpt/lstm.pt',  # 학습 완료 모델 저장 경로
        'parameter': {
            'input_size': 24,  # 데이터의 변수 개수, int
            'num_layers': 2,  # recurrent layers의 수, int(default: 2, 범위: 1 이상)
            'hidden_size': 64,  # hidden state의 차원, int(default: 64, 범위: 1 이상)
            'dropout': 0.1,  # dropout 확률, float(default: 0.1, 범위: 0 이상 1 이하)
            'bidirectional': True,  # 모델의 양방향성 여부, bool(default: True)
            'num_epochs': 1000,  # 학습 epoch 횟수, int(default: 1000, 범위: 1 이상)
            'batch_size': 16,  # batch 크기, int(default: 16, 범위: 1 이상, 컴퓨터 사양에 적합하게 설정)
            'lr': 0.0001,  # learning rate, float(default: 0.0001, 범위: 0.1 이하)
            'device': 'cuda'  # 학습 환경, ["cuda", "cpu"] 중 선택
        }
}

# Case 2. GRU model (w/o data representation)
config2 = {
        'model': 'GRU', # classification에 활용할 알고리즘 정의, {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'} 중 택 1
        'training': True,  # 학습 여부, 저장된 학습 완료 모델 존재시 False로 설정
        'best_model_path': './ckpt/gru.pt',  # 학습 완료 모델 저장 경로
        'parameter': {
            'input_size': 24,  # 데이터의 변수 개수, int
            'num_layers': 2,  # recurrent layers의 수, int(default: 2, 범위: 1 이상)
            'hidden_size': 64,  # hidden state의 차원, int(default: 64, 범위: 1 이상)
            'dropout': 0.1,  # dropout 확률, float(default: 0.1, 범위: 0 이상 1 이하)
            'bidirectional': True,  # 모델의 양방향성 여부, bool(default: True)
            'num_epochs': 1000,  # 학습 epoch 횟수, int(default: 1000, 범위: 1 이상)
            'batch_size': 16,  # batch 크기, int(default: 16, 범위: 1 이상, 컴퓨터 사양에 적합하게 설정)
            'lr': 0.0001,  # learning rate, float(default: 0.0001, 범위: 0.1 이하)
            'device': 'cuda'  # 학습 환경, ["cuda", "cpu"] 중 선택
        }
}

# Case 3. CNN_1D model (w/o data representation)
config3 = {
        'model': 'CNN_1D', # classification에 활용할 알고리즘 정의, {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'} 중 택 1
        'training': True,  # 학습 여부, 저장된 학습 완료 모델 존재시 False로 설정
        'best_model_path': './ckpt/cnn_1d.pt',  # 학습 완료 모델 저장 경로
        'parameter': {
            'input_size': 24,  # 데이터의 변수 개수, int
            'seq_len': 144,  # 데이터의 시간 길이, int
            'output_channels': 64, # convolution layer의 output channel, int(default: 64, 범위: 1 이상, 2의 지수로 설정 권장)
            'kernel_size': 3, # convolutional layer의 filter 크기, int(default: 3, 범위: 3 이상, 홀수로 설정 권장)
            'stride': 1, # convolution layer의 stride 크기, int(default: 1, 범위: 1 이상)
            'padding': 0, # padding 크기, int(default: 0, 범위: 0 이상)
            'drop_out': 0.1, # dropout 확률, float(default: 0.1, 범위: 0 이상 1 이하)
            'num_epochs': 1000,  # 학습 epoch 횟수, int(default: 1000, 범위: 1 이상)
            'batch_size': 16,  # batch 크기, int(default: 16, 범위: 1 이상, 컴퓨터 사양에 적합하게 설정)
            'lr': 0.0001,  # learning rate, float(default: 0.0001, 범위: 0.1 이하)
            'device': 'cuda'  # 학습 환경, ["cuda", "cpu"] 중 선택
        }
}

# Case 4. LSTM_FCNs model (w/o data representation)
config4 = {
        'model': 'LSTM_FCNs', # classification에 활용할 알고리즘 정의, {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'} 중 택 1
        'training': True,  # 학습 여부, 저장된 학습 완료 모델 존재시 False로 설정
        'best_model_path': './ckpt/lstm_fcn.pt',  # 학습 완료 모델 저장 경로
        'parameter': {
            'input_size': 24,  # 데이터의 변수 개수, int
            'num_layers': 2,  # recurrent layers의 수, int(default: 2, 범위: 1 이상)
            'lstm_drop_out': 0.1, # LSTM dropout 확률, float(default: 0.1, 범위: 0 이상 1 이하)
            'fc_drop_out': 0.1, # FC dropout 확률, float(default: 0.1, 범위: 0 이상 1 이하)
            'num_epochs': 1000, # 학습 epoch 횟수, int(default: 1000, 범위: 1 이상)
            'batch_size': 16,  # batch 크기, int(default: 16, 범위: 1 이상, 컴퓨터 사양에 적합하게 설정)
            'lr': 0.0001,  # learning rate, float(default: 0.0001, 범위: 0.1 이하)
            'device': 'cuda'  # 학습 환경, ["cuda", "cpu"] 중 선택
        }
}

# Case 5. fully-connected layers (w/ data representation)
config5 = {
        'model': 'FC', # classification에 활용할 알고리즘 정의, {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'} 중 택 1
        "training": True,  # 학습 여부, 저장된 학습 완료 모델 존재시 False로 설정
        "best_model_path": './ckpt/fc.pt',  # 학습 완료 모델 저장 경로
        'parameter': {
            'input_size': 64,  # 데이터의 변수 개수(representation 차원), int
            'drop_out': 0.1, # dropout 확률, float(default: 0.1, 범위: 0 이상 1 이하)
            'bias': True, # bias 사용 여부, bool(default: True)
            'num_epochs': 1000, # 학습 epoch 횟수, int(default: 1000, 범위: 1 이상)
            'batch_size': 16,  # batch 크기, int(default: 16, 범위: 1 이상, 컴퓨터 사양에 적합하게 설정)
            'lr': 0.0001,  # learning rate, float(default: 0.0001, 범위: 0.1 이하)
            'device': 'cuda'  # 학습 환경, ["cuda", "cpu"] 중 선택
        }
}

In [4]:
# raw time series data
train_x = pickle.load(open('./data/x_train.pkl', 'rb'))
train_y = pickle.load(open('./data/y_train.pkl', 'rb'))
test_x = pickle.load(open('./data/x_test.pkl', 'rb'))
test_y = pickle.load(open('./data/y_test.pkl', 'rb'))

train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (95, 24, 144)
print(train_y.shape) #shape : (num_of_instance) = (95,)
print(test_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (42, 24, 144)
print(test_y.shape)  #shape : (num_of_instance) = (42,)

(95, 24, 144)
(95,)
(42, 24, 144)
(42,)


In [5]:
# Case 1. LSTM model (w/o data representation)
config = config1
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # 모델 구축

if config["training"]:
    best_model = data_reg.train_model(model)  # 모델 학습
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # 모델 저장

pred, mse, mae = data_reg.pred_data(model, best_model_path=config["best_model_path"])  # 예측

Start training model

Epoch 1/1000
train Loss: 223.7674
val Loss: 190.3345

Epoch 10/1000
train Loss: 210.2965
val Loss: 177.4592

Epoch 20/1000
train Loss: 180.4303
val Loss: 148.4286

Epoch 30/1000
train Loss: 144.4305
val Loss: 116.1962

Epoch 40/1000
train Loss: 119.0877
val Loss: 93.6555

Epoch 50/1000
train Loss: 99.2963
val Loss: 76.1784

Epoch 60/1000
train Loss: 83.4011
val Loss: 62.2586

Epoch 70/1000
train Loss: 70.4260
val Loss: 51.1112

Epoch 80/1000
train Loss: 60.1143
val Loss: 42.4102

Epoch 90/1000
train Loss: 51.2951
val Loss: 35.1722

Epoch 100/1000
train Loss: 44.4562
val Loss: 29.6943

Epoch 110/1000
train Loss: 39.0166
val Loss: 25.4268

Epoch 120/1000
train Loss: 34.6994
val Loss: 22.1687

Epoch 130/1000
train Loss: 31.2190
val Loss: 19.6624

Epoch 140/1000
train Loss: 28.4767
val Loss: 17.7805

Epoch 150/1000
train Loss: 26.2840
val Loss: 16.3688

Epoch 160/1000
train Loss: 24.4931
val Loss: 15.3279

Epoch 170/1000
train Loss: 23.1821
val Loss: 14.6373

Epoch 18

In [6]:
print(f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}')
print(f'** Dimension of result for test dataset = {pred.shape}')

** Performance of test dataset ==> MSE = 12.972700119018555, MAE = 2.7908825874328613
** Dimension of result for test dataset = (42,)


In [7]:
# Case 2. GRU (w/o data representation)
config = config2
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # 모델 구축

if config["training"]:
    best_model = data_reg.train_model(model)  # 모델 학습
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # 모델 저장

pred, mse, mae = data_reg.pred_data(model, best_model_path=config["best_model_path"])  # 예측

Start training model

Epoch 1/1000
train Loss: 227.3942
val Loss: 191.6880

Epoch 10/1000
train Loss: 186.3391
val Loss: 153.6383

Epoch 20/1000
train Loss: 148.0420
val Loss: 119.2658

Epoch 30/1000
train Loss: 118.3456
val Loss: 92.8377

Epoch 40/1000
train Loss: 96.7763
val Loss: 73.8525

Epoch 50/1000
train Loss: 80.3770
val Loss: 59.6750

Epoch 60/1000
train Loss: 67.3567
val Loss: 48.4864

Epoch 70/1000
train Loss: 57.0061
val Loss: 39.8115

Epoch 80/1000
train Loss: 48.6998
val Loss: 33.0317

Epoch 90/1000
train Loss: 42.0997
val Loss: 27.8164

Epoch 100/1000
train Loss: 36.9311
val Loss: 23.8177

Epoch 110/1000
train Loss: 32.8255
val Loss: 20.7926

Epoch 120/1000
train Loss: 29.5859
val Loss: 18.5334

Epoch 130/1000
train Loss: 27.0894
val Loss: 16.8793

Epoch 140/1000
train Loss: 25.1445
val Loss: 15.6843

Epoch 150/1000
train Loss: 23.6446
val Loss: 14.8627

Epoch 160/1000
train Loss: 22.5003
val Loss: 14.3160

Epoch 170/1000
train Loss: 21.6382
val Loss: 13.9830

Epoch 180/

In [8]:
print(f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}')
print(f'** Dimension of result for test dataset = {pred.shape}')

** Performance of test dataset ==> MSE = 12.435452461242676, MAE = 2.773865222930908
** Dimension of result for test dataset = (42,)


In [9]:
# Case 3. CNN_1D (w/o data representation)
config = config3
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # 모델 구축

if config["training"]:
    best_model = data_reg.train_model(model)  # 모델 학습
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # 모델 저장

pred, mse, mae = data_reg.pred_data(model, best_model_path=config["best_model_path"])  # 예측

Start training model

Epoch 1/1000
train Loss: 555.0059
val Loss: 109.0806

Epoch 10/1000
train Loss: 42.7734
val Loss: 19.1337

Epoch 20/1000
train Loss: 41.0078
val Loss: 15.5636

Epoch 30/1000
train Loss: 37.4624
val Loss: 14.8062

Epoch 40/1000
train Loss: 24.0640
val Loss: 14.9046

Epoch 50/1000
train Loss: 29.2808
val Loss: 14.5940

Epoch 60/1000
train Loss: 29.1140
val Loss: 15.6754

Epoch 70/1000
train Loss: 27.6092
val Loss: 15.3248

Epoch 80/1000
train Loss: 30.7468
val Loss: 15.7165

Epoch 90/1000
train Loss: 20.9283
val Loss: 15.7559

Epoch 100/1000
train Loss: 19.6466
val Loss: 15.9551

Epoch 110/1000
train Loss: 20.0110
val Loss: 17.1988

Epoch 120/1000
train Loss: 18.5151
val Loss: 16.0409

Epoch 130/1000
train Loss: 20.4064
val Loss: 16.8151

Epoch 140/1000
train Loss: 19.8867
val Loss: 16.5886

Epoch 150/1000
train Loss: 17.6717
val Loss: 17.6459

Epoch 160/1000
train Loss: 19.0035
val Loss: 16.3855

Epoch 170/1000
train Loss: 18.1048
val Loss: 17.7671

Epoch 180/1000


In [10]:
print(f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}')
print(f'** Dimension of result for test dataset = {pred.shape}')

** Performance of test dataset ==> MSE = 12.220710754394531, MAE = 2.7250516414642334
** Dimension of result for test dataset = (42,)


In [11]:
# Case 4. LSTM_FCNs (w/o data representation)
config = config4
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # 모델 구축

if config["training"]:
    best_model = data_reg.train_model(model)  # 모델 학습
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # 모델 저장

pred, mse, mae = data_reg.pred_data(model, best_model_path=config["best_model_path"])  # 예측

Start training model

Epoch 1/1000
train Loss: 227.9608
val Loss: 187.8042

Epoch 10/1000
train Loss: 214.2792
val Loss: 186.4743

Epoch 20/1000
train Loss: 182.9382
val Loss: 150.9421

Epoch 30/1000
train Loss: 153.0073
val Loss: 124.5707

Epoch 40/1000
train Loss: 131.3564
val Loss: 103.7000

Epoch 50/1000
train Loss: 115.0392
val Loss: 85.5239

Epoch 60/1000
train Loss: 102.1320
val Loss: 76.8003

Epoch 70/1000
train Loss: 91.6300
val Loss: 68.4533

Epoch 80/1000
train Loss: 83.4663
val Loss: 60.1736

Epoch 90/1000
train Loss: 76.6745
val Loss: 55.9470

Epoch 100/1000
train Loss: 70.6409
val Loss: 45.8753

Epoch 110/1000
train Loss: 65.1769
val Loss: 43.7059

Epoch 120/1000
train Loss: 60.3001
val Loss: 45.1425

Epoch 130/1000
train Loss: 55.8308
val Loss: 41.9959

Epoch 140/1000
train Loss: 51.9045
val Loss: 40.7986

Epoch 150/1000
train Loss: 48.1885
val Loss: 38.0941

Epoch 160/1000
train Loss: 44.3104
val Loss: 30.1289

Epoch 170/1000
train Loss: 41.1139
val Loss: 29.6885

Epoch

In [12]:
print(f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}')
print(f'** Dimension of result for test dataset = {pred.shape}')

** Performance of test dataset ==> MSE = 14.570467948913574, MAE = 2.9160358905792236
** Dimension of result for test dataset = (42,)


--------------------------------------------------

In [13]:
# representation data
train_x = pd.read_csv('./data/ts2vec_repr_train.csv')
train_y = pickle.load(open('./data/y_train.pkl', 'rb'))

test_x = pd.read_csv('./data/ts2vec_repr_test.csv')
test_y = pickle.load(open('./data/y_test.pkl', 'rb'))

train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x representation_dims) = (95, 64)
print(train_y.shape) #shape : (num_of_instance) = (95, )
print(test_x.shape)  #shape : (num_of_instance x representation_dims) = (42, 64)
print(test_y.shape)  #shape : (num_of_instance) = (42, )

(95, 64)
(95,)
(42, 64)
(42,)


In [14]:
# Case 5. fully-connected layers (w/ data representation)
config = config5
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # 모델 구축

if config["training"]:
    best_model = data_reg.train_model(model)  # 모델 학습
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # 모델 저장

pred, mse, mae = data_reg.pred_data(model, best_model_path=config["best_model_path"])  # 예측

Start training model

Epoch 1/1000
train Loss: 230.7502
val Loss: 195.8775

Epoch 10/1000
train Loss: 212.3306
val Loss: 181.2488

Epoch 20/1000
train Loss: 192.4345
val Loss: 164.4762

Epoch 30/1000
train Loss: 165.8161
val Loss: 140.7580

Epoch 40/1000
train Loss: 137.7342
val Loss: 114.9524

Epoch 50/1000
train Loss: 114.6146
val Loss: 91.1035

Epoch 60/1000
train Loss: 87.3861
val Loss: 70.2700

Epoch 70/1000
train Loss: 70.0523
val Loss: 52.8536

Epoch 80/1000
train Loss: 57.3371
val Loss: 39.0701

Epoch 90/1000
train Loss: 41.9194
val Loss: 28.8224

Epoch 100/1000
train Loss: 36.1336
val Loss: 21.8171

Epoch 110/1000
train Loss: 33.4344
val Loss: 17.3724

Epoch 120/1000
train Loss: 28.2753
val Loss: 14.7237

Epoch 130/1000
train Loss: 24.4812
val Loss: 13.3183

Epoch 140/1000
train Loss: 22.3405
val Loss: 12.4961

Epoch 150/1000
train Loss: 23.7632
val Loss: 12.0579

Epoch 160/1000
train Loss: 24.7017
val Loss: 11.8135

Epoch 170/1000
train Loss: 24.6635
val Loss: 11.6641

Epoch 

In [15]:
print(f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}')
print(f'** Dimension of result for test dataset = {pred.shape}')

** Performance of test dataset ==> MSE = 15.044198036193848, MAE = 3.089829921722412
** Dimension of result for test dataset = (42,)
