In [1]:
import torch
import pickle
import random
import pandas as pd
import numpy as np

import main_regression as mr

In [2]:
# Fix every random number generator the notebook relies on so runs are repeatable.
random_seed = 42

# Python and NumPy generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators. manual_seed_all seeds the generator of every visible GPU
# (torch.cuda.manual_seed only seeds the current device); it is silently ignored
# when CUDA is not available.
torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner, which may
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Device used for training. Falls back to the CPU when no CUDA device is visible
# so the notebook still runs on machines without a GPU; assign 'cuda' or 'cpu'
# explicitly here to override the automatic choice.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Training hyper-parameters that are identical for every case. They are unpacked
# at the end of each 'parameter' dict below; to tune one of them for a single
# model, add that key after the `**TRAIN_PARAMS` line of the corresponding config.
TRAIN_PARAMS = {
    'num_epochs': 1000,  # number of training epochs, int (default: 1000, range: >= 1)
    'batch_size': 16,  # batch size, int (default: 16, range: >= 1, choose to fit your hardware)
    'lr': 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
    'device': DEVICE,  # training device, one of ["cuda", "cpu"]
}

# Top-level keys shared by all configs:
#   'model'           - algorithm used for regression, one of
#                       {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC'}
#   'training'        - whether to train; set to False when a trained model has already been saved
#   'best_model_path' - path where the trained model is stored
#   'parameter'       - model-specific hyper-parameters followed by TRAIN_PARAMS

# Case 1. LSTM model (w/o data representation)
config1 = {
    'model': 'LSTM',
    'training': True,
    'best_model_path': './ckpt/lstm.pt',
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # dimension of the hidden state, int (default: 64, range: >= 1)
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        **TRAIN_PARAMS,
    },
}

# Case 2. GRU model (w/o data representation)
config2 = {
    'model': 'GRU',
    'training': True,
    'best_model_path': './ckpt/gru.pt',
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # dimension of the hidden state, int (default: 64, range: >= 1)
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        **TRAIN_PARAMS,
    },
}

# Case 3. CNN_1D model (w/o data representation)
config3 = {
    'model': 'CNN_1D',
    'training': True,
    'best_model_path': './ckpt/cnn_1d.pt',
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'seq_len': 144,  # time length of the data, int
        'output_channels': 64,  # output channels of the convolution layer, int (default: 64, range: >= 1, power of 2 recommended)
        'kernel_size': 3,  # filter size of the convolution layer, int (default: 3, range: >= 3, odd value recommended)
        'stride': 1,  # stride of the convolution layer, int (default: 1, range: >= 1)
        'padding': 0,  # padding size, int (default: 0, range: >= 0)
        'drop_out': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        **TRAIN_PARAMS,
    },
}

# Case 4. LSTM_FCNs model (w/o data representation)
config4 = {
    'model': 'LSTM_FCNs',
    'training': True,
    'best_model_path': './ckpt/lstm_fcn.pt',
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'lstm_drop_out': 0.1,  # LSTM dropout probability, float (default: 0.1, range: 0 to 1)
        'fc_drop_out': 0.1,  # FC dropout probability, float (default: 0.1, range: 0 to 1)
        **TRAIN_PARAMS,
    },
}

# Case 5. fully-connected layers (w/ data representation)
config5 = {
    'model': 'FC',
    'training': True,
    'best_model_path': './ckpt/fc.pt',
    'parameter': {
        'input_size': 64,  # number of variables in the data (representation dimension), int
        'drop_out': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bias': True,  # whether to use a bias term, bool (default: True)
        **TRAIN_PARAMS,
    },
}

In [4]:
# raw time series data
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    loading finishes. NOTE: unpickling can execute arbitrary code, so only
    load files that come from a trusted source.
    """
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


train_x = _load_pickle('./data/x_train.pkl')
train_y = _load_pickle('./data/y_train.pkl')
test_x = _load_pickle('./data/x_test.pkl')
test_y = _load_pickle('./data/y_test.pkl')

# Every instance needs exactly one target value.
assert len(train_x) == len(train_y), 'train x/y have a different number of instances'
assert len(test_x) == len(test_y), 'test x/y have a different number of instances'

train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (95, 24, 144)
print(train_y.shape) #shape : (num_of_instance) = (95,)
print(test_x.shape)  #shape : (num_of_instance x input_dims x window_size) = (42, 24, 144)
print(test_y.shape)  #shape : (num_of_instance) = (42,)

(95, 24, 144)
(95,)
(42, 24, 144)
(42,)


In [5]:
# Case 1. LSTM model (w/o data representation)
config = config1

# Hand the config and both data splits to the regression helper, then build the model.
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()

ckpt_path = config["best_model_path"]

# Train and save the best model only when the config asks for it.
if config["training"]:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Prediction: pred_data receives the checkpoint path and returns predictions plus MSE / MAE.
pred, mse, mae = data_reg.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/1000
train Loss: 1.0065
val Loss: 1.0968

Epoch 10/1000
train Loss: 0.9683
val Loss: 1.1008

Epoch 20/1000
train Loss: 0.9174
val Loss: 1.1129

Epoch 30/1000
train Loss: 0.8366
val Loss: 1.1352

Epoch 40/1000
train Loss: 0.6921
val Loss: 1.0965

Epoch 50/1000
train Loss: 0.4794
val Loss: 0.8697

Epoch 60/1000
train Loss: 0.3169
val Loss: 0.6878

Epoch 70/1000
train Loss: 0.2104
val Loss: 0.5314

Epoch 80/1000
train Loss: 0.1567
val Loss: 0.6264

Epoch 90/1000
train Loss: 0.1166
val Loss: 0.6274

Epoch 100/1000
train Loss: 0.0936
val Loss: 0.6262

Epoch 110/1000
train Loss: 0.0755
val Loss: 0.5365

Epoch 120/1000
train Loss: 0.0620
val Loss: 0.5058

Epoch 130/1000
train Loss: 0.0498
val Loss: 0.5113

Epoch 140/1000
train Loss: 0.0517
val Loss: 0.5527

Epoch 150/1000
train Loss: 0.0353
val Loss: 0.5465

Epoch 160/1000
train Loss: 0.0299
val Loss: 0.5675

Epoch 170/1000
train Loss: 0.0260
val Loss: 0.5771

Epoch 180/1000
train Loss: 0.0222
val Loss: 0.5897

E

In [6]:
# Show the error metrics and the shape of the predictions from the previous cell.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)

** Performance of test dataset ==> MSE = 0.40650781989097595, MAE = 0.5203109979629517
** Dimension of result for test dataset = (42,)


In [7]:
# Case 2. GRU (w/o data representation)
config = config2

# Hand the config and both data splits to the regression helper, then build the model.
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()

ckpt_path = config["best_model_path"]

# Train and save the best model only when the config asks for it.
if config["training"]:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Prediction: pred_data receives the checkpoint path and returns predictions plus MSE / MAE.
pred, mse, mae = data_reg.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/1000
train Loss: 1.0025
val Loss: 1.1412

Epoch 10/1000
train Loss: 0.8717
val Loss: 1.1479

Epoch 20/1000
train Loss: 0.7750
val Loss: 1.1162

Epoch 30/1000
train Loss: 0.6795
val Loss: 1.0546

Epoch 40/1000
train Loss: 0.5885
val Loss: 0.9468

Epoch 50/1000
train Loss: 0.4968
val Loss: 0.8299

Epoch 60/1000
train Loss: 0.4225
val Loss: 0.7259

Epoch 70/1000
train Loss: 0.3632
val Loss: 0.6357

Epoch 80/1000
train Loss: 0.3195
val Loss: 0.6009

Epoch 90/1000
train Loss: 0.2807
val Loss: 0.5239

Epoch 100/1000
train Loss: 0.2430
val Loss: 0.5009

Epoch 110/1000
train Loss: 0.2134
val Loss: 0.4467

Epoch 120/1000
train Loss: 0.1879
val Loss: 0.4142

Epoch 130/1000
train Loss: 0.1614
val Loss: 0.3483

Epoch 140/1000
train Loss: 0.1352
val Loss: 0.3265

Epoch 150/1000
train Loss: 0.1161
val Loss: 0.3182

Epoch 160/1000
train Loss: 0.0987
val Loss: 0.3166

Epoch 170/1000
train Loss: 0.0837
val Loss: 0.3235

Epoch 180/1000
train Loss: 0.0699
val Loss: 0.3172

E

In [8]:
# Show the error metrics and the shape of the predictions from the previous cell.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)

** Performance of test dataset ==> MSE = 0.5235792994499207, MAE = 0.5581346154212952
** Dimension of result for test dataset = (42,)


In [9]:
# Case 3. CNN_1D (w/o data representation)
config = config3

# Hand the config and both data splits to the regression helper, then build the model.
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()

ckpt_path = config["best_model_path"]

# Train and save the best model only when the config asks for it.
if config["training"]:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Prediction: pred_data receives the checkpoint path and returns predictions plus MSE / MAE.
pred, mse, mae = data_reg.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/1000
train Loss: 1.0200
val Loss: 1.1142

Epoch 10/1000
train Loss: 0.8686
val Loss: 1.1062

Epoch 20/1000
train Loss: 0.8131
val Loss: 1.0885

Epoch 30/1000
train Loss: 0.6840
val Loss: 1.0811

Epoch 40/1000
train Loss: 0.6112
val Loss: 1.0685

Epoch 50/1000
train Loss: 0.5073
val Loss: 1.0388

Epoch 60/1000
train Loss: 0.4109
val Loss: 1.0298

Epoch 70/1000
train Loss: 0.3394
val Loss: 0.9958

Epoch 80/1000
train Loss: 0.2383
val Loss: 0.9570

Epoch 90/1000
train Loss: 0.2027
val Loss: 0.9261

Epoch 100/1000
train Loss: 0.1689
val Loss: 0.9236

Epoch 110/1000
train Loss: 0.1132
val Loss: 0.8760

Epoch 120/1000
train Loss: 0.0893
val Loss: 0.8933

Epoch 130/1000
train Loss: 0.0830
val Loss: 0.8819

Epoch 140/1000
train Loss: 0.0585
val Loss: 0.8917

Epoch 150/1000
train Loss: 0.0488
val Loss: 0.8474

Epoch 160/1000
train Loss: 0.0329
val Loss: 0.8884

Epoch 170/1000
train Loss: 0.0308
val Loss: 0.8402

Epoch 180/1000
train Loss: 0.0265
val Loss: 0.8484

E

In [10]:
# Show the error metrics and the shape of the predictions from the previous cell.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)

** Performance of test dataset ==> MSE = 0.6754866242408752, MAE = 0.647735595703125
** Dimension of result for test dataset = (42,)


In [11]:
# Case 4. LSTM_FCNs (w/o data representation)
config = config4

# Hand the config and both data splits to the regression helper, then build the model.
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()

ckpt_path = config["best_model_path"]

# Train and save the best model only when the config asks for it.
if config["training"]:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Prediction: pred_data receives the checkpoint path and returns predictions plus MSE / MAE.
pred, mse, mae = data_reg.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/1000
train Loss: 1.0094
val Loss: 1.1003

Epoch 10/1000
train Loss: 0.7806
val Loss: 1.0636

Epoch 20/1000
train Loss: 0.6471
val Loss: 1.0443

Epoch 30/1000
train Loss: 0.5468
val Loss: 0.9926

Epoch 40/1000
train Loss: 0.3919
val Loss: 1.0060

Epoch 50/1000
train Loss: 0.2866
val Loss: 0.8760

Epoch 60/1000
train Loss: 0.1470
val Loss: 0.6656

Epoch 70/1000
train Loss: 0.0961
val Loss: 0.5836

Epoch 80/1000
train Loss: 0.0693
val Loss: 0.5295

Epoch 90/1000
train Loss: 0.0512
val Loss: 0.5686

Epoch 100/1000
train Loss: 0.0592
val Loss: 0.5847

Epoch 110/1000
train Loss: 0.0402
val Loss: 0.5635

Epoch 120/1000
train Loss: 0.0461
val Loss: 0.6062

Epoch 130/1000
train Loss: 0.0204
val Loss: 0.6100

Epoch 140/1000
train Loss: 0.0161
val Loss: 0.5821

Epoch 150/1000
train Loss: 0.0159
val Loss: 0.6181

Epoch 160/1000
train Loss: 0.0132
val Loss: 0.5648

Epoch 170/1000
train Loss: 0.0094
val Loss: 0.5993

Epoch 180/1000
train Loss: 0.0083
val Loss: 0.5829

E

In [12]:
# Show the error metrics and the shape of the predictions from the previous cell.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)

** Performance of test dataset ==> MSE = 0.544350266456604, MAE = 0.5546749830245972
** Dimension of result for test dataset = (42,)


--------------------------------------------------

In [13]:
# representation data
# NOTE: this cell rebinds train_x / train_y / test_x / test_y and train_data /
# test_data, replacing the raw time series loaded earlier. Re-run the raw-data
# cell before re-running any of Cases 1-4.
train_x = pd.read_csv('./data/ts2vec_repr_train.csv')
test_x = pd.read_csv('./data/ts2vec_repr_test.csv')

# Open the label files in `with` blocks so their handles are closed right after
# loading. NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('./data/y_train.pkl', 'rb') as label_file:
    train_y = pickle.load(label_file)
with open('./data/y_test.pkl', 'rb') as label_file:
    test_y = pickle.load(label_file)

# Every instance needs exactly one target value.
assert len(train_x) == len(train_y), 'train x/y have a different number of instances'
assert len(test_x) == len(test_y), 'test x/y have a different number of instances'

train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  #shape : (num_of_instance x representation_dims) = (95, 64)
print(train_y.shape) #shape : (num_of_instance) = (95, )
print(test_x.shape)  #shape : (num_of_instance x representation_dims) = (42, 64)
print(test_y.shape)  #shape : (num_of_instance) = (42, )

(95, 64)
(95,)
(42, 64)
(42,)


In [14]:
# Case 5. fully-connected layers (w/ data representation)
config = config5

# Hand the config and both data splits to the regression helper, then build the model.
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()

ckpt_path = config["best_model_path"]

# Train and save the best model only when the config asks for it.
if config["training"]:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Prediction: pred_data receives the checkpoint path and returns predictions plus MSE / MAE.
pred, mse, mae = data_reg.pred_data(model, best_model_path=ckpt_path)

Start training model

Epoch 1/1000
train Loss: 1.0107
val Loss: 1.3017

Epoch 10/1000
train Loss: 0.9193
val Loss: 1.2253

Epoch 20/1000
train Loss: 0.8682
val Loss: 1.2195

Epoch 30/1000
train Loss: 0.8902
val Loss: 1.2012

Epoch 40/1000
train Loss: 0.7929
val Loss: 1.1947

Epoch 50/1000
train Loss: 0.8185
val Loss: 1.1773

Epoch 60/1000
train Loss: 0.7694
val Loss: 1.1708

Epoch 70/1000
train Loss: 0.7491
val Loss: 1.1679

Epoch 80/1000
train Loss: 0.7178
val Loss: 1.1757

Epoch 90/1000
train Loss: 0.7118
val Loss: 1.1748

Epoch 100/1000
train Loss: 0.7503
val Loss: 1.1710

Epoch 110/1000
train Loss: 0.7139
val Loss: 1.1796

Epoch 120/1000
train Loss: 0.6267
val Loss: 1.1838

Epoch 130/1000
train Loss: 0.6783
val Loss: 1.1891

Epoch 140/1000
train Loss: 0.6395
val Loss: 1.1969

Epoch 150/1000
train Loss: 0.6808
val Loss: 1.2062

Epoch 160/1000
train Loss: 0.6791
val Loss: 1.2110

Epoch 170/1000
train Loss: 0.6614
val Loss: 1.2234

Epoch 180/1000
train Loss: 0.5958
val Loss: 1.2277

E

In [15]:
# Show the error metrics and the shape of the predictions from the previous cell.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)

** Performance of test dataset ==> MSE = 0.7736062407493591, MAE = 0.6670892238616943
** Dimension of result for test dataset = (42,)
