In [1]:
import torch
import random
import pickle
import pandas as pd
import numpy as np

import main_regression as mr

# Set Seed

In [2]:
# Fix every random seed so repeated runs start from the same RNG state.
random_seed = 42

# Python and NumPy global generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (default generator, then the CUDA one).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN: turn off the benchmark mode and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Set Config

In [3]:
# Case 1. LSTM model (w/o data representation)
config1 = {
    'model': 'LSTM',  # regression algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model is already saved
    'best_model_path': './ckpt/lstm.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'timestep': 144,  # timestep = window_size
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # hidden state dimension, int (default: 64, range: >= 1)
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        'num_epochs': 2000,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; set to suit the machine)
        'lr': 0.0003,  # learning rate, float (default: 0.001, range: <= 0.1)
        # Training device, one of ["cuda", "cpu"]. Falls back to "cpu" when CUDA is
        # unavailable so the notebook also runs on machines without a GPU.
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'need_yhist': False,
    },
}

# Case 2. GRU model (w/o data representation)
config2 = {
    'model': 'GRU',  # regression algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model is already saved
    'best_model_path': './ckpt/gru.pt',  # path where the trained model is saved
    'with_representation': False,  # whether a data representation is used, bool (default: False)
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'timestep': 144,  # timestep = window_size
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # hidden state dimension, int (default: 64, range: >= 1)
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        'num_epochs': 2000,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; set to suit the machine)
        'lr': 0.0003,  # learning rate, float (default: 0.001, range: <= 0.1)
        # Training device, one of ["cuda", "cpu"]. Falls back to "cpu" when CUDA is
        # unavailable so the notebook also runs on machines without a GPU.
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'need_yhist': False,
    },
}

# Case 3. CNN_1D model (w/o data representation)
config3 = {
    'model': 'CNN_1D',  # regression algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model is already saved
    'best_model_path': './ckpt/cnn_1d.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'timestep': 144,  # timestep = window_size
        'seq_len': 144,  # temporal length of the data, int
        'output_channels': 64,  # output channels of the convolution layer, int (default: 64, range: >= 1, power of 2 recommended)
        'kernel_size': 3,  # filter size of the convolution layer, int (default: 3, range: >= 3, odd value recommended)
        'stride': 1,  # stride of the convolution layer, int (default: 1, range: >= 1)
        'padding': 0,  # padding size, int (default: 0, range: >= 0)
        'drop_out': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'num_epochs': 2000,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; set to suit the machine)
        'lr': 0.0003,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # Training device, one of ["cuda", "cpu"]. Falls back to "cpu" when CUDA is
        # unavailable so the notebook also runs on machines without a GPU.
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'need_yhist': False,
    },
}

# Case 4. LSTM_FCNs model (w/o data representation)
config4 = {
    'model': 'LSTM_FCNs',  # regression algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model is already saved
    'best_model_path': './ckpt/lstm_fcn.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'timestep': 144,  # timestep = window_size
        'num_layers': 2,  # number of recurrent layers, int (default: 1, range: >= 1)
        'lstm_drop_out': 0.001,  # LSTM dropout probability, float (default: 0.4, range: 0 to 1)
        'fc_drop_out': 0.001,  # FC dropout probability, float (default: 0.1, range: 0 to 1)
        'num_epochs': 2000,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; set to suit the machine)
        'lr': 0.0003,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # Training device, one of ["cuda", "cpu"]. Falls back to "cpu" when CUDA is
        # unavailable so the notebook also runs on machines without a GPU.
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'need_yhist': False,
    },
}

# Case 5. DARNN model (w/o data representation)
config5 = {
    'model': 'DARNN',  # regression algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model is already saved
    'best_model_path': './ckpt/darnn.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'encoder_hidden_size': 64,  # encoder hidden state dimension, int (default: 64, range: >= 1)
        'decoder_hidden_size': 64,  # decoder hidden state dimension, int (default: 64, range: >= 1)
        'timestep': 144,  # timestep size, int (default: 16, range: >= 1)
        'encoder_stateful': False,  # whether the encoder is stateful, bool (default: False)
        'decoder_stateful': False,  # whether the decoder is stateful, bool (default: False)
        'num_epochs': 1500,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 64,  # batch size, int (default: 64, range: >= 1; set to suit the machine)
        'lr': 0.0003,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # Training device, one of ["cuda", "cpu"]. Falls back to "cpu" when CUDA is
        # unavailable so the notebook also runs on machines without a GPU.
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        # NOTE(review): presumably tells the pipeline that DARNN also needs the target
        # history as input -- confirm against main_regression.
        'need_yhist': True,
    },
}

# Case 6. fully-connected layers (w/ data representation)
config6 = {
    'model': 'FC',  # regression algorithm, one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model is already saved
    'best_model_path': './ckpt/fc.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 64,  # number of variables in the data (representation dimension), int
        'timestep': 1,  # timestep = window_size
        'shift_size': 1,  # shift amount, int
        'drop_out': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bias': True,  # whether to use a bias term, bool (default: True)
        'num_epochs': 2000,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 32,  # batch size, int (default: 64, range: >= 1; set to suit the machine)
        'lr': 0.0003,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # Training device, one of ["cuda", "cpu"]. Falls back to "cpu" when CUDA is
        # unavailable so the notebook also runs on machines without a GPU.
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'need_yhist': False,
    },
}


# Load data

In [4]:
# raw time series data
def _load_pickle(path):
    """Return the object unpickled from ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (or fails), instead of being left open.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


train_x = _load_pickle('./data/x_train_new_energy.pkl')
train_y = _load_pickle('./data/y_train_new_energy.pkl')
test_x = _load_pickle('./data/x_test_new_energy.pkl')
test_y = _load_pickle('./data/y_test_new_energy.pkl')

# Inputs and targets bundled per split, as passed to mr.Regression below.
train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  # shape: (num_of_instance x input_dims x window_size) = (95, 24, 144)
print(train_y.shape)  # shape: (num_of_instance) = (95,)
print(test_x.shape)  # shape: (num_of_instance x input_dims x window_size) = (42, 24, 144)
print(test_y.shape)  # shape: (num_of_instance) = (42,)

(95, 24, 144)
(95,)
(42, 24, 144)
(42,)


# Training Model

In [5]:
# Case 1. LSTM model (w/o data representation)
config = config1
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # build the model

# Train and persist the best model only when training is enabled.
if config["training"]:
    best_model = data_reg.train_model(model)  # train the model
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# Predict on the test data and report MSE, R2 and RMSE.
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=config["best_model_path"])
print(
    f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}',
    f'test RMSE: {np.round(np.sqrt(mse), 4)}',
    sep='\n',
)

Start training model

Epoch 1/2000
train Loss: 211.3343
val Loss: 241.2221

Epoch 10/2000
train Loss: 196.1204
val Loss: 224.6028

Epoch 20/2000
train Loss: 169.6117
val Loss: 194.2914

Epoch 30/2000
train Loss: 135.3743
val Loss: 157.2957

Epoch 40/2000
train Loss: 107.5801
val Loss: 127.0598

Epoch 50/2000
train Loss: 85.5347
val Loss: 102.8723

Epoch 60/2000
train Loss: 68.7810
val Loss: 84.1853

Epoch 70/2000
train Loss: 55.7026
val Loss: 69.3272

Epoch 80/2000
train Loss: 45.7779
val Loss: 57.8176

Epoch 90/2000
train Loss: 38.4944
val Loss: 49.1586

Epoch 100/2000
train Loss: 32.9524
val Loss: 42.3158

Epoch 110/2000
train Loss: 28.4964
val Loss: 36.6120

Epoch 120/2000
train Loss: 25.3236
val Loss: 32.3999

Epoch 130/2000
train Loss: 23.0006
val Loss: 29.1571

Epoch 140/2000
train Loss: 21.4504
val Loss: 26.8423

Epoch 150/2000
train Loss: 20.3267
val Loss: 25.0473

Epoch 160/2000
train Loss: 19.5836
val Loss: 23.7225

Epoch 170/2000
train Loss: 19.0509
val Loss: 22.7036

Epoch 


Epoch 1510/2000
train Loss: 13.2540
val Loss: 24.9225

Epoch 1520/2000
train Loss: 13.3709
val Loss: 26.7087

Epoch 1530/2000
train Loss: 15.1164
val Loss: 25.4137

Epoch 1540/2000
train Loss: 13.6996
val Loss: 22.5507

Epoch 1550/2000
train Loss: 13.4229
val Loss: 22.4404

Epoch 1560/2000
train Loss: 14.2166
val Loss: 27.2753

Epoch 1570/2000
train Loss: 13.7789
val Loss: 25.5240

Epoch 1580/2000
train Loss: 13.4308
val Loss: 26.1428

Epoch 1590/2000
train Loss: 12.9704
val Loss: 22.0881

Epoch 1600/2000
train Loss: 12.8358
val Loss: 25.5736

Epoch 1610/2000
train Loss: 12.3687
val Loss: 24.7315

Epoch 1620/2000
train Loss: 12.2048
val Loss: 22.9926

Epoch 1630/2000
train Loss: 22.6707
val Loss: 20.7764

Epoch 1640/2000
train Loss: 17.1057
val Loss: 23.9407

Epoch 1650/2000
train Loss: 15.8879
val Loss: 21.1435

Epoch 1660/2000
train Loss: 15.7790
val Loss: 21.2319

Epoch 1670/2000
train Loss: 15.7663
val Loss: 22.3303

Epoch 1680/2000
train Loss: 15.6554
val Loss: 21.2483

Epoch 169

In [6]:
# Case 2. GRU (w/o data representation)
config = config2
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # build the model

# Train and persist the best model only when training is enabled.
if config["training"]:
    best_model = data_reg.train_model(model)  # train the model
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# Predict on the test data and report MSE, R2 and RMSE.
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=config["best_model_path"])
print(
    f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}',
    f'test RMSE: {np.round(np.sqrt(mse), 4)}',
    sep='\n',
)

Start training model

Epoch 1/2000
train Loss: 213.1694
val Loss: 239.2076

Epoch 10/2000
train Loss: 165.9689
val Loss: 189.1914

Epoch 20/2000
train Loss: 120.9827
val Loss: 140.4195

Epoch 30/2000
train Loss: 89.0985
val Loss: 106.3132

Epoch 40/2000
train Loss: 68.7730
val Loss: 83.9854

Epoch 50/2000
train Loss: 54.6515
val Loss: 68.0372

Epoch 60/2000
train Loss: 44.2332
val Loss: 55.8891

Epoch 70/2000
train Loss: 36.4557
val Loss: 46.6094

Epoch 80/2000
train Loss: 31.1764
val Loss: 40.0572

Epoch 90/2000
train Loss: 27.2754
val Loss: 35.0266

Epoch 100/2000
train Loss: 24.4592
val Loss: 31.2498

Epoch 110/2000
train Loss: 22.4738
val Loss: 28.3591

Epoch 120/2000
train Loss: 20.9493
val Loss: 26.0320

Epoch 130/2000
train Loss: 19.9967
val Loss: 24.4686

Epoch 140/2000
train Loss: 19.3658
val Loss: 23.3105

Epoch 150/2000
train Loss: 18.8648
val Loss: 22.2941

Epoch 160/2000
train Loss: 18.5701
val Loss: 21.6161

Epoch 170/2000
train Loss: 18.3796
val Loss: 21.0975

Epoch 180/


Epoch 1510/2000
train Loss: 13.6305
val Loss: 22.6995

Epoch 1520/2000
train Loss: 13.2463
val Loss: 21.6716

Epoch 1530/2000
train Loss: 13.2165
val Loss: 23.2589

Epoch 1540/2000
train Loss: 12.6946
val Loss: 23.1882

Epoch 1550/2000
train Loss: 12.6467
val Loss: 24.3126

Epoch 1560/2000
train Loss: 14.1852
val Loss: 21.2180

Epoch 1570/2000
train Loss: 12.4202
val Loss: 22.4764

Epoch 1580/2000
train Loss: 12.7161
val Loss: 27.9974

Epoch 1590/2000
train Loss: 11.7886
val Loss: 24.5786

Epoch 1600/2000
train Loss: 12.0838
val Loss: 22.5590

Epoch 1610/2000
train Loss: 12.3468
val Loss: 22.3414

Epoch 1620/2000
train Loss: 11.1514
val Loss: 27.1706

Epoch 1630/2000
train Loss: 11.7463
val Loss: 24.9906

Epoch 1640/2000
train Loss: 11.3121
val Loss: 23.3708

Epoch 1650/2000
train Loss: 12.8897
val Loss: 27.7300

Epoch 1660/2000
train Loss: 11.5212
val Loss: 22.2421

Epoch 1670/2000
train Loss: 10.4326
val Loss: 25.3108

Epoch 1680/2000
train Loss: 10.1438
val Loss: 29.7218

Epoch 169

In [7]:
# Case 3. CNN_1D (w/o data representation)
config = config3
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # build the model

# Train and persist the best model only when training is enabled.
if config["training"]:
    best_model = data_reg.train_model(model)  # train the model
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# Predict on the test data and report MSE, R2 and RMSE.
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=config["best_model_path"])
print(
    f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}',
    f'test RMSE: {np.round(np.sqrt(mse), 4)}',
    sep='\n',
)

Start training model

Epoch 1/2000
train Loss: 126.0176
val Loss: 31.5952

Epoch 10/2000
train Loss: 28.8105
val Loss: 27.8481

Epoch 20/2000
train Loss: 24.7747
val Loss: 21.8527

Epoch 30/2000
train Loss: 25.0509
val Loss: 20.0571

Epoch 40/2000
train Loss: 26.8283
val Loss: 20.2179

Epoch 50/2000
train Loss: 20.5226
val Loss: 23.6409

Epoch 60/2000
train Loss: 20.8485
val Loss: 23.3904

Epoch 70/2000
train Loss: 18.9526
val Loss: 20.7253

Epoch 80/2000
train Loss: 19.2484
val Loss: 21.5913

Epoch 90/2000
train Loss: 17.6067
val Loss: 21.7909

Epoch 100/2000
train Loss: 18.6615
val Loss: 21.3158

Epoch 110/2000
train Loss: 19.9427
val Loss: 20.6759

Epoch 120/2000
train Loss: 18.2787
val Loss: 24.5962

Epoch 130/2000
train Loss: 17.1683
val Loss: 23.8187

Epoch 140/2000
train Loss: 17.6127
val Loss: 21.2991

Epoch 150/2000
train Loss: 15.5843
val Loss: 21.1685

Epoch 160/2000
train Loss: 18.9864
val Loss: 21.7208

Epoch 170/2000
train Loss: 14.8610
val Loss: 25.8490

Epoch 180/2000
t


Epoch 1530/2000
train Loss: 2.5867
val Loss: 37.7279

Epoch 1540/2000
train Loss: 2.5046
val Loss: 38.2376

Epoch 1550/2000
train Loss: 2.7017
val Loss: 36.5751

Epoch 1560/2000
train Loss: 3.9046
val Loss: 37.8930

Epoch 1570/2000
train Loss: 2.2605
val Loss: 38.2635

Epoch 1580/2000
train Loss: 2.7005
val Loss: 38.7719

Epoch 1590/2000
train Loss: 2.7591
val Loss: 38.9216

Epoch 1600/2000
train Loss: 2.0386
val Loss: 39.8160

Epoch 1610/2000
train Loss: 2.9230
val Loss: 40.6668

Epoch 1620/2000
train Loss: 2.1467
val Loss: 38.9468

Epoch 1630/2000
train Loss: 2.4916
val Loss: 39.3260

Epoch 1640/2000
train Loss: 2.7093
val Loss: 39.9204

Epoch 1650/2000
train Loss: 2.5376
val Loss: 39.3479

Epoch 1660/2000
train Loss: 3.1153
val Loss: 39.9042

Epoch 1670/2000
train Loss: 2.6767
val Loss: 38.0717

Epoch 1680/2000
train Loss: 2.5529
val Loss: 39.4336

Epoch 1690/2000
train Loss: 1.7759
val Loss: 38.7404

Epoch 1700/2000
train Loss: 2.4292
val Loss: 39.5309

Epoch 1710/2000
train Loss:

In [8]:
# Case 4. LSTM_FCNs (w/o data representation)
config = config4
data_reg = mr.Regression(config, train_data, test_data)
model = data_reg.build_model()  # build the model

# Train and persist the best model only when training is enabled.
if config["training"]:
    best_model = data_reg.train_model(model)  # train the model
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# Predict on the test data and report MSE, R2 and RMSE.
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=config["best_model_path"])
print(
    f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}',
    f'test RMSE: {np.round(np.sqrt(mse), 4)}',
    sep='\n',
)

Start training model

Epoch 1/2000
train Loss: 211.9843
val Loss: 285.8119

Epoch 10/2000
train Loss: 197.4490
val Loss: 235.0284

Epoch 20/2000
train Loss: 175.5602
val Loss: 211.7100

Epoch 30/2000
train Loss: 147.8240
val Loss: 175.5422

Epoch 40/2000
train Loss: 123.6403
val Loss: 144.8802

Epoch 50/2000
train Loss: 104.1462
val Loss: 125.1565

Epoch 60/2000
train Loss: 89.8806
val Loss: 113.5019

Epoch 70/2000
train Loss: 78.8314
val Loss: 108.0770

Epoch 80/2000
train Loss: 69.9318
val Loss: 78.3070

Epoch 90/2000
train Loss: 63.2097
val Loss: 73.2328

Epoch 100/2000
train Loss: 56.7629
val Loss: 73.9584

Epoch 110/2000
train Loss: 51.1936
val Loss: 58.8537

Epoch 120/2000
train Loss: 46.1409
val Loss: 62.1643

Epoch 130/2000
train Loss: 41.5442
val Loss: 63.9642

Epoch 140/2000
train Loss: 37.2565
val Loss: 52.5278

Epoch 150/2000
train Loss: 33.7555
val Loss: 31.5369

Epoch 160/2000
train Loss: 30.3177
val Loss: 55.4604

Epoch 170/2000
train Loss: 27.0422
val Loss: 51.1821

Epo


Epoch 1530/2000
train Loss: 0.1678
val Loss: 26.0538

Epoch 1540/2000
train Loss: 0.4609
val Loss: 45.9220

Epoch 1550/2000
train Loss: 0.1874
val Loss: 19.9847

Epoch 1560/2000
train Loss: 0.1430
val Loss: 25.6623

Epoch 1570/2000
train Loss: 0.2389
val Loss: 47.6350

Epoch 1580/2000
train Loss: 0.3046
val Loss: 18.8383

Epoch 1590/2000
train Loss: 0.7313
val Loss: 20.8972

Epoch 1600/2000
train Loss: 0.5916
val Loss: 23.6793

Epoch 1610/2000
train Loss: 0.5273
val Loss: 24.1695

Epoch 1620/2000
train Loss: 0.1727
val Loss: 77.5666

Epoch 1630/2000
train Loss: 0.2674
val Loss: 18.1358

Epoch 1640/2000
train Loss: 0.8433
val Loss: 20.1798

Epoch 1650/2000
train Loss: 0.3381
val Loss: 23.4147

Epoch 1660/2000
train Loss: 0.3510
val Loss: 25.5673

Epoch 1670/2000
train Loss: 0.0990
val Loss: 49.1895

Epoch 1680/2000
train Loss: 0.1832
val Loss: 19.1476

Epoch 1690/2000
train Loss: 0.2549
val Loss: 42.7964

Epoch 1700/2000
train Loss: 0.1490
val Loss: 28.7389

Epoch 1710/2000
train Loss: