In [11]:
import torch
import random
import pickle
import pandas as pd
import numpy as np

import main_regression as mr

# Set Seed

In [12]:
# Fix every random number generator used by the notebook so runs are repeatable.
random_seed = 42

# Python's built-in generator and NumPy's legacy global generator.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators: the default (CPU) one and the current CUDA device.
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Set Config

In [17]:
# Case 1. LSTM model (w/o data representation)
# Note: 'shift_size' used to be listed twice in 'parameter'; a dict literal keeps only
# the last duplicate, so the entry is now declared exactly once (same value, same position).
config1 = {
    'model': 'LSTM',  # regression algorithm; choose one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'training': True,  # whether to train; set to False when a trained model has already been saved
    'best_model_path': './ckpt/lstm.pt',  # path where the trained model is saved
    'parameter': {
        'input_size': 24,  # number of variables in the data, int
        'timestep': 144,  # timestep = window_size
        'num_layers': 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        'hidden_size': 64,  # hidden state dimension, int (default: 64, range: >= 1)
        'shift_size': 1,  # shift amount, int
        'dropout': 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        'bidirectional': True,  # whether the model is bidirectional, bool (default: True)
        'num_epochs': 2000,  # number of training epochs, int (default: 150, range: >= 1)
        'batch_size': 16,  # batch size, int (default: 64, range: >= 1; pick what fits your hardware)
        'lr': 0.0003,  # learning rate, float (default: 0.001, range: <= 0.1)
        'device': 'cuda',  # training device, one of ["cuda", "cpu"]
        'need_yhist': False,
    },
}

# Case 2. GRU model (w/o data representation)
config2 = dict(
    # regression algorithm; choose one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    model='GRU',
    # whether to train; set to False when a trained model has already been saved
    training=True,
    # path where the trained model is saved
    best_model_path='./ckpt/gru.pt',
    # whether a representation is used, bool (default: False)
    with_representation=False,
    parameter=dict(
        input_size=24,  # number of variables in the data, int
        timestep=144,  # timestep = window_size
        num_layers=2,  # number of recurrent layers, int (default: 2, range: >= 1)
        hidden_size=16,  # hidden state dimension, int (default: 64, range: >= 1)
        dropout=0.1,  # dropout probability, float (default: 0.1, range: 0 to 1)
        bidirectional=True,  # whether the model is bidirectional, bool (default: True)
        num_epochs=2000,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=16,  # batch size, int (default: 64, range: >= 1; pick what fits your hardware)
        lr=0.0003,  # learning rate, float (default: 0.001, range: <= 0.1)
        device='cuda',  # training device, one of ["cuda", "cpu"]
        need_yhist=False,
        shift_size=1,
    ),
)

# Case 3. CNN_1D model (w/o data representation)
config3 = {
    # regression algorithm; choose one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    "model": "CNN_1D",
    # whether to train; set to False when a trained model has already been saved
    "training": True,
    # path where the trained model is saved
    "best_model_path": "./ckpt/cnn_1d.pt",
    "parameter": {
        # number of variables in the data, int
        "input_size": 24,
        # timestep = window_size
        "timestep": 144,
        # time length of the data, int
        "seq_len": 144,
        # output channels of the convolution layer, int (default: 64, range: >= 1, a power of 2 is recommended)
        "output_channels": 64,
        # filter size of the convolution layer, int (default: 3, range: >= 3, an odd value is recommended)
        "kernel_size": 3,
        # stride of the convolution layer, int (default: 1, range: >= 1)
        "stride": 1,
        # padding size, int (default: 0, range: >= 0)
        "padding": 0,
        # dropout probability, float (default: 0.1, range: 0 to 1)
        "drop_out": 0.1,
        # number of training epochs, int (default: 150, range: >= 1)
        "num_epochs": 2000,
        # batch size, int (default: 64, range: >= 1; pick what fits your hardware)
        "batch_size": 16,
        # learning rate, float (default: 0.0001, range: <= 0.1)
        "lr": 0.0003,
        # training device, one of ["cuda", "cpu"]
        "device": "cuda",
        "need_yhist": False,
        "shift_size": 1,
    },
}

# Case 4. LSTM_FCNs model (w/o data representation)
config4 = dict(
    model='LSTM_FCNs',  # regression algorithm; choose one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    training=True,  # whether to train; set to False when a trained model has already been saved
    best_model_path='./ckpt/lstm_fcn.pt',  # path where the trained model is saved
    parameter=dict(
        input_size=24,  # number of variables in the data, int
        timestep=144,  # timestep = window_size
        num_layers=2,  # number of recurrent layers, int (default: 1, range: >= 1)
        lstm_drop_out=0.001,  # LSTM dropout probability, float (default: 0.4, range: 0 to 1)
        fc_drop_out=0.001,  # FC dropout probability, float (default: 0.1, range: 0 to 1)
        num_epochs=2000,  # number of training epochs, int (default: 150, range: >= 1)
        batch_size=16,  # batch size, int (default: 64, range: >= 1; pick what fits your hardware)
        lr=0.0003,  # learning rate, float (default: 0.0001, range: <= 0.1)
        device='cuda',  # training device, one of ["cuda", "cpu"]
        need_yhist=False,
        shift_size=1,
    ),
)

# Case 5. fully-connected layers (w/ data representation)
config5 = {
    # regression algorithm; choose one of {'LSTM', 'GRU', 'CNN_1D', 'LSTM_FCNs', 'FC', 'DARNN'}
    'model': 'FC',
    # whether to train; set to False when a trained model has already been saved
    'training': True,
    # path where the trained model is saved
    'best_model_path': './ckpt/fc.pt',
    'parameter': {
        # number of variables in the data (representation dimension), int
        'input_size': 144,
        # timestep = window_size
        'timestep': 1,
        # shift amount, int
        'shift_size': 1,
        # dropout probability, float (default: 0.1, range: 0 to 1)
        'drop_out': 0.1,
        # whether a bias term is used, bool (default: True)
        'bias': True,
        # number of training epochs, int (default: 150, range: >= 1)
        'num_epochs': 2000,
        # batch size, int (default: 64, range: >= 1; pick what fits your hardware)
        'batch_size': 16,
        # learning rate, float (default: 0.0001, range: <= 0.1)
        'lr': 0.0001,
        # training device, one of ["cuda", "cpu"]
        'device': 'cuda',
        'need_yhist': False,
    },
}

# Load data

In [14]:
# raw time series data
def _load_pickle(path):
    """Return the object pickled at `path`, closing the file handle afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train_x = _load_pickle('./data/x_train_new_energy.pkl')
train_y = _load_pickle('./data/y_train_new_energy.pkl')
test_x = _load_pickle('./data/x_test_new_energy.pkl')
test_y = _load_pickle('./data/y_test_new_energy.pkl')

# Bundle inputs and targets in the {'x': ..., 'y': ...} form handed to mr.Regression below.
train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

print(train_x.shape)  # shape: (num_of_instance x input_dims x window_size) = (95, 24, 144)
print(train_y.shape)  # shape: (num_of_instance) = (95,)
print(test_x.shape)  # shape: (num_of_instance x input_dims x window_size) = (42, 24, 144)
print(test_y.shape)  # shape: (num_of_instance) = (42,)

(95, 24, 144)
(95,)
(42, 24, 144)
(42,)


# Training Model

In [5]:
# Case 1. LSTM model (w/o data representation)
# NOTE(review): the header says "w/o data representation" yet use_representation=True
# is passed below — confirm the intended value against mr.Regression.
config = config1
data_reg = mr.Regression(config, train_data, test_data, use_representation=True)

# Build the model.
model = data_reg.build_model()
ckpt_path = config["best_model_path"]

# Train and save only when the config asks for it.
if config["training"]:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Predict and report the test metrics (MSE, R2 and the RMSE derived from the MSE).
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=ckpt_path)
print(f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}')
print(f'test RMSE: {np.round(np.sqrt(mse), 4)}')

Start training model

Epoch 1/2000
train Loss: 222.6936
val Loss: 187.7219

Epoch 10/2000
train Loss: 165.4655
val Loss: 131.8825

Epoch 20/2000
train Loss: 94.9833
val Loss: 70.5002

Epoch 30/2000
train Loss: 57.2592
val Loss: 39.0989

Epoch 40/2000
train Loss: 37.1636
val Loss: 23.5779

Epoch 50/2000
train Loss: 26.9331
val Loss: 16.6143

Epoch 60/2000
train Loss: 22.3273
val Loss: 14.1925

Epoch 70/2000
train Loss: 20.4854
val Loss: 13.7292

Epoch 80/2000
train Loss: 19.7527
val Loss: 13.8964

Epoch 90/2000
train Loss: 19.4778
val Loss: 14.1848

Epoch 100/2000
train Loss: 19.4023
val Loss: 14.4099

Epoch 110/2000
train Loss: 19.3856
val Loss: 14.5618

Epoch 120/2000
train Loss: 19.3693
val Loss: 14.6043

Epoch 130/2000
train Loss: 19.3616
val Loss: 14.6482

Epoch 140/2000
train Loss: 19.3636
val Loss: 14.6680

Epoch 150/2000
train Loss: 19.3577
val Loss: 14.6775

Epoch 160/2000
train Loss: 19.3554
val Loss: 14.6804

Epoch 170/2000
train Loss: 19.3498
val Loss: 14.6825

Epoch 180/200

In [18]:
# Case 2. GRU (w/o data representation)
# NOTE(review): the header says "w/o data representation" yet use_representation=True
# is passed below — confirm the intended value against mr.Regression.
config = config2
data_reg = mr.Regression(config, train_data, test_data, use_representation=True)
model = data_reg.build_model()  # build the model

if config['training']:
    # train the model, then save it
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=config['best_model_path'])

# prediction; the result is unpacked into targets, predictions and the two metrics
results = data_reg.pred_data(model, best_model_path=config['best_model_path'])
y_true, pred, mse, r2 = results
print(f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}')
print(f'test RMSE: {np.round(np.sqrt(mse), 4)}')

Start training model

Epoch 1/2000
train Loss: 225.6518
val Loss: 191.2764

Epoch 10/2000
train Loss: 199.6607
val Loss: 167.0832

Epoch 20/2000
train Loss: 171.3232
val Loss: 141.0707

Epoch 30/2000
train Loss: 148.5514
val Loss: 120.5289

Epoch 40/2000
train Loss: 130.3471
val Loss: 104.0884

Epoch 50/2000
train Loss: 114.2071
val Loss: 89.6738

Epoch 60/2000
train Loss: 101.0050
val Loss: 78.0037

Epoch 70/2000
train Loss: 89.8570
val Loss: 68.1942

Epoch 80/2000
train Loss: 80.2721
val Loss: 59.8409

Epoch 90/2000
train Loss: 71.9747
val Loss: 52.6845

Epoch 100/2000
train Loss: 64.7673
val Loss: 46.5445

Epoch 110/2000
train Loss: 58.5368
val Loss: 41.3081

Epoch 120/2000
train Loss: 53.1165
val Loss: 36.7847

Epoch 130/2000
train Loss: 48.3358
val Loss: 32.8760

Epoch 140/2000
train Loss: 44.1963
val Loss: 29.5709

Epoch 150/2000
train Loss: 40.6226
val Loss: 26.7376

Epoch 160/2000
train Loss: 37.5043
val Loss: 24.3331

Epoch 170/2000
train Loss: 34.7614
val Loss: 22.2841

Epoch

In [7]:
# Case 3. CNN_1D (w/o data representation)
# NOTE(review): the header says "w/o data representation" yet use_representation=True
# is passed below — confirm the intended value against mr.Regression.
config = config3
data_reg = mr.Regression(
    config,
    train_data,
    test_data,
    use_representation=True,
)
model = data_reg.build_model()  # build the model

should_train = config["training"]
if should_train:
    best_model = data_reg.train_model(model)  # train the model
    data_reg.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# prediction
y_true, pred, mse, r2 = data_reg.pred_data(
    model,
    best_model_path=config["best_model_path"],
)
print(f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}')
print(f'test RMSE: {np.round(np.sqrt(mse), 4)}')

Start training model

Epoch 1/2000
train Loss: 241.7953
val Loss: 200.1572

Epoch 10/2000
train Loss: 25.4863
val Loss: 16.7112

Epoch 20/2000
train Loss: 28.1218
val Loss: 19.6314

Epoch 30/2000
train Loss: 27.5288
val Loss: 16.1003

Epoch 40/2000
train Loss: 22.6572
val Loss: 15.0557

Epoch 50/2000
train Loss: 23.2195
val Loss: 15.3886

Epoch 60/2000
train Loss: 20.4684
val Loss: 17.9040

Epoch 70/2000
train Loss: 20.7106
val Loss: 16.3008

Epoch 80/2000
train Loss: 17.5504
val Loss: 16.3044

Epoch 90/2000
train Loss: 18.3929
val Loss: 15.3937

Epoch 100/2000
train Loss: 18.9795
val Loss: 15.2887

Epoch 110/2000
train Loss: 17.2724
val Loss: 15.1479

Epoch 120/2000
train Loss: 15.8387
val Loss: 19.0238

Epoch 130/2000
train Loss: 20.2261
val Loss: 17.5141

Epoch 140/2000
train Loss: 16.0169
val Loss: 18.2457

Epoch 150/2000
train Loss: 17.6703
val Loss: 16.6404

Epoch 160/2000
train Loss: 18.6532
val Loss: 16.0960

Epoch 170/2000
train Loss: 16.2398
val Loss: 21.4689

Epoch 180/2000


In [16]:
# Case 4. LSTM_FCNs (w/o data representation)
# NOTE(review): the header says "w/o data representation" yet use_representation=True
# is passed below — confirm the intended value against mr.Regression.
config = config4

# Regression wrapper plus the model it builds.
data_reg = mr.Regression(config, train_data, test_data, use_representation=True)
model = data_reg.build_model()
ckpt_path = config['best_model_path']

# Training and saving are skipped when config['training'] is falsy.
if config['training']:
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# Prediction, followed by the test metrics.
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=ckpt_path)
print(f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}')
print(f'test RMSE: {np.round(np.sqrt(mse), 4)}')

Start training model

Epoch 1/2000
train Loss: 226.0511
val Loss: 200.1778

Epoch 10/2000
train Loss: 169.3904
val Loss: 134.2786

Epoch 20/2000
train Loss: 108.7624
val Loss: 84.0601

Epoch 30/2000
train Loss: 79.7754
val Loss: 50.5382

Epoch 40/2000
train Loss: 61.8190
val Loss: 37.8694

Epoch 50/2000
train Loss: 48.7371
val Loss: 33.1911

Epoch 60/2000
train Loss: 38.3243
val Loss: 27.9385

Epoch 70/2000
train Loss: 30.1517
val Loss: 14.4212

Epoch 80/2000
train Loss: 24.0227
val Loss: 20.7905

Epoch 90/2000
train Loss: 19.8376
val Loss: 20.3671

Epoch 100/2000
train Loss: 17.1810
val Loss: 19.5929

Epoch 110/2000
train Loss: 14.2604
val Loss: 20.3327

Epoch 120/2000
train Loss: 11.7465
val Loss: 14.9605

Epoch 130/2000
train Loss: 10.2184
val Loss: 16.7348

Epoch 140/2000
train Loss: 9.4282
val Loss: 17.1257

Epoch 150/2000
train Loss: 8.6618
val Loss: 16.7103

Epoch 160/2000
train Loss: 6.9807
val Loss: 17.0364

Epoch 170/2000
train Loss: 6.5157
val Loss: 21.5285

Epoch 180/2000
t

In [9]:
# Load the CSV inputs/targets as NumPy arrays (pandas is imported once, in the
# notebook's top import cell, so it is not re-imported here).
# NOTE: this rebinds train_x/train_y/test_x/test_y and train_data/test_data, replacing
# the pickle-loaded arrays for every cell executed afterwards.
train_x = pd.read_csv('./data/train_new_energy.csv').values
test_x = pd.read_csv('./data/test_new_energy.csv').values

train_y = pd.read_csv('./data/train_new_energy_y.csv').values
test_y = pd.read_csv('./data/test_new_energy_y.csv').values

train_data = {'x': train_x, 'y': train_y}
test_data = {'x': test_x, 'y': test_y}

In [10]:
# Case 5. fully-connected layers (w/ data representation)

# NOTE(review): train_data/test_data here are presumably the CSV-loaded arrays from the
# preceding cell rather than the raw pickled time series — confirm the execution order.
config = config5
data_reg = mr.Regression(config, train_data, test_data, use_representation=True)
model = data_reg.build_model()  # build the model
ckpt_path = config["best_model_path"]

if config["training"]:
    # train the model, then save it
    best_model = data_reg.train_model(model)
    data_reg.save_model(best_model, best_model_path=ckpt_path)

# prediction
y_true, pred, mse, r2 = data_reg.pred_data(model, best_model_path=ckpt_path)
print(f'test Loss: {np.round(mse,5)} and R2: {np.round(r2,5)}')
print(f'test RMSE: {np.round(np.sqrt(mse), 4)}')

Start training model

Epoch 1/2000
train Loss: 230.8990
val Loss: 195.0663

Epoch 10/2000
train Loss: 204.2068
val Loss: 172.1549

Epoch 20/2000
train Loss: 179.6896
val Loss: 149.7107

Epoch 30/2000
train Loss: 157.1175
val Loss: 130.3498

Epoch 40/2000
train Loss: 133.8857
val Loss: 111.6359

Epoch 50/2000
train Loss: 112.1209
val Loss: 92.1628

Epoch 60/2000
train Loss: 92.9869
val Loss: 74.8337

Epoch 70/2000
train Loss: 77.6394
val Loss: 60.4213

Epoch 80/2000
train Loss: 63.5292
val Loss: 48.4655

Epoch 90/2000
train Loss: 51.1169
val Loss: 38.8648

Epoch 100/2000
train Loss: 43.2211
val Loss: 31.4259

Epoch 110/2000
train Loss: 37.9837
val Loss: 26.0214

Epoch 120/2000
train Loss: 32.3637
val Loss: 22.1450

Epoch 130/2000
train Loss: 30.0149
val Loss: 19.6046

Epoch 140/2000
train Loss: 27.2543
val Loss: 17.9141

Epoch 150/2000
train Loss: 26.0799
val Loss: 16.9205

Epoch 160/2000
train Loss: 24.7485
val Loss: 16.4321

Epoch 170/2000
train Loss: 24.7557
val Loss: 16.1334

Epoch 