In [1]:
import torch
import random
import pandas as pd
import numpy as np
import main_forecasting as mf

In [2]:
# Fix the seed of every random number generator used by the notebook
# so that repeated runs start from the same state.
random_seed = 42

# Python's built-in generator and NumPy's legacy global generator.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators, including the explicit CUDA seed.
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Disable the cuDNN autotuner and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. model = lstm
# Configuration passed to mf.Forecasting for the LSTM forecaster.
config1 = {
    "model": 'lstm',
    "training": True,  # whether to train; set to False when a trained checkpoint already exists
    "best_model_path": './ckpt/lstm.pt',  # path where the trained model is saved
    "parameter": {
        "input_size": 1,  # number of data variables, int
        "window_size": 48,  # length of the input sequence, int
        "forecast_step": 1,  # number of future time steps to predict, int
        "num_layers": 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        "hidden_size": 64,  # dimension of the hidden state, int (default: 64, range: >= 1)
        "dropout": 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1 inclusive)
        "bidirectional": True,  # whether the model is bidirectional, bool (default: True)
        "num_epochs": 150,  # number of training epochs, int (default: 150, range: >= 1)
        "batch_size": 64,  # batch size, int (default: 64, range: >= 1; pick a value that fits the machine)
        "lr": 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # Training device, one of ['cuda', 'cpu']. Prefer the GPU, but fall back to
        # the CPU when no CUDA device is available so the notebook still runs there.
        "device": 'cuda' if torch.cuda.is_available() else 'cpu',
    }
}

# Case 2. model = gru
# Configuration passed to mf.Forecasting for the GRU forecaster.
config2 = {
    "model": 'gru',
    "training": True,  # whether to train; set to False when a trained checkpoint already exists
    "best_model_path": './ckpt/gru.pt',  # path where the trained model is saved
    "parameter": {
        "input_size": 1,  # number of data variables, int
        "window_size": 48,  # length of the input sequence, int
        "forecast_step": 1,  # number of future time steps to predict, int
        "num_layers": 2,  # number of recurrent layers, int (default: 2, range: >= 1)
        "hidden_size": 64,  # dimension of the hidden state, int (default: 64, range: >= 1)
        "dropout": 0.1,  # dropout probability, float (default: 0.1, range: 0 to 1 inclusive)
        "bidirectional": True,  # whether the model is bidirectional, bool (default: True)
        "num_epochs": 150,  # number of training epochs, int (default: 150, range: >= 1)
        "batch_size": 64,  # batch size, int (default: 64, range: >= 1; pick a value that fits the machine)
        "lr": 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        # Training device, one of ['cuda', 'cpu']. Prefer the GPU, but fall back to
        # the CPU when no CUDA device is available so the notebook still runs there.
        "device": 'cuda' if torch.cuda.is_available() else 'cpu',
    }
}

# Case 3. model = informer
# Configuration passed to mf.Forecasting for the Informer forecaster.
config3 = {
    "model": 'informer',
    "training": True,  # whether to train; set to False when a trained checkpoint already exists
    "best_model_path": './ckpt/informer.pt',  # path where the trained model is saved
    "parameter": {
        "input_size": 1,  # number of data variables, int
        "window_size": 48,  # length of the input sequence, int
        "forecast_step": 1,  # number of future time steps to predict, int
        "label_len": 12,  # length of the decoder start token, int (default: 12)
        "d_model": 512,  # hidden dimension of the model, int (default: 512)
        "e_layers": 2,  # number of encoder layers, int (default: 2)
        "d_layers": 1,  # number of decoder layers, int (default: 1)
        "d_ff": 2048,  # hidden dimension of the fully connected layer, int (default: 2048)
        "factor": 5,  # ProbSparse attention factor, int (default: 5)
        "dropout": 0.05,  # dropout ratio, float (default: 0.05)
        "attn": 'prob',  # attention computation method (default: 'prob', one of ['prob', 'full'])
        "n_heads": 8,  # number of multi-head attention heads, int (default: 8)
        "embed": 'timeF',  # time-feature encoding method (default: 'timeF', one of ['timeF', 'fixed', 'learned'])
        "num_epochs": 150,  # number of training epochs, int (default: 150, range: >= 1)
        "batch_size": 64,  # batch size, int (default: 64, range: >= 1; pick a value that fits the machine)
        "lr": 0.0001,  # learning rate, float (default: 0.0001, range: <= 0.1)
        "lradj": 'type1',  # learning-rate adjustment scheme (default: 'type1', one of ['type1', 'type2'])
        # Training device, one of ['cuda', 'cpu']. Prefer the GPU, but fall back to
        # the CPU when no CUDA device is available so the notebook still runs there.
        "device": 'cuda' if torch.cuda.is_available() else 'cpu',
    }
}

# Case 4. model = scinet
# Configuration passed to mf.Forecasting for the SCINet forecaster.
config4 = {
    "model": 'scinet',
    "training": True,  # whether to train; set to False when a trained checkpoint already exists
    "best_model_path": './ckpt/scinet.pt',  # path where the trained model is saved
    "parameter": {
        "input_size": 1,  # number of data variables, int
        "window_size": 48,  # length of the input sequence, int
        "forecast_step": 1,  # number of future time steps to predict, int
        "levels": 2,  # depth of the tree, int (default: 2; range: at most the log of the input sequence length; 2-4 recommended)
        "stacks": 1,  # number of stacked SCINet structures, int (default: 1, range: <= 3)
        "num_epochs": 150,  # number of training epochs, int (default: 150, range: >= 1)
        "batch_size": 64,  # batch size, int (default: 64, range: >= 1; pick a value that fits the machine)
        "lr": 0.00005,  # learning rate, float (stated default: 0.0001, range: <= 0.1); this case uses half the default
        # Training device, one of ['cuda', 'cpu']. Prefer the GPU, but fall back to
        # the CPU when no CUDA device is available so the notebook still runs there.
        "device": 'cuda' if torch.cuda.is_available() else 'cpu',
    }
}

In [4]:
# Dataset: locations of the train and test CSV files.
dataset_dir = dict(
    train='./data/train_data.csv',
    test='./data/test_data.csv',
)

# Load the train split and keep only the "MT_320" column as an array,
# shape=(# time steps, ).
train_data = pd.read_csv(dataset_dir["train"])["MT_320"].values

# Load the test split once, then pull out its "date" column and its
# "MT_320" column; both are taken from the same frame before the name
# test_data is rebound to the value array.
test_data = pd.read_csv(dataset_dir["test"])
test_date, test_data = test_data["date"].values, test_data["MT_320"].values

In [5]:
# Case 1. model = lstm
# Run the forecasting workflow with the LSTM configuration (config1).
config = config1
data_forecast = mf.Forecasting(config, train_data, test_data, test_date)
init_model = data_forecast.build_model()  # build the model

# Train and write a checkpoint only when config["training"] is truthy.
if config["training"]:
    best_model = data_forecast.train_model(init_model)  # train the model
    data_forecast.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# NOTE(review): pred_data is given init_model plus the checkpoint path, so it
# presumably loads the saved weights itself -- confirm in main_forecasting.
pred, mse, mae = data_forecast.pred_data(init_model, best_model_path=config["best_model_path"])  # predict

Start training model

Epoch 1/150
train Loss: 0.6287 RMSE: 0.7929
val Loss: 0.3361 RMSE: 0.5798

Epoch 10/150
train Loss: 0.1202 RMSE: 0.3467
val Loss: 0.1645 RMSE: 0.4056

Epoch 20/150
train Loss: 0.0783 RMSE: 0.2798
val Loss: 0.1055 RMSE: 0.3248

Epoch 30/150
train Loss: 0.0698 RMSE: 0.2641
val Loss: 0.0958 RMSE: 0.3095

Epoch 40/150
train Loss: 0.0661 RMSE: 0.2571
val Loss: 0.0920 RMSE: 0.3033

Epoch 50/150
train Loss: 0.0624 RMSE: 0.2498
val Loss: 0.0899 RMSE: 0.2998

Epoch 60/150
train Loss: 0.0601 RMSE: 0.2451
val Loss: 0.0879 RMSE: 0.2965

Epoch 70/150
train Loss: 0.0580 RMSE: 0.2409
val Loss: 0.0855 RMSE: 0.2924

Epoch 80/150
train Loss: 0.0560 RMSE: 0.2367
val Loss: 0.0857 RMSE: 0.2927

Epoch 90/150
train Loss: 0.0546 RMSE: 0.2337
val Loss: 0.0885 RMSE: 0.2975

Epoch 100/150
train Loss: 0.0538 RMSE: 0.2319
val Loss: 0.0913 RMSE: 0.3021

Epoch 110/150
train Loss: 0.0522 RMSE: 0.2286
val Loss: 0.0901 RMSE: 0.3002

Epoch 120/150
train Loss: 0.0511 RMSE: 0.2260
val Loss: 0.0896 RM

In [6]:
# Report the test-set error metrics and the shape of the prediction table,
# one message per line.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)
# Last expression of the cell: shown as the cell's rich output.
pred.head()

** Performance of test dataset ==> MSE = 848.2227047211326, MAE = 21.091649440773203
** Dimension of result for test dataset = (5213, 2)


Unnamed: 0_level_0,actual_value,predicted_value
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-05-28 19:00:00,467.564103,482.103058
2014-05-28 20:00:00,494.403567,463.662292
2014-05-28 21:00:00,498.874025,461.408844
2014-05-28 22:00:00,450.239688,461.354523
2014-05-28 23:00:00,364.130435,363.162384


In [7]:
# Case 2. model = gru
# Run the forecasting workflow with the GRU configuration (config2).
config = config2
data_forecast = mf.Forecasting(config, train_data, test_data, test_date)
init_model = data_forecast.build_model()  # build the model

# Train and write a checkpoint only when config["training"] is truthy.
if config["training"]:
    best_model = data_forecast.train_model(init_model)  # train the model
    data_forecast.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# NOTE(review): pred_data is given init_model plus the checkpoint path, so it
# presumably loads the saved weights itself -- confirm in main_forecasting.
pred, mse, mae = data_forecast.pred_data(init_model, best_model_path=config["best_model_path"]) # predict

Start training model

Epoch 1/150
train Loss: 0.5256 RMSE: 0.7250
val Loss: 0.3187 RMSE: 0.5645

Epoch 10/150
train Loss: 0.1430 RMSE: 0.3781
val Loss: 0.2019 RMSE: 0.4494

Epoch 20/150
train Loss: 0.0871 RMSE: 0.2950
val Loss: 0.1285 RMSE: 0.3585

Epoch 30/150
train Loss: 0.0748 RMSE: 0.2734
val Loss: 0.1122 RMSE: 0.3349

Epoch 40/150
train Loss: 0.0698 RMSE: 0.2642
val Loss: 0.1040 RMSE: 0.3225

Epoch 50/150
train Loss: 0.0659 RMSE: 0.2567
val Loss: 0.0970 RMSE: 0.3114

Epoch 60/150
train Loss: 0.0635 RMSE: 0.2520
val Loss: 0.0897 RMSE: 0.2995

Epoch 70/150
train Loss: 0.0615 RMSE: 0.2480
val Loss: 0.0896 RMSE: 0.2993

Epoch 80/150
train Loss: 0.0595 RMSE: 0.2440
val Loss: 0.0863 RMSE: 0.2938

Epoch 90/150
train Loss: 0.0582 RMSE: 0.2412
val Loss: 0.0829 RMSE: 0.2879

Epoch 100/150
train Loss: 0.0572 RMSE: 0.2391
val Loss: 0.0845 RMSE: 0.2907

Epoch 110/150
train Loss: 0.0561 RMSE: 0.2368
val Loss: 0.0818 RMSE: 0.2860

Epoch 120/150
train Loss: 0.0549 RMSE: 0.2344
val Loss: 0.0788 RM

In [8]:
# Report the test-set error metrics and the shape of the prediction table,
# one message per line.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)
# Last expression of the cell: shown as the cell's rich output.
pred.head()

** Performance of test dataset ==> MSE = 713.5153589302237, MAE = 19.887316306914993
** Dimension of result for test dataset = (5213, 2)


Unnamed: 0_level_0,actual_value,predicted_value
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-05-28 19:00:00,467.564103,463.461792
2014-05-28 20:00:00,494.403567,449.523132
2014-05-28 21:00:00,498.874025,456.224304
2014-05-28 22:00:00,450.239688,459.547668
2014-05-28 23:00:00,364.130435,375.698395


In [9]:
# Case 3. model = informer
# Run the forecasting workflow with the Informer configuration (config3).
config = config3
data_forecast = mf.Forecasting(config, train_data, test_data, test_date)
init_model = data_forecast.build_model()  # build the model

# Train and write a checkpoint only when config["training"] is truthy.
if config["training"]:
    best_model = data_forecast.train_model(init_model)  # train the model
    data_forecast.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# NOTE(review): pred_data is given init_model plus the checkpoint path, so it
# presumably loads the saved weights itself -- confirm in main_forecasting.
pred, mse, mae = data_forecast.pred_data(init_model, best_model_path=config["best_model_path"])  # predict

Start training model

Epoch 1/150
train Loss: 0.0311 RMSE: 0.1764
val Loss: 0.0015 RMSE: 0.0392

Epoch 10/150
train Loss: 0.0016 RMSE: 0.0403
val Loss: 0.0006 RMSE: 0.0245

Epoch 20/150
train Loss: 0.0016 RMSE: 0.0397
val Loss: 0.0006 RMSE: 0.0252

Epoch 30/150
train Loss: 0.0016 RMSE: 0.0395
val Loss: 0.0006 RMSE: 0.0255

Epoch 40/150
train Loss: 0.0016 RMSE: 0.0398
val Loss: 0.0006 RMSE: 0.0251

Epoch 50/150
train Loss: 0.0016 RMSE: 0.0396
val Loss: 0.0006 RMSE: 0.0254

Epoch 60/150
train Loss: 0.0016 RMSE: 0.0401
val Loss: 0.0006 RMSE: 0.0251

Epoch 70/150
train Loss: 0.0016 RMSE: 0.0394
val Loss: 0.0007 RMSE: 0.0257

Epoch 80/150
train Loss: 0.0016 RMSE: 0.0399
val Loss: 0.0006 RMSE: 0.0247

Epoch 90/150
train Loss: 0.0016 RMSE: 0.0400
val Loss: 0.0006 RMSE: 0.0251

Epoch 100/150
train Loss: 0.0016 RMSE: 0.0400
val Loss: 0.0006 RMSE: 0.0247

Epoch 110/150
train Loss: 0.0016 RMSE: 0.0401
val Loss: 0.0006 RMSE: 0.0246

Epoch 120/150
train Loss: 0.0016 RMSE: 0.0397
val Loss: 0.0006 RM

In [10]:
# Report the test-set error metrics and the shape of the prediction table,
# one message per line.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)
# Last expression of the cell: shown as the cell's rich output.
pred.head()

** Performance of test dataset ==> MSE = 1.4786320314542931, MAE = 0.757939572901241
** Dimension of result for test dataset = (5213, 2)


Unnamed: 0_level_0,actual_value,predicted_value
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-05-28 19:00:00,467.564103,468.05072
2014-05-28 20:00:00,494.403567,495.023163
2014-05-28 21:00:00,498.874025,499.462341
2014-05-28 22:00:00,450.239688,450.419952
2014-05-28 23:00:00,364.130435,364.293396


In [11]:
# Case 4. model = scinet
# Run the forecasting workflow with the SCINet configuration (config4).
config = config4
data_forecast = mf.Forecasting(config, train_data, test_data, test_date)
init_model = data_forecast.build_model()  # build the model

# Train and write a checkpoint only when config["training"] is truthy.
if config["training"]:
    best_model = data_forecast.train_model(init_model)  # train the model
    data_forecast.save_model(best_model, best_model_path=config["best_model_path"])  # save the model

# NOTE(review): pred_data is given init_model plus the checkpoint path, so it
# presumably loads the saved weights itself -- confirm in main_forecasting.
pred, mse, mae = data_forecast.pred_data(init_model, best_model_path=config["best_model_path"])  # predict

Start training model

Epoch 1/150
train Loss: 1.2087 RMSE: 1.0994
val Loss: 0.9560 RMSE: 0.9778

Epoch 10/150
train Loss: 0.2353 RMSE: 0.4851
val Loss: 0.2756 RMSE: 0.5249

Epoch 20/150
train Loss: 0.1382 RMSE: 0.3717
val Loss: 0.1883 RMSE: 0.4339

Epoch 30/150
train Loss: 0.1002 RMSE: 0.3165
val Loss: 0.1451 RMSE: 0.3809

Epoch 40/150
train Loss: 0.0862 RMSE: 0.2936
val Loss: 0.1257 RMSE: 0.3545

Epoch 50/150
train Loss: 0.0791 RMSE: 0.2812
val Loss: 0.1162 RMSE: 0.3409

Epoch 60/150
train Loss: 0.0751 RMSE: 0.2740
val Loss: 0.1101 RMSE: 0.3318

Epoch 70/150
train Loss: 0.0730 RMSE: 0.2702
val Loss: 0.1065 RMSE: 0.3263

Epoch 80/150
train Loss: 0.0707 RMSE: 0.2658
val Loss: 0.1029 RMSE: 0.3208

Epoch 90/150
train Loss: 0.0694 RMSE: 0.2634
val Loss: 0.1009 RMSE: 0.3177

Epoch 100/150
train Loss: 0.0687 RMSE: 0.2622
val Loss: 0.0989 RMSE: 0.3145

Epoch 110/150
train Loss: 0.0679 RMSE: 0.2606
val Loss: 0.0979 RMSE: 0.3128

Epoch 120/150
train Loss: 0.0671 RMSE: 0.2589
val Loss: 0.0964 RM

In [12]:
# Report the test-set error metrics and the shape of the prediction table,
# one message per line.
print(
    f'** Performance of test dataset ==> MSE = {mse}, MAE = {mae}',
    f'** Dimension of result for test dataset = {pred.shape}',
    sep='\n',
)
# Last expression of the cell: shown as the cell's rich output.
pred.head()

** Performance of test dataset ==> MSE = 744.7235067454232, MAE = 19.707768212003856
** Dimension of result for test dataset = (5213, 2)


Unnamed: 0_level_0,actual_value,predicted_value
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-05-28 19:00:00,467.564103,446.310089
2014-05-28 20:00:00,494.403567,464.866852
2014-05-28 21:00:00,498.874025,462.809753
2014-05-28 22:00:00,450.239688,455.555481
2014-05-28 23:00:00,364.130435,363.264679
