In [1]:
import torch
import pickle
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import config
import utils
import main_regression as mr

In [2]:
# Fix the seed of every random number generator used in this notebook
random_seed = 42

# Python's built-in RNG and NumPy's global RNG
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch RNGs (CPU generator and the current CUDA device)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Disable cuDNN autotuning and request deterministic cuDNN kernels
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [21]:
import glob
import os

def Saver(model_name):
    """Create and return a new, sequentially numbered experiment directory.

    Runs are stored as ``./ckpt/<model_name>/experiment_<N>``. ``N`` is one
    more than the largest index already present for this model, or 0 when
    no earlier run exists.

    Parameters
    ----------
    model_name : str
        Name of the model; used as the sub-directory under ``./ckpt``.

    Returns
    -------
    str
        Path of the experiment directory that was just created.
    """
    model_dir_path = './ckpt/' + model_name
    # makedirs also creates the intermediate ./ckpt directory.
    os.makedirs(model_dir_path, exist_ok=True)

    # Look for earlier runs inside the model's checkpoint directory. No path
    # component may start with a separator: os.path.join discards everything
    # before an absolute component, which would send the search to the
    # filesystem root.
    indices = []
    for run_path in glob.glob(os.path.join(model_dir_path, 'experiment_*')):
        # basename() works with both "/" and "\\" separators.
        suffix = os.path.basename(run_path).split('_')[-1]
        # Ignore stray entries such as "experiment_tmp" or "experiment_3.txt".
        if suffix.isdigit():
            indices.append(int(suffix))

    # Always an int, whether or not earlier runs exist.
    run_id = max(indices) + 1 if indices else 0

    experiment_dir = os.path.join(model_dir_path, f'experiment_{run_id}')
    os.makedirs(experiment_dir, exist_ok=True)

    return experiment_dir

In [22]:
# load raw data
# ["LSTM_rg", "GRU_rg", "CNN_1D_rg", "LSTM_FCNs_rg"] all use the same data, so for convenience the data
# loaded through utils.load_data with model_name='LSTM_rg' is shared by all four models
data_root_dir = './data/'
train_x, train_y, test_x, test_y = utils.load_data(data_root_dir, model_name='LSTM_rg')  # shape=(num_of_instance, input_dims, time_steps)

# split train data into train/validation data
# randomly split the train data (per instance) into train/validation sets with test_size=split_ratio.
# random_state is pinned to the notebook seed so the split does not depend on how much of the global
# NumPy random stream was consumed by previously executed cells (re-runs / out-of-order execution).
split_ratio = 0.2
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x, train_y, test_size=split_ratio, shuffle=True, random_state=random_seed
)

# normalization
# NOTE(review): judging by the printed log, get_train_val_data saves a MinMaxScaler to the given
# path; whether it is fitted on the train split only should be confirmed in utils.
scaler_x_path = './scaler/minmax_scaler_x.pkl'
scaler_y_path = './scaler/minmax_scaler_y.pkl'
train_x, valid_x = utils.get_train_val_data(train_x, valid_x, scaler_x_path)
train_y, valid_y = utils.get_train_val_data(train_y, valid_y, scaler_y_path)

(95, 24, 144)
(95,)
(42, 24, 144)
(42,)
inputSize(train_x.shape[1]): 24
sequenceLenth (train_x.shape[2]): 144
Save MinMaxScaler in path: ./scaler/minmax_scaler_x.pkl
Save MinMaxScaler in path: ./scaler/minmax_scaler_y.pkl


In [24]:
# Case 1. LSTM model (w/o data representation)
model_name = 'LSTM_rg'
model_params = config.model_config[model_name]

# Obtain this run's experiment directory, then train the regressor
exp_path = Saver(model_name)
data_reg = mr.Regression(model_params)
best_model = data_reg.train_model(train_x, train_y, valid_x, valid_y)  # train the model

# Save the model returned by training inside the experiment directory
data_reg.save_model(
    best_model,
    best_model_path=os.path.join(exp_path, model_params["best_model"]),
)

Start training model: LSTM_rg

Epoch 1/100
train Loss: 0.0566
val Loss: 0.0569

Epoch 50/100
train Loss: 0.0434
val Loss: 0.0434

Epoch 100/100
train Loss: 0.0326
val Loss: 0.0346

Training complete in 0m 5s
Best val MSE: 0.034624


In [5]:
# Case 2. GRU (w/o data representation)
model_name = 'GRU_rg'
model_params = config.model_config[model_name]

# Obtain this run's experiment directory, then train the regressor
exp_path = Saver(model_name)
data_reg = mr.Regression(model_params)
best_model = data_reg.train_model(train_x, train_y, valid_x, valid_y)  # train the model

# Save the model returned by training inside the experiment directory
data_reg.save_model(
    best_model,
    best_model_path=os.path.join(exp_path, model_params["best_model"]),
)

Start training model: GRU_rg

Epoch 1/1000
train Loss: 0.0489
val Loss: 0.0434

Epoch 50/1000
train Loss: 0.0342
val Loss: 0.0434

Epoch 100/1000
train Loss: 0.0218
val Loss: 0.0316

Epoch 150/1000
train Loss: 0.0187
val Loss: 0.0232

Epoch 200/1000
train Loss: 0.0160
val Loss: 0.0187

Epoch 250/1000
train Loss: 0.0149
val Loss: 0.0210

Epoch 300/1000
train Loss: 0.0141
val Loss: 0.0207

Epoch 350/1000
train Loss: 0.0120
val Loss: 0.0214

Epoch 400/1000
train Loss: 0.0104
val Loss: 0.0217

Epoch 450/1000
train Loss: 0.0092
val Loss: 0.0194

Epoch 500/1000
train Loss: 0.0081
val Loss: 0.0173

Epoch 550/1000
train Loss: 0.0074
val Loss: 0.0167

Epoch 600/1000
train Loss: 0.0066
val Loss: 0.0169

Epoch 650/1000
train Loss: 0.0058
val Loss: 0.0166

Epoch 700/1000
train Loss: 0.0074
val Loss: 0.0194

Epoch 750/1000
train Loss: 0.0053
val Loss: 0.0207

Epoch 800/1000
train Loss: 0.0043
val Loss: 0.0209

Epoch 850/1000
train Loss: 0.0041
val Loss: 0.0283

Epoch 900/1000
train Loss: 0.0038
val

In [6]:
# Case 3. CNN_1D (w/o data representation)
model_name = 'CNN_1D_rg'
model_params = config.model_config[model_name]

# Obtain this run's experiment directory, then train the regressor
exp_path = Saver(model_name)
data_reg = mr.Regression(model_params)
best_model = data_reg.train_model(train_x, train_y, valid_x, valid_y)  # train the model

# Save the model returned by training inside the experiment directory
data_reg.save_model(
    best_model,
    best_model_path=os.path.join(exp_path, model_params["best_model"]),
)

Start training model: CNN_1D_rg

Epoch 1/1000
train Loss: 0.0499
val Loss: 0.0436

Epoch 50/1000
train Loss: 0.0363
val Loss: 0.0441

Epoch 100/1000
train Loss: 0.0315
val Loss: 0.0448

Epoch 150/1000
train Loss: 0.0270
val Loss: 0.0450

Epoch 200/1000
train Loss: 0.0238
val Loss: 0.0458

Epoch 250/1000
train Loss: 0.0187
val Loss: 0.0479

Epoch 300/1000
train Loss: 0.0160
val Loss: 0.0483

Epoch 350/1000
train Loss: 0.0132
val Loss: 0.0493

Epoch 400/1000
train Loss: 0.0107
val Loss: 0.0495

Epoch 450/1000
train Loss: 0.0083
val Loss: 0.0484

Epoch 500/1000
train Loss: 0.0071
val Loss: 0.0490

Epoch 550/1000
train Loss: 0.0061
val Loss: 0.0485

Epoch 600/1000
train Loss: 0.0049
val Loss: 0.0483

Epoch 650/1000
train Loss: 0.0038
val Loss: 0.0495

Epoch 700/1000
train Loss: 0.0034
val Loss: 0.0503

Epoch 750/1000
train Loss: 0.0034
val Loss: 0.0501

Epoch 800/1000
train Loss: 0.0028
val Loss: 0.0518

Epoch 850/1000
train Loss: 0.0021
val Loss: 0.0517

Epoch 900/1000
train Loss: 0.0016


In [7]:
# Case 4. LSTM_FCNs (w/o data representation)
model_name = 'LSTM_FCNs_rg'
model_params = config.model_config[model_name]

# Obtain this run's experiment directory, then train the regressor
exp_path = Saver(model_name)
data_reg = mr.Regression(model_params)
best_model = data_reg.train_model(train_x, train_y, valid_x, valid_y)  # train the model

# Save the model returned by training inside the experiment directory
data_reg.save_model(
    best_model,
    best_model_path=os.path.join(exp_path, model_params["best_model"]),
)

Start training model: LSTM_FCNs_rg

Epoch 1/1000
train Loss: 0.0531
val Loss: 0.0456

Epoch 50/1000
train Loss: 0.0156
val Loss: 0.0420

Epoch 100/1000
train Loss: 0.0070
val Loss: 0.0409

Epoch 150/1000
train Loss: 0.0034
val Loss: 0.0337

Epoch 200/1000
train Loss: 0.0036
val Loss: 0.0366

Epoch 250/1000
train Loss: 0.0030
val Loss: 0.0375

Epoch 300/1000
train Loss: 0.0046
val Loss: 0.0367

Epoch 350/1000
train Loss: 0.0025
val Loss: 0.0396

Epoch 400/1000
train Loss: 0.0018
val Loss: 0.0368

Epoch 450/1000
train Loss: 0.0023
val Loss: 0.0320

Epoch 500/1000
train Loss: 0.0029
val Loss: 0.0358

Epoch 550/1000
train Loss: 0.0013
val Loss: 0.0368

Epoch 600/1000
train Loss: 0.0021
val Loss: 0.0345

Epoch 650/1000
train Loss: 0.0013
val Loss: 0.0329

Epoch 700/1000
train Loss: 0.0006
val Loss: 0.0340

Epoch 750/1000
train Loss: 0.0021
val Loss: 0.0347

Epoch 800/1000
train Loss: 0.0028
val Loss: 0.0356

Epoch 850/1000
train Loss: 0.0009
val Loss: 0.0295

Epoch 900/1000
train Loss: 0.00

--------------------------------------------------

In [8]:
# load representation data
data_root_dir = './data/'
train_x, train_y, test_x, test_y = utils.load_data(data_root_dir, model_name='FC_rg')  # shape=(num_of_instance, embedding_dim)

# split train data into train/validation data
# randomly split the train data into train/validation sets with test_size=split_ratio.
# random_state is pinned to the notebook seed so the split does not depend on how much of the global
# NumPy random stream was consumed by previously executed cells (re-runs / out-of-order execution).
split_ratio = 0.2
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x, train_y, test_size=split_ratio, shuffle=True, random_state=random_seed
)

# normalization
# NOTE(review): judging by the printed log, get_train_val_data saves a MinMaxScaler to the given
# path; whether it is fitted on the train split only should be confirmed in utils.
scaler_x_path = './scaler/minmax_scaler_x_repr.pkl'
scaler_y_path = './scaler/minmax_scaler_y_repr.pkl'
train_x, valid_x = utils.get_train_val_data(train_x, valid_x, scaler_x_path)
train_y, valid_y = utils.get_train_val_data(train_y, valid_y, scaler_y_path)

Save MinMaxScaler in path: ./scaler/minmax_scaler_x_repr.pkl
Save MinMaxScaler in path: ./scaler/minmax_scaler_y_repr.pkl


In [9]:
# Case 5. fully-connected layers (w/ data representation)
model_name = 'FC_rg'
model_params = config.model_config[model_name]

# Obtain this run's experiment directory, then train the regressor
exp_path = Saver(model_name)
data_reg = mr.Regression(model_params)
best_model = data_reg.train_model(train_x, train_y, valid_x, valid_y)  # train the model

# Save the model returned by training inside the experiment directory
data_reg.save_model(
    best_model,
    best_model_path=os.path.join(exp_path, model_params["best_model"]),
)

Start training model: FC_rg

Epoch 1/1000
train Loss: 0.0522
val Loss: 0.0415

Epoch 50/1000
train Loss: 0.0433
val Loss: 0.0345

Epoch 100/1000
train Loss: 0.0416
val Loss: 0.0340

Epoch 150/1000
train Loss: 0.0416
val Loss: 0.0335

Epoch 200/1000
train Loss: 0.0392
val Loss: 0.0332

Epoch 250/1000
train Loss: 0.0382
val Loss: 0.0329

Epoch 300/1000
train Loss: 0.0375
val Loss: 0.0327

Epoch 350/1000
train Loss: 0.0368
val Loss: 0.0325

Epoch 400/1000
train Loss: 0.0361
val Loss: 0.0324

Epoch 450/1000
train Loss: 0.0347
val Loss: 0.0323

Epoch 500/1000
train Loss: 0.0344
val Loss: 0.0323

Epoch 550/1000
train Loss: 0.0332
val Loss: 0.0323

Epoch 600/1000
train Loss: 0.0327
val Loss: 0.0323

Epoch 650/1000
train Loss: 0.0309
val Loss: 0.0323

Epoch 700/1000
train Loss: 0.0285
val Loss: 0.0324

Epoch 750/1000
train Loss: 0.0298
val Loss: 0.0325

Epoch 800/1000
train Loss: 0.0284
val Loss: 0.0327

Epoch 850/1000
train Loss: 0.0279
val Loss: 0.0327

Epoch 900/1000
train Loss: 0.0287
val 