In [9]:
"""
This notebook is used for model hyper-parameter searching.
"""
import importlib
import os
from datetime import datetime
from pprint import pprint
from typing import Dict, List

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [10]:
import sys

# Make the project importable. This assumes the notebook lives in the
# project's /notebook/ directory, so that "../" is the project directory.
sys.path.append("../")

try:
    import constants
except ModuleNotFoundError:
    # "../" did not expose the project files: ask for the directory
    # manually, register it on sys.path and retry the import.
    core_dir = input("Directory of core files >>> ")
    core_dir = core_dir if core_dir.endswith("/") else core_dir + "/"
    sys.path.append(core_dir)
    import constants

In [11]:
from core.tools.metrics import *
import core.tools.visualize as visualize
from core.tools.time_series import *
from core.tools.data_import import *
import core.tools.rnn_prepare as rnn_prepare

import core.models.stacked_lstm as stacked_lstm

import core.training.hps_methods as hps_methods

In [12]:
# data preparation phase.
# Show the known dataset locations and keep prompting until the user enters
# one of the keys of constants.DATA_DIR.
pprint(constants.DATA_DIR)
choice = None
while choice is None or choice not in constants.DATA_DIR.keys():
    if choice is not None:
        print("Invalid data location received, try again...")
    choice = input("Select Dataset >>> ")
FILE_DIR = constants.DATA_DIR[choice]

print(f"Dataset chosen: {FILE_DIR}")

# List the candidate configuration modules (files ending in "config.py").
print("Avaiable configuration files found: ")
for cf in os.listdir("../hps_configs"):
    if cf.endswith("config.py"):
        print("\t" + cf)

# Accept the name with or without the ".py" extension.
config_name = input("Select config file >>> ")
if config_name.endswith(".py"):
    config_name = config_name[:-3]

# Import the module by name rather than through exec(): the name comes
# straight from user input, and exec() would run whatever text was typed.
config = importlib.import_module(f"hps_configs.{config_name}")

print("Reading configuration file...")
for att in dir(config):
    if att.endswith("_config"):
        print(f"\tLoading: {att}")
        # Each "*_config" attribute is merged into the notebook globals
        # (presumably a dict of settings such as PERIODS / ORDER -- confirm
        # against the config module).
        globals().update(getattr(config, att))

{'a': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',
 'b': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',
 'c': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}
Select Dataset >>> b
Dataset chosen: /home/ec2-user/AnnEconForecast/data/UNRATE.csv
Avaiable configuration files found: 
	ec2_config.py
	sample_config.py
Select config file >>> ec2_config
Reading configuration file...
	Loading: dp_config


In [13]:
# Build the collection of hyper-parameter sets to search over from the
# `train_param` entry of the selected config module. The result is indexed
# and iterated below, one element per training session.
parameter_collection = hps_methods.gen_hparam_set(config.train_param)

Total number of parameter sets generated: 8


In [14]:
# Sanity check: inspect the first generated parameter set.
pprint(parameter_collection[0])

{'clip_grad': None,
 'epochs': 100,
 'fig_path': '/home/ec2-user/ec2_hps/2018DEC06_02/model_figs/num_time_steps=12-num_neurons=(256, '
             '128)-learning_rate=0.1',
 'hparam_str': 'num_time_steps=12-num_neurons=(256, 128)-learning_rate=0.1',
 'learning_rate': 0.1,
 'model_path': '/home/ec2-user/ec2_hps/2018DEC06_02/saved_models/num_time_steps=12-num_neurons=(256, '
               '128)-learning_rate=0.1',
 'num_inputs': 1,
 'num_neurons': (256, 128),
 'num_outputs': 1,
 'num_time_steps': 12,
 'report_periods': 10,
 'tensorboard_dir': '/home/ec2-user/ec2_hps/2018DEC06_02/tensorboard/num_time_steps=12-num_neurons=(256, '
                    '128)-learning_rate=0.1'}


In [15]:
def _prediction_checkpoints(epochs: int, num_records: int = 10) -> List[int]:
    """Return the epochs at which predictions should be recorded.

    The result holds the multiples of ``epochs // num_records`` that are
    strictly below ``epochs``, followed by exactly one ``-1`` entry
    (presumably the marker for the final epoch -- confirm against
    ``stacked_lstm.exec_core``).
    """
    # Clamp the spacing to 1 so that runs shorter than ``num_records`` epochs
    # do not divide by zero.
    step = max(1, epochs // num_records)
    return [step * k for k in range(1, epochs // step)] + [-1]


def individual_train(para: Dict) -> None:
    """Run one hyper-parameter search session and save its prediction figure.

    Args:
        para: one parameter set. This function reads the keys
            ``"num_time_steps"`` (number of lags used to build the samples),
            ``"epochs"`` and ``"fig_path"`` (directory for the figure); the
            whole dict is forwarded to ``stacked_lstm.exec_core``.

    Side effects:
        Creates ``para["fig_path"]`` if needed and writes
        ``pred_records.svg`` into it.

    Relies on the notebook globals ``FILE_DIR``, ``PERIODS`` and ``ORDER``.
    """
    prepared_df = rnn_prepare.prepare_dataset(
        file_dir=FILE_DIR,
        periods=PERIODS,
        order=ORDER,
        remove=None,
        verbose=False
    )
    (X_train, X_val, X_test,
     y_train, y_val, y_test) = rnn_prepare.generate_splited_dataset(
        raw=prepared_df,
        train_ratio=0.8,
        val_ratio=0.1,
        lags=para["num_time_steps"]
    )
    data_collection = {
        "X_train": X_train,
        "X_val": X_val,
        "X_test": X_test,
        "y_train": y_train,
        "y_val": y_val,
        "y_test": y_test
    }

    # Only the predictions are used below; the metrics are discarded here.
    (_metrics, predictions) = stacked_lstm.exec_core(
        parameters=para,
        data_collection=data_collection,
        prediction_checkpoints=_prediction_checkpoints(para["epochs"])
    )

    # Drop the current figure before drawing this session's plot.
    plt.close()
    visualize.plot_checkpoints(predictions, y_test, "test")

    # exist_ok avoids the check-then-create race; os.path.join tolerates a
    # trailing separator in fig_path instead of failing on it.
    os.makedirs(para["fig_path"], exist_ok=True)
    plt.savefig(os.path.join(para["fig_path"], "pred_records.svg"))

In [28]:
# Check here.

Session Config:
	num_time_steps=12
	num_neurons=(256, 128)
	learning_rate=0.1


In [None]:
# Run every generated parameter set in turn and report the wall-clock time
# of each session. The progress counter is zero-based, so the last session
# is labelled [n-1/n-1].
last_index = len(parameter_collection) - 1
for (i, para) in enumerate(parameter_collection):
    print("================================================================")
    print(f"Executing hyper-parameter searching session [{i}/{last_index}]...")
    # hparam_str joins its "key=value" fields with "-"; print one per line.
    print("Session Config:\n\t" + para["hparam_str"].replace("-", "\n\t"))
    start = datetime.now()
    individual_train(para)
    print(f"Time taken for session [{i}]: {str(datetime.now() - start)}.")

Executing hyper-parameter searching session [0/7]...
Session Config:
	num_time_steps=12
	num_neurons=(256, 128)
	learning_rate=0.1
Dataset loaded.    
	Index type: datetime64[ns]    
	Data type: float64
StandardScaler applied, scaling based on the first 679 observations.
Total 836 observations generated.
Note: shape format: (num_obs, time_steps, num_inputs/outputs)
X shape = (836, 12, 1), y shape = (836, 1, 1)
Training and testing set generated,        
X_train shape: (668, 12, 1)        
y_train shape: (668, 1)        
X_test shape: (84, 12, 1)        
y_test shape: (84, 1)        
X_validation shape: (84, 12, 1)        
y_validation shape: (84, 1)

Iteration [0], Training MSE 17005.1230469; Validation MSE 15377.6679688
Saving the trained model...
Time taken for [100] epochs:  0:00:03.570204
Loss Summary:
	mae=0.5510010123252869
	mse=0.4856529235839844
	rmse=0.6968880295753479
	mape=3.773477792739868
Time taken for session [0]: 0:00:05.355117.
Executing hyper-parameter searching sessi

KeyboardInterrupt: 