In [1]:
"""
This notebook is used for model hyper-parameter searching.
It can also be used as a baseline training script.
"""
import os, sys
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from typing import Dict, List

In [2]:
import sys

# Make the project root importable. This only works when the notebook lives
# in the project's notebook directory, so that "../" is the project directory.
sys.path.append("../")

# If "../" is not the project directory, `constants` cannot be imported.
# In that case ask the user for the directory and add it to the path manually.
try:
    import constants
except ModuleNotFoundError:
    core_dir = input("Directory of core files >>> ")
    # Make sure the entered directory ends with a trailing slash.
    core_dir = core_dir if core_dir.endswith("/") else core_dir + "/"
    sys.path.append(core_dir)
    import constants

In [3]:
from core.tools.metrics import *
import core.tools.visualize as visualize
from core.tools.time_series import *
from core.tools.data_import import *
import core.tools.rnn_prepare as rnn_prepare
import core.tools.param_set_generator as param_set_generator

import core.models.stacked_lstm as stacked_lstm

import core.training.hps_methods as hps_methods

In [4]:
# Data preparation phase: choose the dataset file and the hyper-parameter
# search configuration module interactively.
import importlib

# Show the available dataset keys and their paths.
pprint(constants.DATA_DIR)

# Keep prompting until the answer is one of the keys of constants.DATA_DIR.
choice = input("Select Dataset >>> ")
while choice not in constants.DATA_DIR:
    print("Invalid data location received, try again...")
    choice = input("Select Dataset >>> ")

FILE_DIR = constants.DATA_DIR[choice]

print(f"Dataset chosen: {FILE_DIR}")

# List candidate configuration files: every file in ../hps_configs whose
# name ends with "config.py".
print("Avaiable configuration files found: ")
for cf in os.listdir("../hps_configs"):
    if cf.endswith("config.py"):
        print("\t" + cf)

# Accept the name with or without the ".py" extension.
config_name = input("Select config file >>> ")
if config_name.endswith(".py"):
    config_name = config_name[:-3]

# Import the chosen module as `config`. importlib is used instead of
# exec() on an f-string so that the user's answer is only ever treated as a
# module name and never executed as Python source.
config = importlib.import_module(f"hps_configs.{config_name}")

# print("Reading configuration file...")
# for att in dir(config):
#     if att.endswith("_config"):
#         print(f"\tLoading: {att}")
#         exec(f"globals().update(config.{att})")

{'a': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',
 'b': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',
 'c': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}
Select Dataset >>> b
Dataset chosen: /home/ec2-user/AnnEconForecast/data/UNRATE.csv
Avaiable configuration files found: 
	ec2_config.py
	mac_config.py
	ec2_lite_config.py
	mac_lite_config.py
Select config file >>> ec2_config


In [5]:
# Root directory under which this search run writes its artifacts.
MAIN_DIR = "/home/ec2-user/ec2_hps/2018DEC31_01"

# Hyper-parameter search space handed to param_set_generator.gen_hparam_set.
# Key order is kept exactly as originally written.
PARAMS = {
    "PERIODS": 1,
    "ORDER": 1,
    "LAGS": [3, 6, 9],
    "TRAIN_RATIO": 0.8,
    "VAL_RATIO": 0.1,
    "epochs": [150, 300, 500],
    "num_inputs": 1,
    "num_outputs": 1,
    "num_time_steps": None,
    "num_neurons": [
        [128, 128],
        [128, 256],
        # Larger architectures, currently disabled:
        # [256, 512],
        # [512, 1024],
        # [128, 256, 512],
        # [256, 512, 1024],
    ],
    "learning_rate": [0.003, 0.01, 0.03],
    "clip_grad": [None, 10.0],
    "report_periods": 10,
    # Output locations, all placed below MAIN_DIR.
    "tensorboard_path": f"{MAIN_DIR}/tensorboard/",
    "model_path": f"{MAIN_DIR}/saved_models/",
    "fig_path": f"{MAIN_DIR}/model_figs/",
}

In [6]:
# Expand PARAMS into the collection of individual hyper-parameter sets that
# the search loop iterates over. The recorded run reports 108 sets, which
# matches one set per combination of the list-valued entries
# (3 * 3 * 2 * 3 * 2) -- presumably a full grid; confirm in gen_hparam_set.
parameter_collection = param_set_generator.gen_hparam_set(PARAMS)

Total number of parameter sets generated: 108


In [7]:
# Pretty-print one generated parameter set (index 1) as a sanity check.
pprint(parameter_collection[1])

{'LAGS': 3,
 'ORDER': 1,
 'PERIODS': 1,
 'TRAIN_RATIO': 0.8,
 'VAL_RATIO': 0.1,
 'clip_grad': 10.0,
 'epochs': 150,
 'fig_path': '/home/ec2-user/ec2_hps/2018DEC31_01/model_figs/LAGS=3-epochs=150-num_neurons=[128, '
             '128]-learning_rate=0.003-clip_grad=10.0',
 'hparam_str': 'LAGS=3-epochs=150-num_neurons=[128, '
               '128]-learning_rate=0.003-clip_grad=10.0',
 'learning_rate': 0.003,
 'model_path': '/home/ec2-user/ec2_hps/2018DEC31_01/saved_models/LAGS=3-epochs=150-num_neurons=[128, '
               '128]-learning_rate=0.003-clip_grad=10.0',
 'num_inputs': 1,
 'num_neurons': [128, 128],
 'num_outputs': 1,
 'num_time_steps': None,
 'report_periods': 10,
 'tensorboard_path': '/home/ec2-user/ec2_hps/2018DEC31_01/tensorboard/LAGS=3-epochs=150-num_neurons=[128, '
                     '128]-learning_rate=0.003-clip_grad=10.0'}


In [8]:
class HiddenPrints:
    """Context manager that silences ``print`` output inside a ``with`` block.

    On entry ``sys.stdout`` is pointed at ``os.devnull``; on exit the stream
    that was active on entry is put back. Only ``sys.stdout`` is redirected.
    Exceptions raised inside the block are not suppressed.
    """

    def __enter__(self):
        """Redirect ``sys.stdout`` to the null device and return ``self``."""
        # Remember the currently active stream so it can be restored on exit.
        self._original_stdout = sys.stdout
        # Keep our own handle to the null device so that __exit__ closes
        # exactly the file opened here, whatever sys.stdout is by then.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the original ``sys.stdout`` and close the null device."""
        # Restore first, so stdout is usable again even if close() fails.
        sys.stdout = self._original_stdout
        self._devnull.close()

In [9]:
# Hyper-parameter search: run one training session per generated parameter set.

# `datetime` has no explicit import anywhere else in this notebook (it was
# presumably reaching this cell through the wildcard imports from
# core.tools), so it is imported here to keep the cell self-contained.
from datetime import datetime

num_sessions = len(parameter_collection)

for (i, param) in enumerate(parameter_collection):
    # The progress bar is drawn outside HiddenPrints, so it is the only
    # stdout output that stays visible while the search is running.
    visualize.progbar(i + 1, num_sessions, min(100, num_sessions))
    # Everything written to stdout inside this block is discarded, which
    # includes the session banner and timing messages printed below as well
    # as anything printed by individual_train.
    with HiddenPrints():
        print("================================================================")
        print(f"Executing hyper-parameter searching session [{i}/{num_sessions - 1}]...")
        print("Session Flexiable Config:\n\t" + param["hparam_str"].replace("-", "\n\t"))
        start = datetime.now()
        hps_methods.individual_train(
            param=param,
            exec_core=stacked_lstm.exec_core,
            file_dir=FILE_DIR
        )
        print(f"Time taken for session [{i}]: {str(datetime.now() - start)}.")
print("Done.")

 -------------------------------------------------------------------------------- [0/108,   0.00%]

  def MAPE(x, y): return np.mean(np.abs((x - y) / y))


 ###############################################################################- [107/108,  99.07%]Done.
