In [1]:
"""
This notebook contains code to run hyper-parameter tuning using a genetic algorithm.
Use another notebook if you wish to use *grid search* instead.
# Under development.
"""
import os, sys
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from typing import Dict, List



In [2]:
import sys

# Make the project importable from this notebook. Appending "../" only works
# when the notebook sits in the project's /notebook/ directory; from anywhere
# else the relative path may not point at the project directory.
sys.path.append("../")
try:
    import constants
except ModuleNotFoundError:
    # "../" did not expose the project: ask for the directory manually, make
    # sure it ends with a slash, register it on sys.path and retry the import.
    core_dir = input("Directory of core files >>> ")
    core_dir = core_dir if core_dir.endswith("/") else core_dir + "/"
    sys.path.append(core_dir)
    import constants

In [3]:
from core.tools.metrics import *
import core.tools.visualize as visualize
from core.tools.time_series import *
from core.tools.data_import import *
import core.tools.rnn_prepare as rnn_prepare
import core.tools.param_set_generator as param_set_generator
import core.ga.genetic_hpt as genetic_hpt

import core.models.stacked_lstm as stacked_lstm

import core.training.hps_methods as hps_methods

In [4]:
# Data preparation phase.
# importlib is imported here so this cell stays self-contained; it replaces the
# previous exec()-based import, which executed whatever text the user typed.
import importlib

# Show the known dataset locations and keep prompting until a valid key of
# constants.DATA_DIR is entered.
pprint(constants.DATA_DIR)
choice = None
while choice is None or choice not in constants.DATA_DIR:
    if choice is not None:
        print("Invalid data location received, try again...")
    choice = input("Select Dataset >>> ")

FILE_DIR = constants.DATA_DIR[choice]

print(f"Dataset chosen: {FILE_DIR}")

# List candidate configuration modules (files ending in "config.py") in a
# deterministic order; os.listdir() itself returns entries in arbitrary order.
print("Avaiable configuration files found: ")
for cf in sorted(os.listdir("../hps_configs")):
    if cf.endswith("config.py"):
        print("\t" + cf)

# Accept the name with or without the ".py" suffix.
config_name = input("Select config file >>> ")
if config_name.endswith(".py"):
    config_name = config_name[:-3]

# Equivalent to `import hps_configs.<config_name> as config`, but the user
# input is treated strictly as a module name rather than as source code.
# An unknown name raises ModuleNotFoundError.
config = importlib.import_module(f"hps_configs.{config_name}")

# print("Reading configuration file...")
# for att in dir(config):
#     if att.endswith("_config"):
#         print(f"\tLoading: {att}")
#         exec(f"globals().update(config.{att})")

{'a': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',
 'b': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',
 'c': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}
Select Dataset >>> a
Dataset chosen: /Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv
Avaiable configuration files found: 
	ec2_config.py
	mac_config.py
Select config file >>> mac_config


In [5]:
def obj_func(param) -> float:
    """
    Objective function handed to the genetic optimizer: fit one stacked LSTM
    with the given hyper-parameter set and report its validation MSE.

    Args:
        param: mapping of hyper-parameter names to values. The keys read here
            are "PERIODS", "ORDER", "TRAIN_RATIO", "VAL_RATIO", "LAGS" and
            "epochs"; the whole mapping is also forwarded to StackedLSTM.

    Returns:
        The mean of the "mse_val" values returned by ``model.fit`` for the
        requested prediction checkpoints, as a plain float.
    """
    df_ready = rnn_prepare.prepare_dataset(
        file_dir=FILE_DIR,
        periods=int(param["PERIODS"]),
        order=int(param["ORDER"]),
        remove=None,
        verbose=False
    )

    # Split dataset. The test split is deliberately discarded here so it stays
    # isolated from hyper-parameter selection.
    (X_train, X_val, _,
     y_train, y_val, _) = rnn_prepare.split_dataset(
        raw=df_ready,
        train_ratio=param["TRAIN_RATIO"],
        val_ratio=param["VAL_RATIO"],
        lags=param["LAGS"]
    )

    # The gross dataset excluding the test set.
    data_feed = {
        "X_train": X_train,
        "X_val": X_val,
        "y_train": y_train,
        "y_val": y_val,
    }

    # range() only accepts integers, so coerce the epoch count the same way
    # PERIODS and ORDER are coerced above; a float-valued gene would otherwise
    # raise TypeError here.
    ep = int(param["epochs"])
    ckpts = range(int(ep * 0.95), ep)  # Take the final 5% epochs.

    # Start from a clean TF1 default graph for every candidate.
    tf.reset_default_graph()
    model = stacked_lstm.StackedLSTM(
        param=param,
        prediction_checkpoints=ckpts,
        verbose=False
    )

    ret_pack = model.fit(data=data_feed, ret=["mse_val"])
    return float(np.mean(list(ret_pack["mse_val"].values())))

In [6]:
# Search budget for the genetic algorithm: `total_gen` bounds the generation
# loop in the training cell, `init_size` is passed to the optimizer as pop_size.
total_gen, init_size = 5, 10

In [7]:
# Gather the genetic-search settings in one place, then build the optimizer
# from them. `mode="min"` is used because obj_func returns an error (MSE).
ga_settings = {
    "gene_pool": config.main,
    "pop_size": init_size,
    "eval_func": obj_func,
    "mode": "min",
    "retain": 0.5,
    "shot_prob": 0.05,
    "mutate_prob": 0.05,
    "verbose": False,
}
optimizer = genetic_hpt.GeneticHPT(**ga_settings)

In [8]:
# entity1 = optimizer.population[0][0]
# entity2 = optimizer.population[-1][0]
# pprint(entity1)
# pprint(entity2)

In [9]:
class HiddenPrints:
    """
    Context manager that silences ``print`` output inside a ``with`` block by
    temporarily pointing ``sys.stdout`` at ``os.devnull``.

    On exit the stream that was active on entry is restored. Exceptions raised
    inside the block are not suppressed.
    """

    def __enter__(self):
        # Keep our own handles to both streams: __exit__ must close the devnull
        # file it opened and restore the stream seen on entry, even if
        # sys.stdout was rebound to something else inside the block.
        self._original_stdout = sys.stdout
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        # Return self so `with HiddenPrints() as h:` binds the manager.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self._devnull.close()
        finally:
            # Always put the original stream back, even if close() fails.
            sys.stdout = self._original_stdout

In [10]:
# Training.
# Run one evaluate() pass up front (announced as the initial evaluation), then
# repeat select -> evolve -> evaluate on the optimizer for `total_gen` rounds.
# NOTE(review): what each optimizer method does is defined in
# core.ga.genetic_hpt, which is not visible from this notebook — confirm there.
print("Initial evaluation.")
optimizer.evaluate()
for gen in range(total_gen):
    # The progress label is 0-based: it runs from 0 to total_gen - 1.
    print(f"Generation: [{gen}/{total_gen}]")
    optimizer.select()
    optimizer.evolve()
    optimizer.evaluate()

StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.
StandardScaler applied, scaling based on the first 679 observations.


KeyboardInterrupt: 

In [19]:
# for (i, param) in enumerate(parameter_collection):
#     visualize.progbar(i, len(parameter_collection), 80)
#     with HiddenPrints():
#         print("================================================================")
#         print(f"Executing hyper-parameter searching session [{i}/{len(parameter_collection) - 1}]...")
#         print("Session Flexiable Config:\n\t" + param["hparam_str"].replace("-", "\n\t"))
#         start = datetime.now()
#         hps_methods.individual_train(
#             param=param,
#             exec_core=stacked_lstm.exec_core,
#             file_dir=FILE_DIR
#         )
#         print(f"Time taken for session [{i}]: {str(datetime.now() - start)}.")
# print("Done.")