In [1]:
"""
This notebook contains code to run hyper-parameter tuning using a genetic algorithm.
Use another notebook if you wish to use *grid search* instead.
# Under development.
"""
import os, sys
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from typing import Dict, List
import datetime

In [2]:
import sys

# Make the project root importable. This relies on the notebook living in
# the project's /notebook/ directory, so that "../" is the project directory.
# If that is not the case the import below fails, and the directory holding
# the core files has to be supplied manually at the prompt.
sys.path.append("../")

try:
    import constants
except ModuleNotFoundError:
    core_dir = input("Directory of core files >>> ")
    # Normalise the entered path so that it always ends with a slash.
    core_dir = core_dir if core_dir.endswith("/") else core_dir + "/"
    sys.path.append(core_dir)
    import constants

In [3]:
from core.tools.metrics import *
import core.tools.visualize as visualize
from core.tools.time_series import *
from core.tools.data_import import *
import core.tools.rnn_prepare as rnn_prepare
import core.tools.param_set_generator as param_set_generator
import core.ga.genetic_hpt as genetic_hpt

import core.models.stacked_lstm as stacked_lstm

import core.training.hps_methods as hps_methods

In [4]:
# Data preparation phase: interactively pick the dataset and the
# hyper-parameter search configuration module.
import importlib  # used to load the chosen config module by name (replaces exec)

pprint(constants.DATA_DIR)
choice = None
# Keep prompting until the answer is one of the keys of constants.DATA_DIR.
while choice is None or choice not in constants.DATA_DIR:
    if choice is not None:
        print("Invalid data location received, try again...")
    choice = input("Select Dataset >>> ")
# choice = "a"

FILE_DIR = constants.DATA_DIR[choice]

print(f"Dataset chosen: {FILE_DIR}")

print("Avaiable configuration files found: ")
# Sorted so the listing does not depend on the file system's ordering.
for cf in sorted(os.listdir("../hps_configs")):
    if cf.endswith("config.py"):
        print("\t" + cf)

config_name = input("Select config file >>> ").strip()
if config_name.endswith(".py"):
    config_name = config_name[:-3]
# config_name = "mac_config"

# The name comes straight from user input, so it must never be executed as
# code. Accept only a (possibly dotted) module name and import it by name.
if not config_name or not all(part.isidentifier() for part in config_name.split(".")):
    raise ValueError(f"Invalid configuration module name: {config_name!r}")
config = importlib.import_module(f"hps_configs.{config_name}")

# print("Reading configuration file...")
# for att in dir(config):
#     if att.endswith("_config"):
#         print(f"\tLoading: {att}")
#         exec(f"globals().update(config.{att})")

{'a': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',
 'b': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',
 'c': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}
Select Dataset >>> b
Dataset chosen: /home/ec2-user/AnnEconForecast/data/UNRATE.csv
Avaiable configuration files found: 
	ec2_config.py
	mac_config.py
Select config file >>> ec2_config


In [6]:
def obj_func(param) -> float:
    """Fitness function for the genetic search: validation MSE of one candidate.

    Prepares the dataset selected in ``FILE_DIR``, trains a ``StackedLSTM``
    configured by ``param`` and averages the validation MSE values returned
    by ``model.fit`` for the prediction checkpoints (the final 5% of epochs).

    Args:
        param: Mapping of hyper-parameters. The keys read directly here are
            ``PERIODS``, ``ORDER``, ``TRAIN_RATIO``, ``VAL_RATIO``, ``LAGS``
            and ``epochs``; the whole mapping is also passed to
            ``StackedLSTM``.

    Returns:
        The mean validation MSE as a float. ``float("inf")`` is returned when
        no score is available or the mean is not finite, so that a failed or
        diverged candidate always ranks worst in a minimising search instead
        of poisoning comparisons with NaN.
    """
    df_ready = rnn_prepare.prepare_dataset(
        file_dir=FILE_DIR,
        periods=int(param["PERIODS"]),
        order=int(param["ORDER"]),
        remove=None,
        verbose=False,
    )

    # Split dataset. The test portion is unpacked but deliberately unused so
    # that it stays isolated from hyper-parameter selection.
    (X_train, X_val, _X_test,
     y_train, y_val, _y_test) = rnn_prepare.split_dataset(
        raw=df_ready,
        train_ratio=param["TRAIN_RATIO"],
        val_ratio=param["VAL_RATIO"],
        lags=param["LAGS"],
    )

    # The gross dataset excluding the test set.
    data_feed = {
        "X_train": X_train,
        "X_val": X_val,
        "y_train": y_train,
        "y_val": y_val,
    }

    # Cast like PERIODS/ORDER above: range() rejects a float epoch count.
    ep = int(param["epochs"])
    ckpts = range(int(ep * 0.95), ep)  # Take the final 5% epochs.

    tf.reset_default_graph()
    model = stacked_lstm.StackedLSTM(
        param=param,
        prediction_checkpoints=ckpts,
        verbose=False,
    )

    ret_pack = model.fit(data=data_feed, ret=["mse_val"])
    # ret_pack["mse_val"] is a mapping; presumably one entry per checkpoint
    # epoch -- TODO confirm against StackedLSTM.fit.
    scores = list(ret_pack["mse_val"].values())
    if not scores:
        return float("inf")
    fitness = float(np.mean(scores))
    return fitness if np.isfinite(fitness) else float("inf")

In [7]:
# Settings of the genetic search.
# total_gen: number of select/evolve/evaluate rounds run by the training cell.
# init_size: value handed to GeneticHPT as pop_size.
total_gen, init_size = 30, 10

# Parameter names handed to GeneticHPT as `ignore`
# (presumably left untouched by the evolution step -- confirm in GeneticHPT).
ignore_set = (
    "PERIODS",
    "ORDER",
    "TRAIN_RATIO",
    "VAL_RATIO",
    "num_outputs",
    "num_inputs",
    "report_periods",
    "tensorboard_path",
    "model_path",
    "fig_path",
)

In [8]:
# Build the genetic hyper-parameter optimizer from the chosen configuration.
optimizer = genetic_hpt.GeneticHPT(
    # What is searched and how a candidate is scored.
    gene_pool=config.main,
    eval_func=obj_func,
    mode="min",  # obj_func returns a validation MSE, so lower is better
    ignore=ignore_set,
    # Population and evolution settings.
    pop_size=init_size,
    retain=0.5,
    shot_prob=0.05,
    mutate_prob=0.05,
    verbose=False,
)

In [9]:
# sample_param = {'LAGS': 6,
#  'ORDER': 1,
#  'PERIODS': 1,
#  'TRAIN_RATIO': 0.8,
#  'VAL_RATIO': 0.1,
#  'clip_grad': None,
#  'epochs': 500,
#  'fig_path': '/Volumes/Intel/debug/model_figs/',
#  'learning_rate': 0.1,
#  'model_path': '/Volumes/Intel/debug/saved_models/',
#  'num_inputs': 1,
#  'num_neurons': (32, 64),
#  'num_outputs': 1,
#  'num_time_steps': None,
#  'report_periods': 10,
#  'tensorboard_path': '/Volumes/Intel/debug/tensorboard/'}

In [12]:
class HiddenPrints:
    """Context manager that discards everything written to ``sys.stdout``.

    On entry ``sys.stdout`` is redirected to ``os.devnull``; on exit the
    stream that was active at entry is put back and the devnull handle is
    closed. Exceptions raised inside the ``with`` block are not suppressed.

    Usage::

        with HiddenPrints():
            print("this is not shown")
    """

    def __enter__(self):
        """Redirect ``sys.stdout`` to devnull and return this instance."""
        self._original_stdout = sys.stdout
        # Keep our own reference to the handle so that __exit__ closes exactly
        # the file opened here, even if sys.stdout is reassigned in between.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the original ``sys.stdout`` and close the devnull handle."""
        # Restore first so stdout is usable again even if close() fails.
        sys.stdout = self._original_stdout
        self._devnull.close()

In [None]:
# Wall-clock timestamp taken before training; the final cell subtracts it
# from its own timestamp to report the time taken. Run this cell before the
# training cell, otherwise the reported duration is meaningless.
start_time = datetime.datetime.now()

In [11]:
# Training: evaluate the initial population, then run `total_gen` rounds of
# select -> evolve -> evaluate, recording the best and worst fitness of each
# generation (the initial evaluation is reported but not recorded).
def _extremes_report(population) -> str:
    """Format the fitness of the first and the last entity of a population.

    Each entity is indexed as ``entity[1]`` for its fitness. The first entity
    is labelled best and the last one worst, which assumes the optimizer keeps
    its population ordered best-first after ``evaluate`` -- TODO confirm.
    """
    # Adjacent f-strings instead of a backslash continuation, so that the
    # indentation of the source never leaks into the printed message.
    return (
        f"\nBest fitted entity validation MSE: {population[0][1]: 0.7f}"
        f"\nWorst fitted entity validation MSE: {population[-1][1]: 0.7f}"
    )


best_rec = list()
worst_rec = list()
print("Initial evaluation gen=0...")
optimizer.evaluate(verbose=True)
print(_extremes_report(optimizer.population))
for gen in range(total_gen):
    print(f"Generation: [{gen + 1}/{total_gen}]")
    optimizer.select()
    optimizer.evolve()
    optimizer.evaluate(verbose=True)
    print(_extremes_report(optimizer.population))
    best_rec.append(optimizer.population[0][1])
    worst_rec.append(optimizer.population[-1][1])
print(
    f"Final generation best fitted entity: {optimizer.population[0][0]}"
    f"\nwith validation set MSE (fitness): {optimizer.population[0][1]}"
)

Initial evaluation gen=0...
 #################### Evaluating population[10/10, 100.00%]
Best fitted entity validation MSE:  0.0241276
Worst fitted entity validation MSE:  0.0702762
Generation: [1/30]
 #################### Evaluating population[11/11, 100.00%]
Best fitted entity validation MSE:  0.0250522    
Worst fitted entity validation MSE:  0.0429758
Generation: [2/30]
 #################### Evaluating population[11/11, 100.00%]
Best fitted entity validation MSE:  0.0257155    
Worst fitted entity validation MSE:  34072.1835938
Generation: [3/30]
 #################### Evaluating population[11/11, 100.00%]
Best fitted entity validation MSE:  0.0251317    
Worst fitted entity validation MSE:  28.7871838
Generation: [4/30]
 #################### Evaluating population[10/10, 100.00%]
Best fitted entity validation MSE:  0.0255463    
Worst fitted entity validation MSE:  0.0309619
Generation: [5/30]
 #################### Evaluating population[11/11, 100.00%]
Best fitted entity validation M

KeyboardInterrupt: 

In [20]:
# Layer sizes of the first nine entities of the current population.
# Slicing (rather than indexing 0..8) shows fewer rows instead of raising
# IndexError when the population holds fewer than nine entities.
[entity[0]["num_neurons"] for entity in optimizer.population[:9]]

[[512, 1024],
 [512, 1024],
 [512, 1024],
 [512, 1024],
 [512, 1024],
 [512, 1024],
 [512, 1024],
 [512, 1024],
 [512, 1024]]

In [12]:
# Report the wall-clock time elapsed since `start_time` was recorded.
end_time = datetime.datetime.now()
print("Time taken: " + str(end_time - start_time))