In [1]:
"""
This notebook is used for model hyper-parameter searching.
It can also be used as a baseline training script.
"""
import os, sys
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from typing import Dict, List

In [2]:
import sys

# Make the project root importable. This assumes the notebook is stored in a
# sub-directory (e.g. /notebook/) directly below the project root; if it is
# not, "../" does not point at the project and the import below fails.
sys.path.append("../")

try:
    import constants
except ModuleNotFoundError:
    # Adding "../" did not help, so ask for the project directory manually.
    core_dir = input("Directory of core files >>> ")
    # Make sure the registered path carries a trailing slash.
    core_dir = core_dir if core_dir.endswith("/") else core_dir + "/"
    sys.path.append(core_dir)
    import constants

In [3]:
from core.tools.metrics import *
import core.tools.visualize as visualize
from core.tools.time_series import *
from core.tools.data_import import *
import core.tools.rnn_prepare as rnn_prepare
import core.tools.param_set_generator as param_set_generator

import core.models.stacked_lstm as stacked_lstm

import core.training.hps_methods as hps_methods

In [4]:
# Data preparation phase: choose the dataset and the hyper-parameter
# search configuration interactively.
import importlib

pprint(constants.DATA_DIR)

# Keep prompting until the answer is one of the keys of constants.DATA_DIR.
choice = None
while choice is None or choice not in constants.DATA_DIR:
    if choice is not None:
        print("Invalid data location received, try again...")
    choice = input("Select Dataset >>> ")

FILE_DIR = constants.DATA_DIR[choice]

print(f"Dataset chosen: {FILE_DIR}")

# List candidate configuration modules; sorted so the listing is
# deterministic (os.listdir returns entries in arbitrary order).
print("Avaiable configuration files found: ")
for cf in sorted(os.listdir("../hps_configs")):
    if cf.endswith("config.py"):
        print("\t" + cf)

# Accept the name with or without the ".py" suffix; surrounding whitespace
# is dropped so a stray space does not break the import.
config_name = input("Select config file >>> ").strip()
if config_name.endswith(".py"):
    config_name = config_name[:-3]

# Import the selected module by name. importlib.import_module is used instead
# of exec() on a string built from user input, so the typed text is only ever
# treated as a module name and never executed as Python source.
config = importlib.import_module(f"hps_configs.{config_name}")

# print("Reading configuration file...")
# for att in dir(config):
#     if att.endswith("_config"):
#         print(f"\tLoading: {att}")
#         exec(f"globals().update(config.{att})")

{'a': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',
 'b': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',
 'c': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}
Select Dataset >>> b
Dataset chosen: /home/ec2-user/AnnEconForecast/data/UNRATE.csv
Avaiable configuration files found: 
	ec2_config.py
	mac_config.py
	sample_config.py
Select config file >>> ec2_config


In [5]:
# Build the collection of hyper-parameter sets to search over from the
# `main` attribute of the selected config module.
# NOTE(review): gen_hparam_set lives in core.tools.param_set_generator, which
# is not shown here; presumably it expands the search space described by
# config.main into one dict per combination -- confirm against that module.
parameter_collection = param_set_generator.gen_hparam_set(config.main)

Total number of parameter sets generated: 864


In [6]:
# Spot-check one generated parameter set (index 1) before launching the search.
pprint(parameter_collection[1])

{'LAGS': 3,
 'ORDER': 1,
 'PERIODS': 1,
 'TRAIN_RATIO': 0.8,
 'VAL_RATIO': 0.1,
 'clip_grad': 10,
 'epochs': 150,
 'fig_path': '/home/ec2-user/ec2_hps/2018DEC21_01/model_figs/LAGS=3-epochs=150-num_neurons=(128, '
             '256)-learning_rate=0.003-clip_grad=10',
 'hparam_str': 'LAGS=3-epochs=150-num_neurons=(128, '
               '256)-learning_rate=0.003-clip_grad=10',
 'learning_rate': 0.003,
 'model_path': '/home/ec2-user/ec2_hps/2018DEC21_01/saved_models/LAGS=3-epochs=150-num_neurons=(128, '
               '256)-learning_rate=0.003-clip_grad=10',
 'num_inputs': 1,
 'num_neurons': (128, 256),
 'num_outputs': 1,
 'num_time_steps': None,
 'report_periods': 10,
 'tensorboard_path': '/home/ec2-user/ec2_hps/2018DEC21_01/tensorboard/LAGS=3-epochs=150-num_neurons=(128, '
                     '256)-learning_rate=0.003-clip_grad=10'}


In [7]:
class HiddenPrints:
    """Context manager that silences ``print`` output inside its ``with`` body.

    On entry ``sys.stdout`` is replaced by a writable handle on ``os.devnull``.
    On exit the previous ``sys.stdout`` is restored and that handle is closed.
    Exceptions raised in the body are not suppressed.
    """

    def __enter__(self):
        """Redirect ``sys.stdout`` to the null device and return this manager."""
        self._original_stdout = sys.stdout
        # Keep our own reference to the handle so __exit__ closes exactly the
        # file opened here, even if the body reassigns sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the saved ``sys.stdout`` and close the devnull handle."""
        # Restore first so sys.stdout never refers to a closed file.
        sys.stdout = self._original_stdout
        self._devnull.close()

In [8]:
# Run one training session for every generated hyper-parameter set.
total_sessions = len(parameter_collection)
for i, param in enumerate(parameter_collection):
    visualize.progbar(i, total_sessions, 80)
    # Everything written to stdout inside this block -- including the status
    # messages below -- is sent to the null device by HiddenPrints.
    with HiddenPrints():
        print("================================================================")
        print(f"Executing hyper-parameter searching session [{i}/{total_sessions - 1}]...")
        flexible_config = param["hparam_str"].replace("-", "\n\t")
        print("Session Flexiable Config:\n\t" + flexible_config)
        # NOTE(review): `datetime` is not imported explicitly in this notebook;
        # presumably it arrives through one of the star imports -- confirm.
        start = datetime.now()
        hps_methods.individual_train(
            param=param, exec_core=stacked_lstm.exec_core, file_dir=FILE_DIR
        )
        elapsed = datetime.now() - start
        print(f"Time taken for session [{i}]: {str(elapsed)}.")
print("Done.")

 ############################################------------------------------------ [477/864,  55.21%]

ResourceExhaustedError: /home/ec2-user/ec2_hps/2018DEC21_01/saved_models/LAGS=9-epochs=150-num_neurons=(512, 1024, 2048)-learning_rate=0.003-clip_grad=None.data-00000-of-00001.tempstate1239659065685469408; Too many open files
	 [[node save/SaveV2 (defined at ../core/models/stacked_lstm.py:158)  = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, OPTIMIZER/beta1_power/_55, OPTIMIZER/beta2_power/_57, OUTPUT_LAYER/BIAS/_59, OUTPUT_LAYER/BIAS/ADAM_OPTIMIZER/_61, OUTPUT_LAYER/BIAS/ADAM_OPTIMIZER_1/_63, OUTPUT_LAYER/WEIGHT/_65, OUTPUT_LAYER/WEIGHT/ADAM_OPTIMIZER/_67, OUTPUT_LAYER/WEIGHT/ADAM_OPTIMIZER_1/_69, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/bias/_71, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/bias/ADAM_OPTIMIZER/_73, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/bias/ADAM_OPTIMIZER_1/_75, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/kernel/_77, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/kernel/ADAM_OPTIMIZER/_79, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/kernel/ADAM_OPTIMIZER_1/_81, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/bias/_83, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/bias/ADAM_OPTIMIZER/_85, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/bias/ADAM_OPTIMIZER_1/_87, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/kernel/_89, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/kernel/ADAM_OPTIMIZER/_91, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/kernel/ADAM_OPTIMIZER_1/_93, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/bias/_95, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/bias/ADAM_OPTIMIZER/_97, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/bias/ADAM_OPTIMIZER_1/_99, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/kernel/_101, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/kernel/ADAM_OPTIMIZER/_103, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/kernel/ADAM_OPTIMIZER_1/_105)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'save/SaveV2', defined at:
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/ioloop.py", line 759, in _run_callback
    ret = callback()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-6fff477babcc>", line 11, in <module>
    file_dir=FILE_DIR
  File "../core/training/hps_methods.py", line 72, in individual_train
    prediction_checkpoints=ckps
  File "../core/models/stacked_lstm.py", line 158, in exec_core
    saver = tf.train.Saver()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1102, in __init__
    self.build()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 792, in _build_internal
    save_tensor = self._AddSaveOps(filename_tensor, saveables)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 284, in _AddSaveOps
    save = self.save_op(filename_tensor, saveables)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 202, in save_op
    tensors)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1690, in save_v2
    shape_and_slices=shape_and_slices, tensors=tensors, name=name)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): /home/ec2-user/ec2_hps/2018DEC21_01/saved_models/LAGS=9-epochs=150-num_neurons=(512, 1024, 2048)-learning_rate=0.003-clip_grad=None.data-00000-of-00001.tempstate1239659065685469408; Too many open files
	 [[node save/SaveV2 (defined at ../core/models/stacked_lstm.py:158)  = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, OPTIMIZER/beta1_power/_55, OPTIMIZER/beta2_power/_57, OUTPUT_LAYER/BIAS/_59, OUTPUT_LAYER/BIAS/ADAM_OPTIMIZER/_61, OUTPUT_LAYER/BIAS/ADAM_OPTIMIZER_1/_63, OUTPUT_LAYER/WEIGHT/_65, OUTPUT_LAYER/WEIGHT/ADAM_OPTIMIZER/_67, OUTPUT_LAYER/WEIGHT/ADAM_OPTIMIZER_1/_69, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/bias/_71, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/bias/ADAM_OPTIMIZER/_73, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/bias/ADAM_OPTIMIZER_1/_75, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/kernel/_77, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/kernel/ADAM_OPTIMIZER/_79, rnn/multi_rnn_cell/cell_0/LSTM_CELL_0/kernel/ADAM_OPTIMIZER_1/_81, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/bias/_83, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/bias/ADAM_OPTIMIZER/_85, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/bias/ADAM_OPTIMIZER_1/_87, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/kernel/_89, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/kernel/ADAM_OPTIMIZER/_91, rnn/multi_rnn_cell/cell_1/LSTM_CELL_1/kernel/ADAM_OPTIMIZER_1/_93, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/bias/_95, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/bias/ADAM_OPTIMIZER/_97, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/bias/ADAM_OPTIMIZER_1/_99, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/kernel/_101, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/kernel/ADAM_OPTIMIZER/_103, rnn/multi_rnn_cell/cell_2/LSTM_CELL_2/kernel/ADAM_OPTIMIZER_1/_105)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

