In [1]:
"""
The default hyper-parameter searching program.
This is a control script.
"""
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
from typing import Dict, List

import sys
sys.path.append(".../")
sys.path.append("../")

import constants
from core.tools.metrics import *
import core.tools.visualize as visualize
from core.tools.time_series import *
from core.tools.data_import import *
import core.tools.rnn_prepare as rnn_prepare

import core.models.stacked_lstm as stacked_lstm

import core.training.hps_methods as hps_methods

# data preparation phase.
# Show the known dataset locations and keep prompting until the user enters
# one of the keys of constants.DATA_DIR.
pprint(constants.DATA_DIR)
choice = None
while choice is None or choice not in constants.DATA_DIR:
    if choice is not None:
        print("Invalid data location received, try again...")
    choice = input("Select Dataset >>> ")
FILE_DIR = constants.DATA_DIR[choice]

print(f"Dataset chosen: \n\t{FILE_DIR}")

config_name = input("Name of configuration file to load >>> ")

# Resolve the configuration module by name instead of exec()-ing a string
# built from user input: a bad name now raises ModuleNotFoundError rather
# than being executed as arbitrary Python code.
import importlib
config = importlib.import_module(f"core.training.configs.{config_name}")

# Every module attribute whose name ends with "_config" is merged into this
# script's global namespace (the attribute must be a mapping of
# name -> value for globals().update to accept it).
for att in dir(config):
    if att.endswith("_config"):
        print(f"Loading: {att}")
        globals().update(getattr(config, att))


# Expand the search space declared in the configuration into the collection
# of individual hyper-parameter sets to train on.
parameter_collection = hps_methods.gen_hparam_set(config.train_param)


def _checkpoint_epochs(total_epochs: int) -> List[int]:
    """Return the checkpoint list handed to the model as prediction_checkpoints.

    The list holds multiples of one tenth of ``total_epochs`` followed by a
    trailing ``-1`` (presumably "the final state" -- confirm against
    ``stacked_lstm.exec_core``).

    The interval is clamped to at least 1 so that runs shorter than ten
    epochs no longer raise ZeroDivisionError; for ``total_epochs >= 10`` the
    result is unchanged.
    """
    interval = max(1, total_epochs // 10)
    return [interval * k for k in range(1, total_epochs // interval)] + [-1]


def individual_train(para) -> None:
    """Run one training session for a single hyper-parameter set.

    Prepares the dataset, splits it into train/validation/test parts, trains
    the stacked LSTM and saves a figure of the predictions recorded at the
    checkpoints.

    Args:
        para: Mapping of hyper-parameters. This function reads the keys
            "num_time_steps", "epochs" and "fig_path"; the whole mapping is
            also forwarded to ``stacked_lstm.exec_core``.

    Note:
        FILE_DIR, PERIODS and ORDER are read from module globals. PERIODS
        and ORDER are expected to have been injected from the loaded
        configuration module before this function is called.
    """
    prepared_df = rnn_prepare.prepare_dataset(
        file_dir=FILE_DIR,
        periods=PERIODS,
        order=ORDER,
        remove=None
    )
    (X_train, X_val, X_test,
     y_train, y_val, y_test) = rnn_prepare.generate_splited_dataset(
        raw=prepared_df,
        train_ratio=0.8,
        val_ratio=0.1,
        lags=para["num_time_steps"]
    )
    data_collection = {
        "X_train": X_train,
        "X_val": X_val,
        "X_test": X_test,
        "y_train": y_train,
        "y_val": y_val,
        "y_test": y_test
    }

    # Only the recorded predictions are used below; the metrics are dropped.
    _, predictions = stacked_lstm.exec_core(
        parameters=para,
        data_collection=data_collection,
        clip_grad=None,
        prediction_checkpoints=_checkpoint_epochs(para["epochs"])
    )

    visualize.plot_checkpoints(predictions, y_test, "test")
    # "fig_path" is used as a plain string prefix, so it must already end
    # with a path separator for the file to land inside that directory.
    plt.savefig(para["fig_path"]+"pred_records.svg")


# Train once per generated hyper-parameter set, in generation order.
for i, para in enumerate(parameter_collection):
    print(f"Control: executing [{i}]-th hyper-parameter searching session...")
    individual_train(para)

{'0': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',
 '1': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',
 '2': '/home/ubuntu/AnnEconForecast/data/UNRATE.csv',
 '3': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}
Select Dataset >>> 1
Dataset chosen: 
	/home/ec2-user/AnnEconForecast/data/UNRATE.csv
Name of configuration file to load >>> sample_config
Loading: dp_config
Loading: file_config
Total number of parameter sets generated: 18
Control: executing [0]-th hyper-parameter searching session...
Retrieving raw data from /home/ec2-user/AnnEconForecast/data/UNRATE.csv...
Dataset loaded.    
	Index type: datetime64[ns]    
	Data type: float64
Processing data, taking (periods, order)=(1, 1)...
Dropping Nan observations...
First few rows of dataset loaded:
            UNRATE_period1_order1
DATE                             
1948-02-01                    0.4
1948-03-01                    0.2
1948-04-01                   -0.1
1948-05-01            

Control: executing [5]-th hyper-parameter searching session...
Retrieving raw data from /home/ec2-user/AnnEconForecast/data/UNRATE.csv...
Dataset loaded.    
	Index type: datetime64[ns]    
	Data type: float64
Processing data, taking (periods, order)=(1, 1)...
Dropping Nan observations...
First few rows of dataset loaded:
            UNRATE_period1_order1
DATE                             
1948-02-01                    0.4
1948-03-01                    0.2
1948-04-01                   -0.1
1948-05-01                   -0.4
1948-06-01                    0.1
StandardScaler applied, scaling based on the first 679 observations.
Total 836 observations generated.
Note: shape format: (num_obs, time_steps, num_inputs/outputs)
X shape = (836, 12, 1), y shape = (836, 1, 1)
Training and testing set generated,        
X_train shape: (668, 12, 1)        
y_train shape: (668, 1)        
X_test shape: (84, 12, 1)        
y_test shape: (84, 1)        
X_validation shape: (84, 12, 1)        
y_validatio

Note: no gradient clipping is applied.            
If possible gradient exploding detected (e.g. nan loss), try use clip_grad.

Iteration [0], Training MSE 11264.9638672; Validation MSE 11237.9531250

Iteration [100], Training MSE 0.8067419; Validation MSE 0.7994146

Iteration [200], Training MSE 0.9766335; Validation MSE 1.2322487
Saving the trained model...
Time taken for [300] epochs:  0:00:21.772474
Loss Summary:
	mae=1.1765927076339722
	mse=1.7206339836120605
	rmse=1.3117294311523438
	mape=23.658445358276367
Control: executing [11]-th hyper-parameter searching session...
Retrieving raw data from /home/ec2-user/AnnEconForecast/data/UNRATE.csv...
Dataset loaded.    
	Index type: datetime64[ns]    
	Data type: float64
Processing data, taking (periods, order)=(1, 1)...
Dropping Nan observations...
First few rows of dataset loaded:
            UNRATE_period1_order1
DATE                             
1948-02-01                    0.4
1948-03-01                    0.2
1948-04-01          

KeyboardInterrupt: 