In [1]:
import copy
import itertools
import os
from typing import Dict, List, Union

import matplotlib
import pandas as pd
from matplotlib import pyplot as plt

import constants
import core.tools.metrics as metrics
import core.tools.rnn_prepare as rnn_prepare
import core.tools.visualize as visualize
import core.tools.param_set_generator as param_set_generator
import core.models.stacked_lstm as stacked_lstm



In [2]:
from hps_configs.mac_config import main

In [3]:
# Build the hyper-parameter sets from the imported `main` config
# (presumably one dict per combination -- confirm in param_set_generator).
param_set = param_set_generator.gen_hparam_set(main)
# Only the first generated set is used by the rest of this notebook.
param = param_set[0]
# Location of the dataset registered under key "a" in constants.DATA_DIR.
file_dir = constants.DATA_DIR["a"]

Total number of parameter sets generated: 6


In [4]:
def checkpoints(period, total):
    """
    A helper for the individual train method: builds the list of
    intermediate checkpoint steps.

    Returns, in ascending order, every positive multiple of PERIOD that
    is strictly smaller than TOTAL. TOTAL itself is never included, so a
    caller that also wants the final step has to add it on its own.

    Example: checkpoints(10, 25) -> [10, 20]

    Raises ValueError when PERIOD is zero or negative.
    """
    if period <= 0:
        # A zero period would otherwise surface as an opaque arithmetic error.
        raise ValueError(
            "period must be a positive integer, got {!r}".format(period)
        )
    # Stepping a range by PERIOD keeps the last multiple below TOTAL even when
    # TOTAL is not an exact multiple of PERIOD.
    return list(range(period, total, period))

In [5]:
# Generate the dataset.
df_ready = rnn_prepare.prepare_dataset(
    file_dir=file_dir,
    periods=param["PERIODS"],
    order=param["ORDER"],
    remove=None,
    verbose=False
)

# Split dataset.
(X_train, X_val, X_test,
 y_train, y_val, y_test) = rnn_prepare.split_dataset(
    raw=df_ready,
    train_ratio=param["TRAIN_RATIO"],
    val_ratio=param["VAL_RATIO"],
    lags=param["LAGS"]
)

# The data fed to the model: training and validation sets only.
# The test set is deliberately left out to keep it isolated from training.
model_data_feed = {
    "X_train": X_train,
    "X_val": X_val,
    "y_train": y_train,
    "y_val": y_val,
}

# The checkpoint list: roughly ten evenly spaced intermediate steps, followed
# by -1 (presumably the final state -- confirm in stacked_lstm.exec_core).
# max(1, ...) keeps the period positive when fewer than 10 epochs are
# configured; epochs // 10 would be 0 there and checkpoints() fails on a
# zero period.
ckps = checkpoints(max(1, param["epochs"] // 10), param["epochs"]) + [-1]

predictions = stacked_lstm.exec_core(
    param=param,
    data=model_data_feed,
    prediction_checkpoints=ckps
)

Dataset loaded.    
	Index type: datetime64[ns]    
	Data type: float64
StandardScaler applied, scaling based on the first 679 observations.
Total 842 observations generated.
Note: shape format: (num_obs, time_steps, num_inputs/outputs)
X shape = (842, 6, 1), y shape = (842, 1, 1)
Training and testing set generated,        
X_train shape: (672, 6, 1)        
y_train shape: (672, 1)        
X_test shape: (85, 6, 1)        
y_test shape: (85, 1)        
X_validation shape: (85, 6, 1)        
y_validation shape: (85, 1)
Resetting Tensorflow defalut graph...
Note: no gradient clipping is applied.            
If possible gradient exploding detected (e.g. nan loss), try use clip_grad.
Starting training session...
Training model...

Iteration [0], Training MSE 127.1071625; Validation MSE 149.9715881

Iteration [100], Training MSE 0.7229499; Validation MSE 0.4551841
Saving the model...
Time taken for [150] epochs:  0:00:02.662637


In [6]:
def _save_current_figure(fig_dir, file_name):
    """Save the active matplotlib figure as FILE_NAME inside FIG_DIR, then close it."""
    # exist_ok=True replaces the separate exists() check and its
    # check-then-create race.
    os.makedirs(fig_dir, exist_ok=True)
    # os.path.join copes with a trailing separator on FIG_DIR, so no assert
    # on the directory string is required before building the path.
    plt.savefig(os.path.join(fig_dir, file_name))
    plt.close()


# The last stored entry is taken as the final prediction
# (relies on `predictions` preserving insertion order -- TODO confirm).
val_final = list(predictions.values())[-1]["val"]
print("Final result (validation set):")
metric_test = metrics.merged_scores(
    actual=pd.DataFrame(y_val),
    pred=pd.DataFrame(val_final),
    verbose=True
)

# Visualize prediction during training, one figure per data split.
for set_name in ["train", "val"]:
    # Keep only this split's predictions, keyed by checkpoint.
    pred = {e: val[set_name] for e, val in predictions.items()}
    # Drop any figure left open so the new plot starts from a clean state.
    plt.close()
    fig = visualize.plot_checkpoint_individual(
        predictions=pred,
        actual=model_data_feed["y_" + set_name],
        name=set_name)
    _save_current_figure(param["fig_path"], f"pred_record_{set_name}.svg")

# Combined view of both splits in a single figure.
fig = visualize.plot_checkpoint_combined(
    predictions=predictions,
    actual={"train": y_train, "val": y_val}
)
_save_current_figure(param["fig_path"], "pred_record_combined.svg")

Final result (validation set):
Loss Summary:
	mae=0.5523725748062134
	mse=0.5186339616775513
	rmse=0.7201624512672424
	mape=5.335463523864746
