In [2]:
%matplotlib notebook


# Setup

In [3]:
import os
import pickle
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import r2_score
from tensorflow import keras
from tensorflow.keras import layers
from tqdm.auto import tqdm, trange

tqdm.pandas()

from collections import defaultdict

local_root_path = "."

sys.path.append(local_root_path)
import annutils

from datetime import datetime


  from .autonotebook import tqdm as notebook_tqdm


# Param setup

In [4]:
# Learning rate passed to the Adam optimizer whenever a model is compiled.
initial_lr = 0.001

# Dropout ratio at (before) the input layer
input_dropout = 0.

# Dropout ratio at intermediate layers
intermediate_dropout = 0

# Input geometry: the model builders reshape the flat input into
# (ndays + nwindows) steps of NFEATURES values each.
ndays = 118
window_size = 0
nwindows = 0

# Used for the default number of convolution filters in build_resnet_model.
num_sheets = 9

# Compression settings handed to pandas when the experiment CSV files are read.
compression_opts = {'method': 'zip', 'archive_name': 'out.csv'}


# build_model_string def

In [5]:
def build_model_string(model_type, num_neurons_multiplier, input_dropout=0, intermediate_dropout=0):
    """Build the underscore-separated string that names a model architecture.

    :param model_type: one of 'mlp', 'lstm', 'gru', 'resnet', 'res-lstm',
        'res-gru' or 'transformer' (case-insensitive)
    :param num_neurons_multiplier: sequence of integers describing the layer
        widths; it is only read, never modified
    :param input_dropout: when > 0, a 'dr<ratio>_' token is put in front of the string
    :param intermediate_dropout: when > 0, a 'dr<ratio>' token is placed between
        the layer tokens (ignored for the transformer)
    :return: the model string, or None when ``model_type`` is not recognised
    """
    model_type = model_type.lower()
    input_prefix = 'dr%.2f_' % input_dropout if input_dropout > 0 else ''
    intermediate_prefix = 'dr%.2f_' % intermediate_dropout if intermediate_dropout > 0 else ''

    # Name stems of the architectures whose string is "<stem>_<n0>_<n1>...".
    residual_stems = {
        'resnet': 'resnet',             # 4. ResNet
        'res-lstm': 'residual_lstm',    # 5. Res-LSTM
        'res-gru': 'residual_gru',      # 6. Res-GRU
    }

    if model_type == 'mlp':
        # 1. MLP Network
        return '%sd%d_%sd%d_o1' % (input_prefix,
                                   num_neurons_multiplier[0],
                                   intermediate_prefix,
                                   num_neurons_multiplier[1])

    if model_type == 'lstm':
        # 2. LSTM Network
        return '%slstm%d_%sf_o1' % (input_prefix,
                                    num_neurons_multiplier[0],
                                    intermediate_prefix)

    if model_type == 'gru':
        # 3. GRU Network
        return '%sg%d_%sf_o1' % (input_prefix,
                                 num_neurons_multiplier[0],
                                 intermediate_prefix)

    if model_type in residual_stems:
        # Work on a copy: inserting the dropout token into the caller's list
        # would corrupt it for every later use (e.g. the shared `models` dict).
        tokens = list(num_neurons_multiplier)
        if intermediate_dropout > 0:
            tokens.insert(1, 'dr%.2f' % intermediate_dropout)
        return '%s%s%s' % (input_prefix,
                           residual_stems[model_type],
                           '_' + '_'.join(str(token) for token in tokens))

    if model_type == 'transformer':
        # 7. Transformer
        return '%stransformer' % input_prefix

    # Unknown model types keep yielding None, as before.
    return None


# TensorBoard setup

In [6]:
###############
# TensorBoard logging. To inspect the logs, run in a separate cell:
#   %load_ext tensorboard
#   %tensorboard --logdir=./tf_training_logs/ --port=6006
now = datetime.now()

# Each kernel session logs into its own timestamped sub-directory.
root_logdir = os.path.join(
    os.curdir,
    "tf_training_logs",
    "{:%Y%m%d-%H%M%S}".format(now),
)

# Keras callback that writes the training logs below root_logdir.
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=root_logdir)
###############

# ANN Specific Definitions
The next cell defines the ANN-specific building blocks: the residual blocks, the masked MSE loss, and the model builders.

In [7]:
# Keyword arguments shared by every Conv1D layer created in basic_1d / bottleneck_1d.
parameters = dict(kernel_initializer="he_normal")

def basic_1d(
        filters,
        stage=0,
        block=0,
        kernel_size=3,
        numerical_name=False,
        stride=None,
        force_identity_shortcut=False
):
    """
    Create a one-dimensional basic residual block (two convolutions plus a shortcut).

    :param filters: number of filters of both main-branch convolutions and of the projection shortcut
    :param stage: int representing the stage of this block (starting from 0)
    :param block: int representing this block (starting from 0)
    :param kernel_size: kernel size of the two main-branch convolutions
    :param numerical_name: if true, blocks after the first are labelled "b<block>" instead of a letter
    :param stride: stride of the first convolution and of the projection shortcut; when None it is 2
        for block 0 of any stage other than stage 0, and 1 otherwise
    :param force_identity_shortcut: if true, the input is added back unchanged even when block is 0
    :return: a function that maps an input tensor to the output tensor of the block
    """
    if stride is None:
        # Only the first block of a stage after stage 0 uses a stride of 2.
        stride = 2 if (block == 0 and stage != 0) else 1

    block_char = "b{}".format(block) if (block > 0 and numerical_name) else chr(ord('a') + block)
    stage_char = str(stage + 2)

    # Shared pieces of the layer names, e.g. "2a" and "res2a".
    tag = stage_char + block_char
    prefix = "res" + tag

    # Every block but the first re-uses its input as the shortcut, unless forced for block 0 too.
    use_identity = block != 0 or force_identity_shortcut

    def f(x):
        # First convolution: pad -> conv (strided) -> batch norm -> ReLU.
        branch = keras.layers.ZeroPadding1D(padding=1, name="padding" + tag + "_branch2a")(x)
        branch = keras.layers.Conv1D(filters, kernel_size, strides=stride, use_bias=False,
                                     name=prefix + "_branch2a",
                                     **parameters)(branch)
        branch = keras.layers.BatchNormalization()(branch)
        branch = keras.layers.Activation("relu", name=prefix + "_branch2a_relu")(branch)

        # Second convolution: pad -> conv -> batch norm (ReLU comes after the addition).
        branch = keras.layers.ZeroPadding1D(padding=1, name="padding" + tag + "_branch2b")(branch)
        branch = keras.layers.Conv1D(filters, kernel_size, use_bias=False,
                                     name=prefix + "_branch2b",
                                     **parameters)(branch)
        branch = keras.layers.BatchNormalization()(branch)

        if use_identity:
            shortcut = x
        else:
            # Projection shortcut: 1-wide convolution with the same stride, then batch norm.
            shortcut = keras.layers.Conv1D(filters, 1, strides=stride, use_bias=False,
                                           name=prefix + "_branch1",
                                           **parameters)(x)
            shortcut = keras.layers.BatchNormalization()(shortcut)

        merged = keras.layers.Add(name=prefix)([branch, shortcut])
        return keras.layers.Activation("relu", name=prefix + "_relu")(merged)

    return f

def bottleneck_1d(
        filters,
        stage=0,
        block=0,
        kernel_size=3,
        numerical_name=False,
        stride=None,
):
    """
    Create a one-dimensional bottleneck residual block (three convolutions plus a shortcut).

    :param filters: number of filters of the first two convolutions; the third convolution and the
        projection shortcut use ``filters * 4``
    :param stage: int representing the stage of this block (starting from 0)
    :param block: int representing this block (starting from 0)
    :param kernel_size: kernel size of the middle convolution
    :param numerical_name: if true, blocks after the first are labelled "b<block>" instead of a letter
    :param stride: stride of the first convolution and of the projection shortcut; when None it is 2
        for block 0 of any stage other than stage 0, and 1 otherwise
    :return: a function that maps an input tensor to the output tensor of the block
    """
    if stride is None:
        if block == 0 and stage != 0:
            stride = 2
        else:
            stride = 1

    block_char = "b{}".format(block) if (block > 0 and numerical_name) else chr(ord('a') + block)
    stage_char = str(stage + 2)

    # Shared pieces of the layer names, e.g. "2a" and "res2a".
    tag = stage_char + block_char
    prefix = "res" + tag

    def f(x):
        # 1-wide (strided) convolution -> batch norm -> ReLU.
        branch = keras.layers.Conv1D(filters, 1, strides=stride, use_bias=False,
                                     name=prefix + "_branch2a",
                                     **parameters)(x)
        branch = keras.layers.BatchNormalization()(branch)
        branch = keras.layers.Activation("relu", name=prefix + "_branch2a_relu")(branch)

        # Padded convolution with the requested kernel size -> batch norm -> ReLU.
        branch = keras.layers.ZeroPadding1D(padding=1, name="padding" + tag + "_branch2b")(branch)
        branch = keras.layers.Conv1D(filters, kernel_size, use_bias=False,
                                     name=prefix + "_branch2b",
                                     **parameters)(branch)
        branch = keras.layers.BatchNormalization()(branch)
        branch = keras.layers.Activation("relu", name=prefix + "_branch2b_relu")(branch)

        # 1-wide convolution expanding to four times the filter count -> batch norm.
        branch = keras.layers.Conv1D(filters * 4, 1, use_bias=False,
                                     name=prefix + "_branch2c",
                                     **parameters)(branch)
        branch = keras.layers.BatchNormalization()(branch)

        if block != 0:
            shortcut = x
        else:
            # The first block projects its input so that it matches the expanded main branch.
            shortcut = keras.layers.Conv1D(filters * 4, 1, strides=stride, use_bias=False,
                                           name=prefix + "_branch1",
                                           **parameters)(x)
            shortcut = keras.layers.BatchNormalization()(shortcut)

        merged = keras.layers.Add(name=prefix)([branch, shortcut])
        return keras.layers.Activation("relu", name=prefix + "_relu")(merged)

    return f

###############
# Custom loss function

def mse_loss_masked(y_true, y_pred):
    """Mean squared error restricted to the entries where ``y_true`` is > 0.

    The squared differences of the selected entries are summed and divided by
    the number of selected entries plus 0.01, which keeps the division finite
    when no entry is selected.
    """
    valid = y_true > 0
    squared_error_sum = tf.reduce_sum(tf.math.squared_difference(y_pred[valid], y_true[valid]))
    valid_count = tf.reduce_sum(tf.cast(valid, tf.float32))
    return squared_error_sum / (valid_count + 0.01)

# Define Sequential model
# Size of the last axis after the model builders reshape the flat input.
# Hard-coded; it was previously taken from dfinps.shape[1].
NFEATURES = 8

def build_layer_from_string_def(s='i120', width_multiplier=1,
                                block=0,
                                force_identity_shortcut=False,
                                return_sequences_rnn=True):
    """Translate one layer token of a model string into a Keras layer (or block function).

    Recognised tokens, matched in this order:

    * ``lstm<units>``: LSTM with ``units * width_multiplier`` units and sigmoid activation
    * ``res<filters>x<kernel>x<stride>x<stage>``: residual block built by ``basic_1d``
    * ``c1d<filters>x<kernel>x<stride>``: causal Conv1D with linear activation
    * ``td<units>``: TimeDistributed Dense with ELU activation
    * ``dr<rate>``: Dropout
    * ``f``: Flatten
    * ``g<units>``: GRU with ``units * width_multiplier`` units (always returns sequences)
    * ``d<units>``: Dense with ELU activation, ``units * width_multiplier`` wide
    * ``o<units>``: Dense with linear activation, ``units * width_multiplier`` wide

    Input tokens (``i<n>``) are not handled here; the default ``s`` therefore raises.

    :param s: the layer token
    :param width_multiplier: factor applied to the unit count of lstm/g/d/o tokens
    :param block: block index forwarded to ``basic_1d`` for ``res`` tokens
    :param force_identity_shortcut: forwarded to ``basic_1d`` for ``res`` tokens
    :param return_sequences_rnn: ``return_sequences`` setting of an ``lstm`` token
    :return: a callable that can be applied to a tensor
    :raises ValueError: if the token is empty or matches no known prefix (ValueError is an
        Exception subclass, so existing ``except Exception`` handlers keep working); also
        raised by ``int``/``float`` when the numeric part of a token is malformed
    """
    if not s:
        # An empty token (e.g. from a doubled '_' in the model string) used to
        # surface as a bare IndexError from s[0]; report it like any unknown token.
        raise ValueError('Unknown layer def: %s' % s)

    if s.startswith('lstm'):
        return layers.LSTM(units=int(s[4:]) * width_multiplier, return_sequences=return_sequences_rnn,
                           activation='sigmoid')
    elif s.startswith('res'):
        fields = s[3:].split('x')
        return basic_1d(filters=int(fields[0]),
                        stage=int(fields[3]),
                        block=block,
                        kernel_size=int(fields[1]),
                        stride=int(fields[2]),
                        force_identity_shortcut=force_identity_shortcut)
    elif s.startswith('c1d'):
        fields = s[3:].split('x')
        return keras.layers.Conv1D(filters=int(fields[0]), kernel_size=int(fields[1]), strides=int(fields[2]),
                                   padding='causal', activation='linear')
    elif s.startswith('td'):
        return keras.layers.TimeDistributed(keras.layers.Dense(int(s[2:]), activation='elu'))
    elif s.startswith('dr'):
        # Must be tested before the single-letter 'd' (Dense) prefix.
        return keras.layers.Dropout(float(s[2:]))
    elif s[0] == 'f':
        return keras.layers.Flatten()
    elif s[0] == 'g':
        return keras.layers.GRU(int(s[1:]) * width_multiplier, return_sequences=True, activation='relu')
    elif s[0] == 'd':
        return keras.layers.Dense(int(s[1:]) * width_multiplier, activation='elu')
    elif s[0] == 'o':
        return keras.layers.Dense(int(s[1:]) * width_multiplier, activation='linear')
    else:
        raise ValueError('Unknown layer def: %s' % s)

###############

def build_model_from_string_def(strdef='i120_f_d4_d2_d1', width_multiplier=1):
    """Build and compile a Keras model from an underscore-separated model string.

    The first token (``i<n>``) fixes the flat input length as ``n * NFEATURES``, the
    last token (``o<k>``) the width of the linear output layer
    (``k * width_multiplier``); every token in between is turned into a layer by
    ``build_layer_from_string_def`` and applied in order.

    :param strdef: the model string, e.g. ``'i118_lstm8_f_o1'``
    :param width_multiplier: factor applied to the unit counts of the layers
    :return: the model, compiled with Adam (``initial_lr``) and the plain "mse" loss
    """
    layer_strings = strdef.split('_')
    print ('layer_strings:%s' % layer_strings)
    inputs = keras.layers.Input(shape=[int(layer_strings[0][1:]) * NFEATURES])
    x = None
    prev_conv_output_num_of_channels = None
    return_sequences_rnn = None
    for block, f in enumerate(layer_strings[1:-1]):
        if x is None:
            # First hidden layer: bring the flat input into the shape the layer family needs.
            if ('lstm' in strdef) or ('g' in strdef):
                # these layers require 2D inputs and permutation
                x = layers.Reshape((ndays + nwindows, NFEATURES))(inputs)
                prev_conv_output_num_of_channels = NFEATURES
                x = layers.Permute((2, 1))(x)
                # Decided once, from the token that follows the first hidden layer,
                # and then reused for every 'lstm' token of the model.
                return_sequences_rnn = layer_strings[block + 2].startswith(('lstm', 'g', 'res', 'c1d'))
            elif ('res' in strdef) or ('c1d' in strdef):
                # these layers require 2D inputs
                # (the Conv1D token is spelled 'c1d'; the previous test for 'cld'
                # could never match, so Conv1D-only models were fed the flat input)
                x = layers.Reshape((ndays + nwindows, NFEATURES))(inputs)
                prev_conv_output_num_of_channels = NFEATURES
            else:
                x = inputs

        x = build_layer_from_string_def(f, width_multiplier, block,
                                        force_identity_shortcut=(
                                                f.startswith('res') and prev_conv_output_num_of_channels == int(
                                            f[3:].split('x')[0])),
                                        return_sequences_rnn=return_sequences_rnn)(x)
        # Remember the channel count so that a following 'res' block can tell
        # whether an identity shortcut is possible.
        if f.startswith('lstm'):
            prev_conv_output_num_of_channels = int(f[4:])
        elif f.startswith('res') or f.startswith('c1d'):
            prev_conv_output_num_of_channels = int(f[3:].split('x')[0])

    if x is None:
        # No hidden layers at all (e.g. 'i118_o1'): connect the output layer
        # directly to the input instead of calling it on None.
        x = inputs

    outputs = keras.layers.Dense(int(layer_strings[-1][1:]) * width_multiplier, activation='linear')(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(
        learning_rate=initial_lr), loss="mse")
    return model

def build_resnet_model(nhidden1=8, nhidden2=2, output_shape=1, act_func='sigmoid',
                       filters=num_sheets - 1, kernel_size=3, stride=1,
                       num_res_blocks=1, input_dropout=0.):
    """Build and compile the ResNet-style model.

    The flat input is reshaped to (ndays + nwindows, NFEATURES) and sent through
    ``num_res_blocks - 1`` small residual blocks and one final convolutional
    branch that ends in a Dense layer. A Dense projection of the raw flat input
    is added to that branch before the two closing Dense layers.

    :param nhidden1: width of the Dense layers that end the main branch and the shortcut
    :param nhidden2: width of the Dense layer before the output
    :param output_shape: width of the linear output layer
    :param act_func: activation of the hidden Dense layers
    :param filters: filter count of the final two convolutions
    :param kernel_size: kernel size of the final two convolutions
    :param stride: stride of the first of the final two convolutions
    :param num_res_blocks: total number of residual blocks, the final one included
    :param input_dropout: dropout rate applied right after the reshape
    :return: the model, compiled with Adam (``initial_lr``) and ``mse_loss_masked``

    The dropout after the final addition uses the module-level ``intermediate_dropout``.
    """
    n_steps = ndays + nwindows
    inputs = layers.Input(shape=NFEATURES * n_steps)
    x = layers.Reshape((n_steps, NFEATURES))(inputs)
    x = layers.Dropout(input_dropout)(x)

    # TODO: think about conv filter numbers and kernel sizes
    for idx in range(num_res_blocks - 1):
        branch = layers.ZeroPadding1D(padding=1, name="padding%d_branch2a" % idx)(x)
        # Two identical conv -> batch norm -> ReLU stages, named "...2a" and "...2b".
        for part in ("a", "b"):
            stem = "res%d_branch2%s" % (idx, part)
            branch = layers.Conv1D(filters=NFEATURES, kernel_size=2, strides=1, use_bias=False,
                                   name=stem)(branch)
            branch = layers.BatchNormalization()(branch)
            branch = layers.Activation("relu", name=stem + "_relu")(branch)
        # Identity shortcut around the two convolutions.
        x = layers.Add(name="res%d_add" % idx)([branch, x])

    last = num_res_blocks

    # Final block, first convolution: pad -> conv -> batch norm -> ReLU.
    main = layers.ZeroPadding1D(padding=1, name="padding%d_branch2a" % last)(x)
    main = layers.Conv1D(filters, kernel_size, strides=stride, use_bias=False,
                         name="res%d_branch2a" % last)(main)
    main = layers.BatchNormalization()(main)
    main = layers.Activation("relu", name="res%d_branch2a_relu" % last)(main)

    # Final block, second convolution, then flatten into a Dense layer.
    main = layers.ZeroPadding1D(padding=1, name="padding%d_branch2b" % last)(main)
    main = layers.Conv1D(filters, kernel_size, use_bias=False,
                         name="res%d_branch2b" % last)(main)
    main = layers.BatchNormalization()(main)
    main = layers.Flatten()(main)
    main = layers.Dense(nhidden1, activation=act_func)(main)

    # The shortcut of the final block is a Dense projection of the raw flat input.
    projected_input = layers.Dense(nhidden1, activation=act_func)(inputs)

    head = layers.Add(name="res%d_add" % last)([main, projected_input])
    head = layers.Dropout(intermediate_dropout)(head)
    head = layers.Activation("relu", name="res_relu")(head)
    head = layers.Dense(nhidden2, activation=act_func)(head)
    outputs = layers.Dense(output_shape, activation=keras.activations.linear, name='output')(head)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=initial_lr),
                  loss=mse_loss_masked)
    return model

def build_residual_lstm_model(nhidden1=8, nhidden2=2, output_shape=1,
                              act_func='sigmoid', layer_type='lstm',
                              conv_init=None,
                              input_dropout=0.):
    """Build and compile the residual LSTM/GRU model.

    Two branches share the reshaped, permuted input and are summed at the end:
    a frozen Conv1D followed by an MLP, and a Dense layer followed by a
    recurrent layer.

    :param nhidden1: width of the first Dense layer of both branches
    :param nhidden2: width of the second Dense layer of the MLP branch
    :param output_shape: width of the output of each branch
    :param act_func: activation of the hidden Dense layers and the recurrent layer
    :param layer_type: 'lstm' selects an LSTM; any other value selects a GRU
    :param conv_init: kernel initializer of the frozen (non-trainable) Conv1D layer
    :param input_dropout: dropout rate applied right after the reshape
    :return: the model, compiled with Adam (``initial_lr``) and ``mse_loss_masked``

    The dropout inside the MLP branch uses the module-level ``intermediate_dropout``.
    """
    if layer_type == 'lstm':
        rnn_layer = layers.LSTM
    else:
        rnn_layer = layers.GRU

    n_steps = ndays + nwindows
    input_shape = NFEATURES * n_steps
    print("input shape: ", input_shape)

    inputs = layers.Input(shape=input_shape)
    x = layers.Reshape((n_steps, NFEATURES))(inputs)
    x = layers.Dropout(input_dropout)(x)
    # After the permutation the tensor is (NFEATURES, n_steps) per sample.
    x = layers.Permute((2, 1))(x)

    # MLP branch: frozen 1-wide convolution, then three Dense layers.
    no_op_regularizer = tf.keras.regularizers.l1_l2(l1=0, l2=0)
    mlp = tf.keras.layers.Conv1D(n_steps, 1, activation='relu',
                                 kernel_initializer=conv_init,
                                 kernel_regularizer=no_op_regularizer,
                                 trainable=False)(x)
    mlp = layers.Flatten()(mlp)
    mlp = layers.Dense(nhidden1, activation=act_func)(mlp)
    mlp = layers.Dropout(intermediate_dropout)(mlp)
    mlp = layers.Dense(nhidden2, activation=act_func)(mlp)
    mlp = layers.Dense(output_shape, activation=keras.activations.linear, name='mlp_output')(mlp)

    # Recurrent branch: Dense -> LSTM/GRU (full sequence) -> flatten -> Dense.
    recurrent = layers.Dense(nhidden1, activation=act_func)(x)
    recurrent = rnn_layer(units=output_shape * 2, activation=act_func, return_sequences=True)(recurrent)
    recurrent = layers.Flatten()(recurrent)
    recurrent = layers.Dense(output_shape, activation=keras.activations.linear, name='lstm_output')(recurrent)

    summed = layers.Add(name="res_add")([mlp, recurrent])
    outputs = layers.LeakyReLU(alpha=0.3, name="res_relu")(summed)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=initial_lr),
                  loss=mse_loss_masked)
    return model

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one transformer encoder block to ``inputs`` and return the result.

    :param inputs: input tensor; its last dimension is restored by the feed-forward part
    :param head_size: ``key_dim`` of the multi-head attention layer
    :param num_heads: number of attention heads
    :param ff_dim: filter count of the first feed-forward convolution
    :param dropout: dropout rate used inside the attention layer and after both sub-layers
    :return: tensor with the same last dimension as ``inputs``
    """
    # Self-attention sub-layer: layer norm -> attention -> dropout, plus residual.
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = attention(normed, normed)
    attended = layers.Dropout(dropout)(attended)
    res = attended + inputs

    # Feed-forward sub-layer made of two 1-wide convolutions, plus residual.
    hidden = layers.LayerNormalization(epsilon=1e-6)(res)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    hidden = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return hidden + res

def build_transformer(head_size,
                      num_heads,
                      ff_dim,
                      num_transformer_blocks,
                      mlp_units,
                      output_shape,
                      dropout=0,
                      mlp_dropout=0,
                      input_dropout=0):
    """Build and compile the transformer model.

    :param head_size: ``key_dim`` of every attention layer
    :param num_heads: number of attention heads per encoder block
    :param ff_dim: filter count of the first feed-forward convolution in each block
    :param num_transformer_blocks: number of stacked encoder blocks
    :param mlp_units: iterable with the widths of the Dense layers after pooling
    :param output_shape: width of the output layer
    :param dropout: dropout rate inside the encoder blocks
    :param mlp_dropout: dropout rate after each Dense layer of the MLP head
    :param input_dropout: dropout rate applied right after the reshape
    :return: the model, compiled with Adam (``initial_lr``) and ``mse_loss_masked``
    """
    n_steps = ndays + nwindows
    inputs = keras.Input(shape=NFEATURES * n_steps)

    encoded = layers.Reshape((n_steps, NFEATURES))(inputs)
    encoded = layers.Dropout(input_dropout)(encoded)
    block_index = 0
    while block_index < num_transformer_blocks:
        encoded = transformer_encoder(encoded, head_size, num_heads, ff_dim, dropout)
        block_index += 1

    # Pool, then run the MLP head (Dense + dropout per entry of mlp_units).
    head = layers.GlobalAveragePooling1D(data_format="channels_first")(encoded)
    for width in mlp_units:
        head = layers.Dense(width, activation="relu")(head)
        head = layers.Dropout(mlp_dropout)(head)

    linear_out = layers.Dense(output_shape)(head)
    outputs = layers.LeakyReLU(alpha=0.3, name="res_relu")(linear_out)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=initial_lr),
                  loss=mse_loss_masked)
    return model

# build_or_load_model def

In [8]:
def build_or_load_model(model_path, model_str_def, num_neurons_multiplier, output_shape):
    """Load a previously saved model if one exists, otherwise build a new one.

    :param model_path: path of the saved model without the '.h5' extension
    :param model_str_def: model string that selects and describes the architecture
    :param num_neurons_multiplier: sequence whose first and last entries scale the
        hidden-layer widths of the residual architectures
    :param output_shape: shape of the target table; ``output_shape[1]`` is used as
        the number of model outputs
    :return: ``(model, xscaler, yscaler)``; the scalers are the ones stored with a
        loaded model and ``None`` for a freshly built model

    The module-level ``input_dropout``, ``ndays``, ``nwindows`` and ``window_size``
    are read when a new model is built.
    """
    if os.path.exists(model_path + '.h5'):
        loaded_model = annutils.load_model(model_path,
                                           custom_objects={"mse_loss_masked": mse_loss_masked})
        print('Ignored defined model arc and loaded pre-trained model from %s.h5' % model_path)
        # Return right away: falling through used to replace the loaded model
        # with a freshly built, untrained one.
        return loaded_model.model, loaded_model.xscaler, loaded_model.yscaler

    len_stations = output_shape[1]
    print("len_stations: ", len_stations)

    architecture = model_str_def.lower()
    if 'resnet' in architecture:
        model = build_resnet_model(nhidden1=num_neurons_multiplier[0] * len_stations,
                                   nhidden2=num_neurons_multiplier[-1] * len_stations,
                                   output_shape=len_stations,
                                   num_res_blocks=1,
                                   input_dropout=input_dropout)
    elif ('residual_lstm' in architecture) or ('residual_gru' in architecture):
        print("model is lstm or gru")
        print("ndays: ", ndays)
        print("nwindows: ", nwindows)
        print("window_size: ", window_size)
        print("output_shape: ", output_shape)
        # Initial values for the frozen convolution of the residual model, as
        # produced by annutils.conv_filter_generator.
        conv_init = tf.constant_initializer(annutils.conv_filter_generator(ndays=ndays,
                                                                           window_size=window_size,
                                                                           nwindows=nwindows))

        # Take the layer type from the architecture name itself. Picking the
        # third '_'-separated token breaks as soon as a dropout token precedes
        # the name ('i118_dr0.10_residual_lstm_...' would yield 'residual').
        layer_type = 'gru' if 'residual_gru' in architecture else 'lstm'
        print("layer_type: ", layer_type)
        model = build_residual_lstm_model(num_neurons_multiplier[0] * len_stations,
                                          num_neurons_multiplier[-1] * len_stations,
                                          output_shape=len_stations,
                                          act_func='sigmoid',
                                          layer_type=layer_type,
                                          conv_init=conv_init,
                                          input_dropout=input_dropout)
    elif 'transformer' in architecture:
        model = build_transformer(head_size=256,
                                  num_heads=4,
                                  ff_dim=4,
                                  num_transformer_blocks=4,
                                  mlp_units=[128],
                                  output_shape=len_stations,
                                  mlp_dropout=0.4,
                                  dropout=0.25,
                                  input_dropout=input_dropout)
    else:
        # Everything else is described layer by layer in the model string.
        model = build_model_from_string_def(model_str_def, width_multiplier=len_stations)

    return model, None, None

# plot_history def

With the model builders defined, we are now ready to create some models and train them.






In [9]:
def plot_history(history):
    """Plot the training (and, if recorded, validation) loss per epoch.

    :param history: object whose ``history`` attribute is a dict with a 'loss'
        entry and optionally a 'val_loss' entry, e.g. the return value of ``model.fit``

    A new figure is created on every call, so the curves of successive training
    runs are not drawn on top of each other in one pyplot figure.
    """
    curves = history.history
    fig, ax = plt.subplots()
    ax.plot(curves['loss'], label='train')
    # 'val_loss' is only present when validation data was used during training.
    if 'val_loss' in curves:
        ax.plot(curves['val_loss'], label='val')
    ax.set_title('model loss')
    ax.set_ylabel('loss')
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    plt.show()


# RUN: Train model experiments

In [13]:
# Experiments to run; each name is a sub-folder of "Experiments".
experiments = ["4years_cal"]

# Model type -> num_neurons_multiplier (layer-width multipliers).
# Uncomment an entry to include that architecture in the run.
models = {
    # "ResNet": [8,2],
    # "Res-LSTM":[8,2],
    "LSTM":[8],
    # "GRU": [8],
    # "Res-GRU":[8,2],
    # "Transformer":[8,2]  # this seems like its taking something like 27h to train!!! 2000s per epoch
}

# Number of training epochs per model.
epochs = 50

for experiment in experiments:
    print("experiment: ", experiment)

    experiment_dir = os.path.join(local_root_path, "Experiments", experiment)

    # create folders to save results
    result_folders = ['models', 'results', 'images']
    for result_folder in result_folders:
        os.makedirs(os.path.join(experiment_dir, result_folder), exist_ok=True)

    # Inputs are read from the same experiment folder the results are written to.
    train_X = pd.read_csv(os.path.join(experiment_dir, "train_X.csv"), index_col=0, compression=compression_opts)
    train_Y = pd.read_csv(os.path.join(experiment_dir, "train_Y.csv"), index_col=0, compression=compression_opts)
    test_X = pd.read_csv(os.path.join(experiment_dir, "test_X.csv"), index_col=0, compression=compression_opts)
    test_Y = pd.read_csv(os.path.join(experiment_dir, "test_Y.csv"), index_col=0, compression=compression_opts)

    for model_type, num_neurons_multiplier in models.items():
        start = time.time()
        model_str_def = build_model_string(model_type, num_neurons_multiplier)

        # Prefix the input token so the string also encodes the input length.
        full_model_str_def = 'i%d_' % (ndays + nwindows) + model_str_def

        model_path_prefix = "mtl_%s" % (full_model_str_def)

        print("model_str_def: %s" % model_str_def)
        # NOTE(review): the bare prefix is passed here, so the check for an existing
        # '.h5' file looks in the working directory, while the model is saved under
        # model_savepath below. Confirm whether resuming from the saved model is intended.
        model, xscaler, yscaler = build_or_load_model(model_path_prefix, full_model_str_def, num_neurons_multiplier, output_shape=train_Y.shape)

        print("Model summary:")
        # summary() prints the table itself and returns None, so it must not be
        # wrapped in print().
        model.summary()

        if xscaler is None or yscaler is None:
            print("Creating new scalers")

        xscaler, yscaler = annutils.create_or_update_xyscaler(xscaler, yscaler, train_X, train_Y)
        print("Xscaler Min[0]: %s" % xscaler.min_val[0])
        print("Xscaler Max[0]: %s" % xscaler.max_val[0])

        scaled_X = xscaler.transform(train_X)
        scaled_Y = yscaler.transform(train_Y)

        scaled_test_X = xscaler.transform(test_X)
        scaled_test_Y = yscaler.transform(test_Y)

        # NOTE(review): the test split doubles as validation data, and patience (50)
        # is not smaller than epochs (50), so early stopping cannot end a run early.
        # Confirm both are intended.
        history = model.fit(
            scaled_X,
            scaled_Y,
            epochs=epochs,
            batch_size=128,
            validation_data=(scaled_test_X, scaled_test_Y),
            callbacks=[
                keras.callbacks.EarlyStopping(
                    monitor="val_loss", patience=50, mode="min", restore_best_weights=True),
                tensorboard_cb
            ],
            verbose=2,
        )

        plot_history(history)

        model_savepath = os.path.join(experiment_dir, 'models', model_path_prefix)
        # tf.saved_model.save(model, model_savepath)
        annutils.save_model(model_savepath, model, xscaler, yscaler)
        print('Model saved to %s' % model_savepath)
        print('Training time: %d min' % ((time.time() - start) / 60))
print("Done")

experiment:  4years_cal
model_str_def: lstm8_f_o1
len_stations:  23
layer_strings:['i118', 'lstm8', 'f', 'o1']
Model summary:
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 944)]             0         
                                                                 
 reshape_2 (Reshape)         (None, 118, 8)            0         
                                                                 
 permute_2 (Permute)         (None, 8, 118)            0         
                                                                 
 lstm_2 (LSTM)               (None, 184)               223008    
                                                                 
 flatten_2 (Flatten)         (None, 184)               0         
                                                                 
 dense_2 (Dense)             (None, 23)                4255      

<IPython.core.display.Javascript object>

Model saved to .\Experiments\4years_cal\models\mtl_i118_lstm8_f_o1
Training time: 1 min
Done
