## Import Statements

In [None]:
# math stuff
from math import floor
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# TensorFlow
import tensorflow as tf

# print floats in fixed-point notation instead of numpy's scientific (e) notation;
# this only affects how arrays are displayed, not the stored values
np.set_printoptions(suppress=True)

## TensorFlow Version

In [None]:
# print the installed TensorFlow version; the cells below rely on
# tf.placeholder, tf.contrib.seq2seq and session-based execution
print(tf.__version__)

## Create Dummy Data Set

In [None]:
# dummy stand-in for the database query result: 5000 time steps (rows) with
# 4 features (columns), filled with the consecutive values 0.0 ... 19999.0
sequence = np.arange(20000).reshape(5000, 4).astype(np.float64)

## Define Parameters and Hyperparameters

In [None]:
# ---- sequence geometry ----
# the encoder lstm cells unroll once per observed time step
observation_length = 240
# the decoder lstm cells unroll once per predicted time step
prediction_length = 12
# the sliding window used by the generators advances by 'stride' time steps
stride = 2
# one emitted window = observation part followed by the prediction part
combined_length = observation_length + prediction_length

# ---- train / validation split (first 60 % / next 40 % of the rows) ----
total_sequence_length = len(sequence)
train_sequence_length = floor(0.6 * total_sequence_length)
val_sequence_length = floor(0.4 * total_sequence_length)

# ---- number of sliding windows each generator emits ----
num_of_train_seq_batches = (train_sequence_length - combined_length) // stride
num_of_val_seq_batches = (val_sequence_length - combined_length) // stride
print("# of batches in training sequence:", num_of_train_seq_batches)
print("# of batches in validation sequence:", num_of_val_seq_batches)

# ---- feature counts ----
# the encoder receives every column of the sequence
features_enc_inp = len(sequence[1])
# the decoder input and the expected decoder output use a single feature
features_dec_inp = features_dec_exp_out = 1

# ---- number of windows stacked into one step of the model ----
batch_size = 3
print("batch size:", batch_size)

# ---- lstm stack layout ----
# number of stacked lstm cells and hidden units per cell in the encoder;
# the decoder mirrors the encoder
enc_num_cells = 2
enc_num_units = 240
dec_num_cells, dec_num_units = enc_num_cells, enc_num_units

# ---- optimizer variables ----
learning_rate = 0.0003
lr_decay = 0.95
momentum = 0.5
lambda_l2_reg = 0.02

# ---- dropout ----
dropout_keep_prob = 0.5

# ---- training schedule ----
num_epochs_train = 100
early_stop_limit = 100
num_batches_train = num_of_train_seq_batches // batch_size
num_batches_val = num_of_val_seq_batches // batch_size
print("# of training steps:", num_batches_train)
print("# of validation steps:", num_batches_val)

# run one validation step every 'train_to_val_ratio' training steps, then trim
# the training step count to an exact multiple of the validation step count
train_to_val_ratio = num_batches_train // num_batches_val
print("run validation op every:", train_to_val_ratio, "training steps")
num_batches_train = num_batches_val * train_to_val_ratio
print("new # of batches for training sequence:", num_batches_train)
print("early stop after:", early_stop_limit, "validation iterations without improvement")

## Create Generator for Training Data Set

In [None]:
# define generator function for training data
def gen_train(flip=False):
    """Yield min-max scaled sliding windows taken from the training split.

    The training split is the first ``train_sequence_length`` rows of
    ``sequence``. Windows are ``combined_length`` rows long and start every
    ``stride`` rows; ``num_of_train_seq_batches`` windows are emitted.

    Each window is scaled on its own: a fresh ``MinMaxScaler`` is fitted on
    the whole window (observation and prediction rows together), so every
    feature column of a window is mapped to the range [0, 1].

    Args:
        flip: when True, reverse the row (time) order of each window before
            yielding it, for source data that is ordered by date descending.
            The default (False) yields rows in their stored order, which is
            what this generator has always emitted.

    Yields:
        A 2-D numpy array with one row per time step of the window and one
        column per feature.
    """
    train_sequence = sequence[:train_sequence_length]

    for start in range(0, num_of_train_seq_batches * stride, stride):
        window = train_sequence[start:start + combined_length]

        # fit_transform returns a new array, so the source data is not modified
        scaled = MinMaxScaler().fit_transform(window)

        # previously a flipped copy was computed here but never used;
        # the flip is now applied only when explicitly requested
        yield np.flipud(scaled) if flip else scaled

## Create Generator for Validation Data Set

In [None]:
# define generator function for validation data
def gen_val(flip=False):
    """Yield min-max scaled sliding windows taken from the validation split.

    The validation split is the ``val_sequence_length`` rows of ``sequence``
    that directly follow the training split. Windows are ``combined_length``
    rows long and start every ``stride`` rows; ``num_of_val_seq_batches``
    windows are emitted.

    Each window is scaled on its own: a fresh ``MinMaxScaler`` is fitted on
    the whole window (observation and prediction rows together), so every
    feature column of a window is mapped to the range [0, 1].

    Args:
        flip: when True, reverse the row (time) order of each window before
            yielding it, for source data that is ordered by date descending.
            The default (False) yields rows in their stored order, which is
            what this generator has always emitted.

    Yields:
        A 2-D numpy array with one row per time step of the window and one
        column per feature.
    """
    val_start = train_sequence_length
    val_sequence = sequence[val_start:val_start + val_sequence_length]

    for start in range(0, num_of_val_seq_batches * stride, stride):
        window = val_sequence[start:start + combined_length]

        # fit_transform returns a new array, so the source data is not modified
        scaled = MinMaxScaler().fit_transform(window)

        # previously a flipped copy was computed here but never used;
        # the flip is now applied only when explicitly requested
        yield np.flipud(scaled) if flip else scaled

## Reset Default TensorFlow Graph and Create Sequence Length Tensors

In [None]:
# start from an empty default graph so that re-running the notebook cells
# does not pile up duplicate ops
tf.reset_default_graph()

# per-example sequence lengths: one entry per batch element, all equal to the
# observation length (encoder side) or the prediction length (decoder side)
seq_length_inp = tf.fill(dims=[batch_size], value=observation_length)
seq_length_out = tf.fill(dims=[batch_size], value=prediction_length)

## Create TensorFlow Training Input and Output

In [None]:
# NOTE: the tf.placeholder that used to be bound to 'train_next_element' here
# has been removed. It was overwritten by the iterator output below before
# anything read it, so it only left an unused node in the graph.

# create training dataset from generator; every element is one scaled window
# of shape [combined_length, features_enc_inp]
train_dataset = tf.data.Dataset.from_generator(
    gen_train,
    (tf.float32),
    (tf.TensorShape([combined_length, features_enc_inp]))
    )

# prefetch 'number of batches' training sequences of data
train_prefetched = train_dataset.prefetch(num_of_train_seq_batches)

# shuffle training windows; the buffer holds every window the generator emits,
# so the shuffle covers the whole training set
train_buffer_size = tf.constant(
    num_of_train_seq_batches,
    dtype=tf.int64)

train_shuffled = train_prefetched.shuffle(
    train_buffer_size,
    seed=None
)

# stack 'batch_size' windows into one element of shape
# [batch, combined_length, features_enc_inp]
# NOTE(review): batch() keeps a smaller final batch when the number of windows
# is not a multiple of batch_size, while the slices below request exactly
# 'batch_size' rows - confirm the loop never reaches such a batch
train_batched = train_shuffled.batch(batch_size)

# create initializable training iterator (re-initialized at every epoch)
train_iterator = train_batched.make_initializable_iterator()
train_next_element = train_iterator.get_next()

# create training encoder input slice 
# from [first batch, first time step in observation sequence, first feature]
# to [last batch, last time step in observation sequence, last feature]
train_enc_inp = tf.slice(
    train_next_element,
    [0, 0, 0],
    [batch_size, observation_length, features_enc_inp])

# create training decoder input slice 
# from [first batch, first time step in prediction sequence, first feature]
# to [last batch, last time step in prediction sequence, first feature]
# NOTE(review): this is the same slice as the expected output below, so at
# every step the decoder is fed the value it is asked to predict - confirm
# whether the decoder input should lag the target by one time step
train_dec_inp = tf.slice(
    train_next_element,
    [0, observation_length, 0],
    [batch_size, prediction_length, features_dec_inp])

# create training decoder expected output slice 
# from [first batch, first time step in prediction sequence, first feature]
# to [last batch, last time step in prediction sequence, first feature]
train_dec_exp_out = tf.slice(
    train_next_element,
    [0, observation_length, 0],
    [batch_size, prediction_length, features_dec_exp_out])

## Create TensorFlow Validation Input and Output

In [None]:
# NOTE: the tf.placeholder that used to be bound to 'val_next_element' here
# has been removed. It was overwritten by the iterator output below before
# anything read it, so it only left an unused node in the graph.

# create validation dataset from generator; every element is one scaled window
# of shape [combined_length, features_enc_inp]
val_dataset = tf.data.Dataset.from_generator(
    gen_val,
    (tf.float32),
    (tf.TensorShape([combined_length, features_enc_inp]))
    )

# prefetch 'number of batches' validation sequences of data
val_prefetched = val_dataset.prefetch(num_of_val_seq_batches)

# shuffle validation windows; the buffer holds every window the generator
# emits, so the shuffle covers the whole validation set
val_buffer_size = tf.constant(
    num_of_val_seq_batches,
    dtype=tf.int64)

val_shuffled = val_prefetched.shuffle(
    val_buffer_size,
    seed=None
)

# stack 'batch_size' windows into one element of shape
# [batch, combined_length, features_enc_inp]
# NOTE(review): batch() keeps a smaller final batch when the number of windows
# is not a multiple of batch_size, while the slices below request exactly
# 'batch_size' rows - confirm the loop never reaches such a batch
val_batched = val_shuffled.batch(batch_size)

# create initializable validation iterator (re-initialized at every epoch)
val_iterator = val_batched.make_initializable_iterator()
val_next_element = val_iterator.get_next()

# create validation encoder input slice 
# from [first batch, first time step in observation sequence, first feature]
# to [last batch, last time step in observation sequence, last feature]
val_enc_inp = tf.slice(
    val_next_element,
    [0, 0, 0],
    [batch_size, observation_length, features_enc_inp])

# create validation decoder input slice 
# from [first batch, first time step in prediction sequence, first feature]
# to [last batch, last time step in prediction sequence, first feature]
# NOTE(review): this is the same slice as the expected output below, so at
# every step the decoder is fed the value it is asked to predict - confirm
# whether the decoder input should lag the target by one time step
val_dec_inp = tf.slice(
    val_next_element,
    [0, observation_length, 0],
    [batch_size, prediction_length, features_dec_inp])

# create validation decoder expected output slice 
# from [first batch, first time step in prediction sequence, first feature]
# to [last batch, last time step in prediction sequence, first feature]
val_dec_exp_out = tf.slice(
    val_next_element,
    [0, observation_length, 0],
    [batch_size, prediction_length, features_dec_exp_out])

## Create Encoder and Decoder Cells

In [None]:
with tf.variable_scope('Seq2seq', reuse = tf.AUTO_REUSE):

    # encoder: 'enc_num_cells' stacked basic lstm cells with 'enc_num_units'
    # units each; dropout with a fixed keep probability is applied to the
    # output of the whole stack
    enc_cells = [
        tf.nn.rnn_cell.BasicLSTMCell(enc_num_units)
        for _ in range(enc_num_cells)
    ]
    enc_cell = tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.MultiRNNCell(enc_cells),
        output_keep_prob = dropout_keep_prob)

    # decoder: same layout, built from the decoder hyperparameters
    dec_cells = [
        tf.nn.rnn_cell.BasicLSTMCell(dec_num_units)
        for _ in range(dec_num_cells)
    ]
    dec_cell = tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.MultiRNNCell(dec_cells),
        output_keep_prob = dropout_keep_prob)

## Define Training Model

In [None]:
with tf.variable_scope('Seq2seq', reuse = tf.AUTO_REUSE):

    # All tensors in this graph are batch-major ([batch, time, features]), as
    # fixed by the tf.slice calls that build the encoder/decoder inputs.
    # 'time_major' was previously read from a variable that is never defined
    # in this notebook (NameError), so the layout is now passed explicitly.

    # define training encoder
    train_enc_out, train_enc_state = tf.nn.dynamic_rnn(
        enc_cell,
        train_enc_inp,
        dtype = tf.float32,
        sequence_length = seq_length_inp,
        time_major = False)

    # define training training helper (feeds 'train_dec_inp' to the decoder
    # step by step)
    train_training_helper = tf.contrib.seq2seq.TrainingHelper(
        train_dec_inp,
        seq_length_out,
        time_major = False)

    # define training decoder, initialized with the final encoder state
    train_decoder = tf.contrib.seq2seq.BasicDecoder(
        dec_cell,
        train_training_helper,
        train_enc_state)

    # define training dynamic decoding
    train_dec_out, train_dec_state, train_dec_out_seq_length = tf.contrib.seq2seq.dynamic_decode(
        train_decoder,
        output_time_major = False)

    # extract training logits from decoder output
    train_dec_out_logits = train_dec_out.rnn_output

    # training dense layer to reduce output to 'features_dec_exp_out' feature(s);
    # named explicitly ("dense" is also the default name of the first dense
    # layer in this scope) so other graphs built under 'Seq2seq' can share
    # its weights by name
    train_output_dense = tf.layers.dense(
        train_dec_out_logits,
        features_dec_exp_out,
        name = "dense")

    # training loss function
    # tf.nn.l2_loss already returns a scalar (sum of squares / 2), so the
    # surrounding reduce_mean leaves the value unchanged
    train_loss = tf.reduce_mean(tf.nn.l2_loss(train_output_dense - train_dec_exp_out))

    # l2 regularization using all variables except for bias and batch_resample
    # the name check is case-insensitive: TensorFlow names these variables
    # '.../bias', which the former "Bias" test never matched, so biases were
    # regularized as well
    l2 = lambda_l2_reg * sum(
        tf.nn.l2_loss(tf_var)
            for tf_var in tf.trainable_variables()
            if not ("noreg" in tf_var.name or "bias" in tf_var.name.lower())
    )
    train_loss += l2

    # optimizer type and variables
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate,
        decay = lr_decay,
        momentum = momentum,
        epsilon = 1e-10)

    # train operation
    train_op = optimizer.minimize(train_loss)

## Define Validation Model

In [None]:
with tf.variable_scope('Seq2seq', reuse = tf.AUTO_REUSE):

    # All tensors in this graph are batch-major ([batch, time, features]), as
    # fixed by the tf.slice calls that build the encoder/decoder inputs.
    # 'time_major' was previously read from a variable that is never defined
    # in this notebook (NameError), so the layout is now passed explicitly.

    # NOTE(review): 'enc_cell' and 'dec_cell' carry a DropoutWrapper with a
    # fixed keep probability, so dropout is also active in this validation
    # graph - confirm whether that is intended

    # define validation encoder
    val_enc_out, val_enc_state = tf.nn.dynamic_rnn(
        enc_cell,
        val_enc_inp,
        dtype = tf.float32,
        sequence_length = seq_length_inp,
        time_major = False)

    # define validation training helper (feeds 'val_dec_inp' to the decoder
    # step by step)
    val_training_helper = tf.contrib.seq2seq.TrainingHelper(
        val_dec_inp,
        seq_length_out,
        time_major = False)

    # define validation decoder, initialized with the final encoder state
    val_decoder = tf.contrib.seq2seq.BasicDecoder(
        dec_cell,
        val_training_helper,
        val_enc_state)

    # define validation dynamic decoding
    val_dec_out, val_dec_state, val_dec_out_seq_length = tf.contrib.seq2seq.dynamic_decode(
        val_decoder,
        output_time_major = False)

    # extract validation logits from decoder output
    val_dec_out_logits = val_dec_out.rnn_output

    # validation dense layer to reduce output to 'features_dec_exp_out' feature(s)
    # The layer is addressed by name so that, under AUTO_REUSE, it shares the
    # weights of the training graph's output layer ('Seq2seq/dense'). Without
    # a name, tf.layers.dense creates a second layer ("dense_1") whose weights
    # are never trained.
    val_output_dense = tf.layers.dense(
        val_dec_out_logits,
        features_dec_exp_out,
        name = "dense")

    # validation loss function
    # tf.nn.l2_loss already returns a scalar (sum of squares / 2), so the
    # surrounding reduce_mean leaves the value unchanged
    val_loss = tf.reduce_mean(tf.nn.l2_loss(val_output_dense - val_dec_exp_out))

    # l2 regularization using all variables except for bias and batch_resample
    # the name check is case-insensitive: TensorFlow names these variables
    # '.../bias', which the former "Bias" test never matched, so biases were
    # regularized as well
    l2 = lambda_l2_reg * sum(
        tf.nn.l2_loss(tf_var)
            for tf_var in tf.trainable_variables()
            if not ("noreg" in tf_var.name or "bias" in tf_var.name.lower())
    )
    val_loss += l2

## Check Trainable Variables

In [None]:
# list every trainable variable registered in the default graph so far;
# useful for checking which weights the training and validation graphs share
for tf_var in tf.trainable_variables():
    print(tf_var)

## Run Training and Validation Session

In [None]:
# define session (left open on purpose so later cells can keep using it)
sess = tf.InteractiveSession()

# initialize global and local variables
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

# check for uninitialized variables
# print(sess.run(tf.report_uninitialized_variables()))

# start from +inf so the first validation loss always becomes the best one,
# whatever the scale of the loss is
lowest_val_loss = float("inf")
early_stop_counter = 0
stop_training = False

# run training and validation
print("epoch batch training_loss validation_loss lowest_val_loss early_stop_counter")
for epoch in range(num_epochs_train):
    # rewind both input pipelines at the start of every epoch
    sess.run([train_iterator.initializer, val_iterator.initializer])

    for batch in range(num_batches_train):
        try:
            train_results = sess.run([train_op, train_loss])
        except tf.errors.OutOfRangeError:
            # nothing left to train on in this epoch; go to the next epoch
            # instead of retrying for every remaining batch index
            print("end of training dataset")
            break

        # validate only every 'train_to_val_ratio' training steps
        if batch % train_to_val_ratio != 0:
            continue

        try:
            val_results = sess.run(val_loss)
        except tf.errors.OutOfRangeError:
            # no fresh validation loss is available, so skip the bookkeeping
            # rather than reusing a stale (or undefined) 'val_results'
            print("end of validation dataset")
            continue

        if val_results < lowest_val_loss:
            lowest_val_loss = val_results
            early_stop_counter = 0
        else:
            early_stop_counter += 1
        print(epoch, batch, train_results[1], val_results, lowest_val_loss, early_stop_counter)

        # stop as soon as 'early_stop_limit' validations in a row brought no
        # improvement, instead of idling through the remaining iterations
        if early_stop_counter >= early_stop_limit:
            print("early stop at epoch", epoch, "batch", batch)
            stop_training = True
            break

    if stop_training:
        break