In [23]:
"""A Contextual LSTM for prediction on the Endomondo Exercise Dataset

Trains the model described in:
(Zaremba, et. al.) Recurrent Neural Network Regularization
http://arxiv.org/abs/1409.2329


The hyperparameters used in the model:
- init_scale - the initial scale of the weights
- learning_rate - the initial value of the learning rate
- max_grad_norm - the maximum permissible norm of the gradient
- num_layers - the number of LSTM layers
- num_steps - the number of unrolled steps of LSTM
- hidden_size - the number of LSTM units
- max_epoch - the number of epochs trained with the initial learning rate
- max_max_epoch - the total number of epochs for training
- keep_prob - the probability of keeping weights in the dropout layer
- lr_decay - the decay of the learning rate for each epoch after "max_epoch"
- batch_size - the batch size


To run:

$ python Endomondo_RNN_tests.py --data_path=simple-examples/data/

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf

# from tensorflow.models.rnn.ptb import reader
# from tensorflow.models.rnn import *
from dataInterpreter_Endomondo_supervised_derivedData import dataInterpreter, metaDataEndomondo

# flags = tf.flags
logging = tf.logging

# flags.DEFINE_string(
#    "model", "small",
#    "A type of model. Possible options are: small, medium, large.")
# flags.DEFINE_string("data_path", None, "data_path")

# FLAGS = flags.FLAGS

# model = "Larry"
model = "small"
data_path = "../multimodalDBM/endomondoHR_proper.json"
summaries_dir = "logs"
# endoFeatures = ["speed", "sport", "heart_rate", "gender", "altitude"]#The features we want the model to care about
endoFeatures = ["sport", "heart_rate", "gender", "altitude", "time_elapsed", "distance"]
#endoFeatures = ["sport", "heart_rate", "gender", "altitude"]
trainValTestSplit = [0.8, 0.1, 0.1]
targetAtt = "heart_rate"


class EndoModel(object):
    """The Endomondo Contextual LSTM model."""

    def __init__(self, is_training, config):
        self.is_training=is_training
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        # vocab_size = config.vocab_size
        #dataDim = config.dataDim
        inputShape = config.inputShape
        targetShape = config.targetShape
        pos_weight = config.pos_weight  # This is a coefficient that weights the relative importance of positive prediction error and negative prediction error. The default is 1 (equal weight.)

        self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps, inputShape])
        self._targets = tf.placeholder(tf.float32, [batch_size, num_steps, targetShape])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
        # Other resonable activation functions include: activation=tf.nn.relu and activation=tf.nn.softmax
        # i.e. lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True, activation=tf.nn.relu)
        if is_training and config.keep_prob < 1:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers, state_is_tuple=True)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        # with tf.device("/cpu:0"):
        #  embedding = tf.get_variable("embedding", [vocab_size, size])
        #  inputs = tf.nn.embedding_lookup(embedding, self._input_data)
        inputs = self._input_data

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        inputs = [tf.squeeze(input_, [1])
                  for input_ in tf.split(1, num_steps, inputs)]
        outputs, state = tf.nn.rnn(cell, inputs, initial_state=self._initial_state)

        # Might need to change this stuff...
        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, targetShape])
        softmax_b = tf.get_variable("softmax_b", [targetShape])
        logits = tf.matmul(output, softmax_w) + softmax_b  # Probably need to change this...
        
        variable_summaries(inputs, 'inputs')
        variable_summaries(logits, 'logits')

        # Need a new loss function here...
        #loss = tf.nn.weighted_cross_entropy_with_logits(
        #    [logits],
        #    tf.reshape(self._targets, [-1, batch_size * num_steps, targetShape]),
        #    pos_weight)
        reshapedTargets=tf.reshape(self._targets, [-1, batch_size * num_steps, targetShape])
        logTarDiff = tf.sub(reshapedTargets, [logits])
        loss = tf.sqrt(tf.reduce_mean(tf.square(logTarDiff)))
        variable_summaries(loss, 'loss')
        variable_summaries(reshapedTargets, 'targets')
        variable_summaries(logTarDiff, 'logit-target difference')
        # loss = tf.nn.softmax_cross_entropy_with_logits(
        #    [logits],
        #    [tf.reshape(self._targets, [-1])],
        #    [tf.ones([batch_size * num_steps])])

        # loss = tf.nn.seq2seq.sequence_loss_by_example(
        #    [logits],
        #    [tf.reshape(self._targets, [-1])],
        #    [tf.ones([batch_size * num_steps])])
        self.loss = loss
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state
        
        self.merged = tf.merge_all_summaries()

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
        

    def assign_lr(self, session, lr_value):
        session.run(tf.assign(self.lr, lr_value))

    @property
    def input_data(self):
        return self._input_data

    @property
    def targets(self):
        return self._targets

    @property
    def initial_state(self):
        return self._initial_state

    @property
    def cost(self):
        return self._cost

    @property
    def final_state(self):
        return self._final_state

    @property
    def lr(self):
        return self._lr

    @property
    def train_op(self):
        return self._train_op


class ReallySmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 40
    hidden_size = 100
    max_epoch = 2
    max_max_epoch = 8
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    # vocab_size = 10000
    dataDim = 0
    inputShape = []
    targetShape = []
    pos_weight = 1


class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 25
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    # vocab_size = 10000
    dataDim = 0
    inputShape = []
    targetShape = []
    pos_weight = 1


class MediumConfig(object):
    """Medium config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    # vocab_size = 10000
    dataDim = 0
    inputShape = []
    targetShape = []
    pos_weight = 1


class LargeConfig(object):
    """Large config."""
    init_scale = 0.04
    learning_rate = 1.0
    max_grad_norm = 10
    num_layers = 2
    num_steps = 35
    hidden_size = 1500
    max_epoch = 14
    max_max_epoch = 55
    keep_prob = 0.35
    lr_decay = 1 / 1.15
    batch_size = 20
    # vocab_size = 10000
    dataDim = 0
    inputShape = []
    targetShape = []
    pos_weight = 1


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    # vocab_size = 10000
    dataDim = 0
    inputShape = []
    targetShape = []
    pos_weight = 1


class LarryConfig(object):
    """Larry's custom config"""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 400
    max_epoch = 5
    max_max_epoch = 20
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    # vocab_size = 10000
    dataDim = 0
    inputShape = []
    targetShape = []
    pos_weight = 1
    
def variable_summaries(var, name):
  """Attach a lot of summaries to a Tensor."""
  with tf.name_scope('summaries'):
    mean = tf.reduce_mean(var)
    tf.scalar_summary('mean/' + name, mean)
    with tf.name_scope('stddev'):
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.scalar_summary('stddev/' + name, stddev)
    tf.scalar_summary('max/' + name, tf.reduce_max(var))
    tf.scalar_summary('min/' + name, tf.reduce_min(var))
    #for i, val in enumerate(tf.reshape(var, [-1]).eval()):
    #    tf.scalar_summary('full/' + name + "_index:" + str(i), val)
    tf.histogram_summary(name, var)

def run_epoch(session, m, data_interp, eval_op, trainValidTest, verbose=False, writer=None):
    """Runs the model on the given data."""
    epoch_size = ((data_interp.numDataPoints // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0

    # c and h are the two components of the lstm state tuple
    # See https://www.tensorflow.org/versions/r0.9/api_docs/python/rnn_cell.html#classes-storing-split-rnncell-state
    # Must handle the seperate lstm states seperately since the multiRNN class doesn't yet have a way to do this for tuple states...

    state1_c = m.initial_state[0].c.eval()
    state1_h = m.initial_state[1].h.eval()
    state2_c = m.initial_state[0].c.eval()
    state2_h = m.initial_state[1].h.eval()

    state1 = (state1_c, state1_h)  # the initial state of the first lstm
    state2 = (state2_c, state2_h)  # the initial state of the second lstm

    # data_interp.newEpoch()
    dataGen = data_interp.endoIteratorSupervised(m.batch_size, m.num_steps, trainValidTest, targetAtt)  # A generator over the endomondo data
    # global dataGenTest
    # dataGenTest = dataGen
    # global modelTest
    # modelTest=data_interp
    for step, (x, y) in enumerate(dataGen):

        feed_dictionary = {m.input_data: x, m.targets: y,
                           m.initial_state[0].c: state1[0],
                           m.initial_state[0].h: state1[1],
                           m.initial_state[1].c: state2[0],
                           m.initial_state[1].h: state2[1],
                           }

        # feed_dict.update( network.all_drop )
        
        #if True:
        if m.is_training:
            loss, cost, state1_c, state1_h, state2_c, state2_h, summary, _ = session.run([m.loss, m.cost,
                                                                                 m.final_state[0].c,
                                                                                 m.final_state[0].h,
                                                                                 m.final_state[1].c,
                                                                                 m.final_state[1].h,
                                                                                 m.merged,
                                                                                 eval_op],
                                                                                 feed_dict=feed_dictionary)
        
            writer.add_summary(summary, step)
        else:
            loss, cost, state1_c, state1_h, state2_c, state2_h, _ = session.run([m.loss, m.cost,
                                                                                 m.final_state[0].c,
                                                                                 m.final_state[0].h,
                                                                                 m.final_state[1].c,
                                                                                 m.final_state[1].h,
                                                                                 eval_op],
                                                                                 feed_dict=feed_dictionary)

        state1 = (state1_c, state1_h)
        state2 = (state2_c, state2_h)

        # print(cost)

        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            # print(np.log(-costs//iters))
            print("%.3f RMSE: %.4f speed: %.0f dpps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    # print(costs)
    # print(iters)
    return np.exp(costs / iters)


def get_config():
    if model == "small":
        return SmallConfig()
    elif model == "medium":
        return MediumConfig()
    elif model == "large":
        return LargeConfig()
    elif model == "test":
        return TestConfig()
    elif model == "Larry":
        return LarryConfig()
    elif model == "really small":
        return ReallySmallConfig()
    else:
        raise ValueError("Invalid model: %s", model)


def main():
    if not data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    # raw_data = reader.ptb_raw_data(data_path)
    # train_data, valid_data, test_data, _ = raw_data
    endo_reader = dataInterpreter(fn=data_path, scaleVals=False)
    endo_reader.buildDataSchema(endoFeatures, trainValTestSplit)

    inputShape = endo_reader.getInputDim(targetAtt)
    targetShape = endo_reader.getTargetDim(targetAtt)

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 50 #Was originally set to 1
    #config.dataDim = dataShape
    config.inputShape = inputShape
    config.targetShape = targetShape
    #eval_config.dataDim = dataShape
    eval_config.inputShape = inputShape
    eval_config.targetShape = targetShape

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = EndoModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = EndoModel(is_training=False, config=config)
            mtest = EndoModel(is_training=False, config=eval_config)
            
        train_writer = tf.train.SummaryWriter(summaries_dir + '/train', session.graph)
        test_writer = tf.train.SummaryWriter(summaries_dir + '/test')

        tf.initialize_all_variables().run()

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.4f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, endo_reader, m.train_op, 'train',
                                         verbose=True, writer=train_writer)
            print("Epoch: %d Train RMSE: %.4f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, endo_reader, tf.no_op(), 'valid', writer=test_writer)
            print("Epoch: %d Valid RMSE: %.4f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, endo_reader, tf.no_op(), 'test', writer=test_writer)
        print("Test RMSE: %.4f" % test_perplexity)

if __name__ == "__main__":
    main()


Loading metadata
Metadata loaded
Epoch: 1 Learning rate: 1.0000
0.020 RMSE: 1.3154 speed: 9048 dpps
0.119 RMSE: 1.1162 speed: 13400 dpps
0.217 RMSE: 1.0856 speed: 13766 dpps
0.316 RMSE: 1.0743 speed: 13424 dpps
0.415 RMSE: 1.0672 speed: 13433 dpps
0.514 RMSE: 1.0637 speed: 13580 dpps
0.613 RMSE: 1.0599 speed: 13762 dpps
0.711 RMSE: 1.0567 speed: 13732 dpps
0.810 RMSE: 1.0551 speed: 13639 dpps
0.909 RMSE: 1.0545 speed: 13358 dpps
Epoch: 1 Train RMSE: 1.0531
Epoch: 1 Valid RMSE: 1.0448
Epoch: 2 Learning rate: 1.0000
0.020 RMSE: 1.0457 speed: 11617 dpps
0.119 RMSE: 1.0449 speed: 13235 dpps
0.217 RMSE: 1.0448 speed: 13818 dpps
0.316 RMSE: 1.0455 speed: 13504 dpps
0.415 RMSE: 1.0456 speed: 13324 dpps
0.514 RMSE: 1.0449 speed: 13566 dpps
0.613 RMSE: 1.0450 speed: 13532 dpps
0.711 RMSE: 1.0453 speed: 13594 dpps
0.810 RMSE: 1.0451 speed: 13516 dpps
0.909 RMSE: 1.0449 speed: 13531 dpps
Epoch: 2 Train RMSE: 1.0450
Epoch: 2 Valid RMSE: 1.0473
Epoch: 3 Learning rate: 1.0000
0.020 RMSE: 1.0452 spee