In [18]:
'''
Refer to https://medium.com/onfido-tech/higher-level-apis-in-tensorflow-67bfb602e6c0
Basically, we will use high level tensorflow's API: Estimator + Experiment + Dataset
'''
from __future__ import division, print_function

import tensorflow as tf
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib import rnn
from tensorflow.contrib.learn import ModeKeys
from tensorflow.contrib import slim
import numpy as np
import h5py
import scipy.io


# tf needs >= 1.3.0
assert tf.__version__>='1.3.0', 'tensorflow version needs to be no lower than 1.3.0'

In [None]:
trainmat = h5py.File('data/deepsea_train/train.mat', "r")
validmat = scipy.io.loadmat('/valid.mat')
testmat = scipy.io.loadmat('Data/test.mat')

# Set defult flags for script parsing

In [7]:
tf.logging.set_verbosity(tf.logging.DEBUG)

FLAGS = tf.app.flags.FLAGS
# Define the model and data directories i.e. model_dir where model will be saved
tf.app.flags.DEFINE_string(flag_name='model_dir', default_value='', docstring='')
tf.app.flags.DEFINE_string(flag_name='data_dir', default_value='', docstring='')

# Define and run the experiment

In [6]:
def run_experiment(argv=None):
    '''Run the experiment'''
    
    # Define the hyperparameters
    params = tf.contrib.training.HParams(
        learning_rate=0.001,
        n_classes=919,
        train_steps=5000,
        min_eval_frequency=100)
    
    # set the run config and directory to save the model and stats
    run_config = tf.contrib.learn.RunConfig()
    run_config = run_config.replace(model_dir=FLAGS.model_dir)
    
    learn_runner.run(
        experiment_fn=experiment_fn,
        run_config=run_config,
        schedule="train_and_evaluate",
        hparams=params)
                                    

# Experiment function

In [9]:
def experiment_fn(run_config, params):
    '''Create and experiment to train and evaluate the model'''
    
    run_config = run_config.replace(
                    save_checkpoints_steps=params.min_eval_frequency)
    
    # Define the classifier
    
    estimator = get_estimator(run_config, params)
    
    # Setup data loaders
    danq_train = train_input_fn, train_input_hook = get_train_inputs(
            batch_size=100, data=trainmat, test=False)
    
    danq_test = eval_input_fn, eval_input_hook = get_train_inputs(
            batch_size=100, data=trainmat, test=True)
    
    # Define the experiment
    
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=params.train_steps,
        min_eval_frequency=params.min_eval_frequency,
        train_monitors=[train_input_hook],
        eval_hooks=[eval_input_hook],
        eval_steps=None)
    
    return experiment
    

# Define model

In [10]:
def get_estimator(run_config, params):
    '''Return the model as a tensorflow object'''
    
    return tf.estimator.Estimator(
            model_fn=model_fn,
            params=params,
            config=run_config)
    

# Define the model function

In [14]:
def model_fn(features, labels, mode, params):
    '''Model function is used in the estimator and is required for running model'''
    
    is_training = mode == ModeKeys.TRAIN
    
    # Define the models architecture
    logits = architecture(features, is_training=is_training)
    predictions = tf.argmax(logits, axis=1)
    
    # Loss functions and not needed during inference
    
    loss = None
    train_op = None
    eval_metric_ops = {}
    
    if mode != ModeKeys.INFER:
        loss = tf.losses.sparse_softmax_cross_entropy(labels=tf.cast(labels, tf.int32), logits=logits)
        
        train_op = get_train_op_fn(loss, params)
        eval_metric_ops = get_eval_metric_ops(labels, predictions)
    return tf.estimator.EstimatorSpec(mode=mode,
                                     predictions=predictions,
                                     loss=loss,
                                     train_op=train_op,
                                     eval_metric_ops=eval_metric_ops)

    
    

# Define the training and eval metric ops

In [15]:
def get_train_op_fn(loss, params):
    """Get the training Op.

    Args:
         loss (Tensor): Scalar Tensor that represents the loss function.
         params (HParams): Hyperparameters (needs to have `learning_rate`)

    Returns:
        Training Op
    """
    return tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        optimizer=tf.train.AdamOptimizer,
        learning_rate=params.learning_rate
    )


def get_eval_metric_ops(labels, predictions):
    """Return a dict of the evaluation Ops.

    Args:
        labels (Tensor): Labels tensor for training and evaluation.
        predictions (Tensor): Predictions Tensor.
    Returns:
        Dict of metric results keyed by name.
    """
    return {
        'Accuracy': tf.metrics.accuracy(
            labels=labels,
            predictions=predictions,
            name='accuracy')
    }

# Define the architecture of NN

In [19]:
def BDNN(x):
    '''Bidirectional neural network'''
    
    forward_lstm = rnn.LSTMCell(320, reuse=tf.get_variable_scope().reuse)
    backward_lstm = rnn.LSTMCell(320, reuse=tf.get_variable_scope().reuse)
    
    brnn, _ = tf.nn.bidirectional_dynamic_rnn(forward_lstm, backward_lstm, x, dtype=tf.float32)

    return brnn

def architecture(inputs, is_training, scope='DanQNN'):
    
    '''
    This function creates a CNN followed by a bidirectional LSTM RNN as per DanQ publication
    We aim to have this implimented in tensorflow as it will be easier to
    modify the implimentation for other uses if we incorporate with tensorboard.
    '''
    
    with tf.variable_scope(scope):

        conv1d = tf.layers.conv1d(x, filters=nb_filter , strides=subsample, 
                                  padding=border, kernel_size=input_length, data_format='channels_first', reuse=reuse)

        max1 = tf.layers.max_pooling1d(conv1d, pool_size=max_pool_size, strides=max_strides)
        
        max1 = tf.layers.dropout(max1, rate=0.2,training=mode == tf.estimator.ModeKeys.TRAIN)
        

        brnn = BDNN(max1)
        
        brnn = tf.layers.dropout(brnn, rate=0.5,training=mode == tf.estimator.ModeKeys.TRAIN)

        brnn = tf.contrib.layers.flatten(brnn)
        
    with tf.variable_scope('DanQdense1', reuse=reuse):
        fc1 = tf.layers.dense(brnn, units=925, activation=tf.nn.relu)
        
    with tf.variable_scope('DanQdense2', reuse=reuse):
        fc2 = tf.layers.dense(fc1, units=919, activation=tf.nn.softmax)
        return fc2

In [8]:
# Define data loaders #####################################
class IteratorInitializerHook(tf.train.SessionRunHook):
    """Hook to initialise data iterator after Session is created."""

    def __init__(self):
        super(IteratorInitializerHook, self).__init__()
        self.iterator_initializer_func = None

    def after_create_session(self, session, coord):
        """Initialise the iterator after the session has been created."""
        self.iterator_initializer_func(session)

# Define the training inputs
def get_train_inputs(batch_size, data, test=False):
    """Return the input function to get the training data.
    Args:
        batch_size (int): Batch size of training iterator that is returned
                          by the input function.
        data (Object): Object holding the loaded data.
        test (boolean): if test, then load valid mat for testing purposes
    Returns:
        (Input function, IteratorInitializerHook):
            - Function that returns (features, labels) when called.
            - Hook to initialise input iterator.
    """
    iterator_initializer_hook = IteratorInitializerHook()

    def train_inputs():
        """Returns training set as Operations.
        Returns:
            (features, labels) Operations that iterate over the dataset
            on every evaluation
        """
        with tf.name_scope('Training_data'):
            # Get  data
            if test:
                DNA = data['validxdata']
                labels = data['validdata']
            else:
                DNA = data['trainxdata']
                labels = data['traindata']
            # Define placeholders
            DNA_placeholder = tf.placeholder(
                DNA.dtype, DNA.shape)
            labels_placeholder = tf.placeholder(
                labels.dtype, labels.shape)
            # Build dataset iterator
            dataset = tf.contrib.data.Dataset.from_tensor_slices(
                (DNA_placeholder, labels_placeholder))
            dataset = dataset.repeat(None)  # Infinite iterations
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.batch(batch_size)
            iterator = dataset.make_initializable_iterator()
            next_example, next_label = iterator.get_next()
            # Set runhook to initialize iterator
            iterator_initializer_hook.iterator_initializer_func = \
                lambda sess: sess.run(
                    iterator.initializer,
                    feed_dict={images_placeholder: DNA,
                               labels_placeholder: labels})
            # Return batched (features, labels)
            return next_example, next_label

    # Return function and hook
    return train_inputs, iterator_initializer_hook

    def get_test_inputs(batch_size, data, test=False):
        """Return the input function to get the test data.
        Args:
            batch_size (int): Batch size of training iterator that is returned
                          by the input function.
            data (Object): Object holding the loaded data.
            test (boolean): if test, then load valid mat for testing purposes
        Returns:
            (Input function, IteratorInitializerHook):
                - Function that returns (features, labels) when called.
                - Hook to initialise input iterator.
        """
        iterator_initializer_hook = IteratorInitializerHook()

    def test_inputs():
        """Returns training set as Operations.
        Returns:
            (features, labels) Operations that iterate over the dataset
            on every evaluation
        """
        with tf.name_scope('Test_data'):
            # Get data
            if test:
                DNA = data['validxdata']
                labels = data['validdata']
            else:
                DNA = data['testxdata']
                labels = data['testdata']
            # Define placeholders
            DNA_placeholder = tf.placeholder(
                DNA.dtype, DNA.shape)
            labels_placeholder = tf.placeholder(
                labels.dtype, labels.shape)
            # Build dataset iterator
            dataset = tf.contrib.data.Dataset.from_tensor_slices(
                (DNA_placeholder, labels_placeholder))
            dataset = dataset.batch(batch_size)
            iterator = dataset.make_initializable_iterator()
            next_example, next_label = iterator.get_next()
            # Set runhook to initialize iterator
            iterator_initializer_hook.iterator_initializer_func = \
                lambda sess: sess.run(
                    iterator.initializer,
                    feed_dict={images_placeholder: DNA,
                               labels_placeholder: labels})
            return next_example, next_label

    # Return function and hook
    return test_inputs, iterator_initializer_hook