In [26]:
'''
Refer to https://medium.com/onfido-tech/higher-level-apis-in-tensorflow-67bfb602e6c0
Basically, we will use high level tensorflow's API: Estimator + Experiment + Dataset
'''
from __future__ import division, print_function

import tensorflow as tf
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib import rnn
import numpy as np
import h5py
import scipy.io

# tf needs >= 1.3.0
assert tf.__version__>='1.3.0', 'tensorflow version needs to be no lower than 1.3.0'

In [3]:
np.random.seed(1337)

# Load the data into python
Using Dataset to load data

In [None]:
# Define data loaders #####################################
class IteratorInitializerHook(tf.train.SessionRunHook):
    """Hook to initialise data iterator after Session is created."""

    def __init__(self):
        super(IteratorInitializerHook, self).__init__()
        self.iterator_initializer_func = None

    def after_create_session(self, session, coord):
        """Initialise the iterator after the session has been created."""
        self.iterator_initializer_func(session)

# Define the training inputs
def get_train_inputs(batch_size, data, test=False):
    """Return the input function to get the training data.
    Args:
        batch_size (int): Batch size of training iterator that is returned
                          by the input function.
        data (Object): Object holding the loaded data.
        test (boolean): if test, then load valid mat for testing purposes
    Returns:
        (Input function, IteratorInitializerHook):
            - Function that returns (features, labels) when called.
            - Hook to initialise input iterator.
    """
    iterator_initializer_hook = IteratorInitializerHook()

    def train_inputs():
        """Returns training set as Operations.
        Returns:
            (features, labels) Operations that iterate over the dataset
            on every evaluation
        """
        with tf.name_scope('Training_data'):
            # Get  data
            if test:
                DNA = data['validxdata']
                labels = data['validdata']
            else:
                DNA = data['trainxdata']
                labels = data['traindata']
            # Define placeholders
            DNA_placeholder = tf.placeholder(
                DNA.dtype, DNA.shape)
            labels_placeholder = tf.placeholder(
                labels.dtype, labels.shape)
            # Build dataset iterator
            dataset = tf.contrib.data.Dataset.from_tensor_slices(
                (DNA_placeholder, labels_placeholder))
            dataset = dataset.repeat(None)  # Infinite iterations
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.batch(batch_size)
            iterator = dataset.make_initializable_iterator()
            next_example, next_label = iterator.get_next()
            # Set runhook to initialize iterator
            iterator_initializer_hook.iterator_initializer_func = \
                lambda sess: sess.run(
                    iterator.initializer,
                    feed_dict={images_placeholder: DNA,
                               labels_placeholder: labels})
            # Return batched (features, labels)
            return next_example, next_label

    # Return function and hook
    return train_inputs, iterator_initializer_hook

def get_test_inputs(batch_size, data, test=False):
    """Return the input function to get the test data.
    Args:
        batch_size (int): Batch size of training iterator that is returned
                          by the input function.
        data (Object): Object holding the loaded data.
        test (boolean): if test, then load valid mat for testing purposes
    Returns:
        (Input function, IteratorInitializerHook):
            - Function that returns (features, labels) when called.
            - Hook to initialise input iterator.
    """
    iterator_initializer_hook = IteratorInitializerHook()

    def test_inputs():
        """Returns training set as Operations.
        Returns:
            (features, labels) Operations that iterate over the dataset
            on every evaluation
        """
        with tf.name_scope('Test_data'):
            # Get data
            if test:
                DNA = data['validxdata']
                labels = data['validdata']
            else:
                DNA = data['testxdata']
                labels = data['testdata']
            # Define placeholders
            DNA_placeholder = tf.placeholder(
                DNA.dtype, DNA.shape)
            labels_placeholder = tf.placeholder(
                labels.dtype, labels.shape)
            # Build dataset iterator
            dataset = tf.contrib.data.Dataset.from_tensor_slices(
                (DNA_placeholder, labels_placeholder))
            dataset = dataset.batch(batch_size)
            iterator = dataset.make_initializable_iterator()
            next_example, next_label = iterator.get_next()
            # Set runhook to initialize iterator
            iterator_initializer_hook.iterator_initializer_func = \
                lambda sess: sess.run(
                    iterator.initializer,
                    feed_dict={images_placeholder: DNA,
                               labels_placeholder: labels})
            return next_example, next_label

    # Return function and hook
    return test_inputs, iterator_initializer_hook

In [4]:
#trainmat = h5py.File('data/deepsea_train/train.mat', "r")
validmat = scipy.io.loadmat('Data/valid.mat')
#testmat = scipy.io.loadmat('Data/test.mat')

In [6]:
validmat

{'__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Thu Jun 18 23:23:16 2015',
 '__version__': '1.0',
 'validdata': array([[0, 0, 0, ..., 1, 0, 0],
        [1, 1, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 1, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'validxdata': array([[[0, 0, 0, ..., 0, 0, 1],
         [0, 0, 0, ..., 1, 1, 0],
         [1, 0, 0, ..., 0, 0, 0],
         [0, 1, 1, ..., 0, 0, 0]],
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 1, ..., 0, 0, 1],
         [1, 0, 0, ..., 1, 0, 0],
         [0, 1, 0, ..., 0, 1, 0]],
 
        [[0, 1, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [1, 0, 0, ..., 0, 1, 0],
         [0, 0, 1, ..., 1, 0, 1]],
 
        ..., 
        [[1, 0, 1, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 1, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 1, 0, ..., 1, 0, 1]],
 
        [[0, 0, 0, ..., 0, 0, 0],
   

In [5]:
#X_train = np.transpose(np.array(trainmat['trainxdata']), axes=(2,0,1))

In [9]:
#y_train = np.transpose(trainmat['traindata']).T

NameError: name 'trainmat' is not defined

In [11]:
print(y_train.shape)
y_train[1:2]

(919, 4400000)


array([[0, 0, 0, ..., 0, 1, 0]], dtype=uint8)

In [5]:
learning_rate = 0.001
num_steps = 2000
batch_size = 1000

input_dim = 4
input_length = 1000
num_classes = 919
dropout_cnn = 0.2

# DanQNN 

In [23]:
def BDNN(x):
    forward_lstm = rnn.LSTMCell(320, reuse=tf.get_variable_scope().reuse)
    backward_lstm = rnn.LSTMCell(320, reuse=tf.get_variable_scope().reuse)
    
    brnn, _ = tf.nn.bidirectional_dynamic_rnn(forward_lstm, backward_lstm, x, dtype=tf.float32)

    return brnn

def DanQNN(x, input_length, nb_filter, filter_length,
            activation=tf.nn.relu, subsample=1, border="VALID", reuse=True, max_strides=13,
          max_pool_size=13, mode=None):
    
    '''
    This function creates a CNN followed by a bidirectional LSTM RNN as per DanQ publication
    We aim to have this implimented in tensorflow as it will be easier to
    modify the implimentation for other uses if we incorporate with tensorboard.
    '''
    
    with tf.variable_scope('DanQNN', reuse=reuse):
    
        conv1d = tf.layers.conv1d(x, filters=nb_filter , strides=subsample, 
                                  padding=border, kernel_size=input_length, data_format='channels_first', reuse=reuse)

        max1 = tf.layers.max_pooling1d(conv1d, pool_size=max_pool_size, strides=max_strides)
        
        max1 = tf.layers.dropout(max1, rate=0.2,training=mode == tf.estimator.ModeKeys.TRAIN)
        

        brnn = BDNN(max1)
        
        brnn = tf.layers.dropout(brnn, rate=0.5,training=mode == tf.estimator.ModeKeys.TRAIN)

        brnn = tf.contrib.layers.flatten(brnn)
        print(brnn)
    with tf.variable_scope('DanQdense1', reuse=reuse):
        fc1 = tf.layers.dense(brnn, units=925, activation=tf.nn.relu)
        print(fc1)
    #with tf.variable_scope('DanQdense2', reuse=reuse):
        #fc2 = tf.layers.dense(fc1, units=925, activation=tf.nn.relu)
        #print(fc2)
    with tf.variable_scope('DanQdense2', reuse=reuse):
        fc2 = tf.layers.dense(fc1, units=919, activation=tf.nn.softmax)
        print(fc2)
        # Not sure if you have to return this function 
        #return fc3
        

In [24]:
tf.reset_default_graph()
DanQNN(tf.cast(validmat['validxdata'][1:100], dtype=tf.float32),1000,320,26,reuse=False)


Tensor("DanQNN/Flatten/Reshape:0", shape=(2, 760320), dtype=float32)
Tensor("DanQdense1/dense/Relu:0", shape=(2, 925), dtype=float32)
Tensor("DanQdense2/dense/Softmax:0", shape=(2, 919), dtype=float32)


In [None]:
if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

# 
logits_train = DanQNN(features, num_classes, reuse=False)
logits_test = DanQNN(features, num_classes, reuse=True)
    
pred_classes = tf.argmax(logits_test, axis=1)
pred_probas = tf.nn.softmax(logits_test)
    
    
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits_train, labels=tf.cast(labels, dtype=tf.int32)))
    