# CA RNN
Toy example in 1d


In [13]:
'''
Example
'''
import tensorflow as tf
import numpy as np

import utils


class GRUCell(object):
    '''Standard GRU cell.
    
    Calling this cell on a RNN network returns a tuple of activations (h, h)
    '''
    def __init__(self, state_size, activation=tf.sigmoid):
        self._state_size = state_size
        self._activation = activation

    @property
    def state_size(self):
        return self._state_size
    
#     def zero_state(self, batch_size):
#         return 
        
    def __call__(self, s, scope=None):
        with tf.name_scope('GRUCell'):
            
            # trainable variables
            with tf.variable_scope('weights', initializer=tf.contrib.layers.xavier_initializer()) as scope:
                W_u = tf.get_variable('W_u', [self._state_size, self._state_size])
                W_r = tf.get_variable('W_r', [self._state_size, self._state_size])
                W   = tf.get_variable('W', [self._state_size, self._state_size])
                b   = tf.get_variable('b', [self._state_size], initializer=tf.constant_initializer(1.0))
            scope.reuse_variables()
            
            # u = update, r = reset, c = candidates, h = new hidden states
            u = self._activation(tf.matmul(s, W_u) + b)
            r = self._activation(tf.matmul(s, W_r) + b)
            c = tf.tanh(tf.matmul(tf.multiply(s, r), W) + b)
            h = tf.multiply(s, u) + tf.multiply((1 - u), c)
            return h, h

        
class ca_rnn(object):
    '''
    A CA RNN neural network tensorflow model.
    
    Aims to generalise a CA algorithm from training data
    
    Properties:
        inference:
        loss:
        optimizer:
        prediction:
    '''
    def __init__(self,
                 x,
                 y=None,
                 learning_rate=1e-4,
                 state_size=5,
                 batch_size=1):
        self._learning_rate = learning_rate
        
        # Inference
        with tf.name_scope('inference'):
            width = 5
            depth = 1
            classes = 2
            time_steps = 1

            # RNN
            with tf.name_scope('rnn'):
                # GRU cell
                cell = GRUCell(state_size)

                # Just one time step
                zero_state = tf.reshape(x, [batch_size, width])  # load all X on state zero
#                 zero_state = x
                output, state = cell(zero_state)

            # Convolution
            with tf.name_scope('convolution'):
                output = tf.reshape(output, [batch_size, width, depth])
                kernel = tf.Variable(tf.random_normal([3, 1, 1]), dtype=tf.float32, name='kernel')
                conv = tf.nn.conv1d(output, kernel, stride=1, padding='SAME')

            # FC1
            with tf.name_scope('fc1'):
                with tf.variable_scope('output'):
                    W = tf.get_variable('W_out', [state_size, classes])
                    b = tf.get_variable('b_out', [classes], initializer=tf.constant_initializer(1.0))

                flatten = tf.reshape(conv, [batch_size, width])
                fc1 = tf.nn.xw_plus_b(flatten, W, b, name='fc1')
#                 fc1 = tf.reshape(fc1, [batch_size, classes, depth])

            self._logits = fc1
        
        # loss function
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self._logits, labels=y, name='cross_entropy')
            self._loss = tf.reduce_mean(cross_entropy)
            # add scalar histogram for loss
            
            self._optimizer = tf.train.AdamOptimizer(self._learning_rate).minimize(self._loss)
        
        # evaluation
        with tf.name_scope('prediction'):
            correct = tf.nn.in_top_k(self._logits, y, 1)
            self._prediction = tf.reduce_mean(tf.cast(correct, tf.float32))
        
    @property
    def inference(self):
        return self._logits
    
    @property
    def optimizer(self):
        return self._optimizer
    
    @property
    def loss(self):
        return self._loss
    
    @property
    def prediction(self):
        return self._prediction


def train_network(model, 
                  dataset, 
                  batch_size=1, 
                  n_epochs=1, 
                  tb_run=None):
    '''
    Function to train a neural network
    '''
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)

        # Save data for tensorboard
        writer = tf.summary.FileWriter('./graphs/run' + str(n_run), sess.graph)

        average_loss = .0
        n_batches = int(dataset.train.n_samples / batch_size)
        for epoch in range(n_epochs):
            for batch in range(n_batches):
                x, y = dataset.train.next_batch(batch_size)

                # train
                loss_batch, _, global_step = sess.run([model.loss, model.optimizer, increment_step], 
                                         feed_dict={x_pl: x, y_pl: y} )
                average_loss += loss_batch

                if (global_step) % 1000 == 0:
                    print('Average loss at step {}: {:5.3f}'.format(global_step,
                                                                    average_loss / global_step))
            # validate now and again
            if epoch % 2 == 0:
                total_correct_pred = 0
                for batch in range(n_batches):
                    x_valid, y_valid = dataset.valid.next_batch(batch_size)
                    valid_loss, prediction = sess.run([model.loss, model.prediction], 
                                         feed_dict={x_pl: x_valid, y_pl: y_valid} )
                    total_correct_pred += prediction
                print('Accuracy at step {}: {:5.3f}'.format(global_step,
                                                                    total_correct_pred / dataset.valid.n_samples)) 
        writer.close()
    print('Finished {} steps with avg. loss: {:5.3f}'.format(global_step, average_loss / global_step))

    
# Reset tensorflow variable names
tf.reset_default_graph()

# Placeholders [batch, width, depth]
with tf.name_scope('placeholders'):
    x_pl = tf.placeholder(tf.float32, shape=[None, 5, 1], name='x')
    y_pl = tf.placeholder(tf.int32, shape=[None], name='y')

# Tensorboard stats
global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
increment_step = global_step.assign_add(1)
n_run = 1

# Dataset
dataset = utils.build_1d_dataset(width=5, n_samples=100000)

# Hyperparameters
learning_rate = 0.01
batch_size = 64
epochs = 4

# Construct experiment
model = ca_rnn(x_pl, y_pl, batch_size=batch_size, learning_rate=learning_rate)
train_network(model, dataset, batch_size, epochs, n_run)

Average loss at step 1000: 0.017
Accuracy at step 1250: 0.125
Average loss at step 2000: 0.008
Average loss at step 3000: 0.006
Accuracy at step 3750: 0.125
Average loss at step 4000: 0.004
Average loss at step 5000: 0.003
Finished 5000 steps with avg. loss: 0.003
