In [4]:
'''
Example
'''
import tensorflow as tf
import numpy as np

from collections import namedtuple

class GRUCell(object):
    def __init__(self, state_size, activation=tf.sigmoid):
        self._state_size = state_size
        self._activation = activation

    @property
    def state_size(self):
        return self._state_size
    
#     def zero_state(self, batch_size):
#         return 
        
    def __call__(self, s, scope=None):
        with tf.name_scope('GRUCell'):
            
            # trainable variables
            with tf.variable_scope('weights', initializer=tf.contrib.layers.xavier_initializer()) as scope:
                W_u = tf.get_variable('W_u', [self._state_size, self._state_size])
                W_r = tf.get_variable('W_r', [self._state_size, self._state_size])
                W   = tf.get_variable('W', [self._state_size, self._state_size])
                b   = tf.get_variable('b', [self._state_size], initializer=tf.constant_initializer(1.0))
            scope.reuse_variables()
            
            # u = update, r = reset, c = candidates, h = new hidden states
            u = self._activation(tf.matmul(s, W_u) + b)
            r = self._activation(tf.matmul(s, W_r) + b)
            c = tf.tanh(tf.matmul(tf.multiply(s, r), W) + b)
            h = tf.multiply(s, u) + tf.multiply((1 - u), c)
            return h, h

        
class ca_rnn(object):
    def __init__(self,
                 x,
                 y=None,
                 learning_rate=1e-4,
                 state_size=5,
                 batch_size=1):
        self._learning_rate = learning_rate
        
        # Inference
        with tf.name_scope('inference'):
            width = 5
            depth = 1
            classes = 2
            time_steps = 1

            # RNN
            with tf.name_scope('rnn'):
                # GRU cell
                cell = GRUCell(state_size)

                # Just one time step
                zero_state = tf.reshape(x, [batch_size, width])  # load all X on state zero
#                 zero_state = x
                output, state = cell(zero_state)

            # Convolution
            with tf.name_scope('convolution'):
                output = tf.reshape(output, [batch_size, width, depth])
                kernel = tf.constant([1, 1, 1], shape=[3, 1, 1], dtype=tf.float32, name='kernel')
                conv = tf.nn.conv1d(output, kernel, stride=1, padding='SAME')

            # FC1
            with tf.name_scope('fc1'):
                with tf.variable_scope('output'):
                    W = tf.get_variable('W_out', [state_size, classes])
                    b = tf.get_variable('b_out', [classes], initializer=tf.constant_initializer(1.0))

                flatten = tf.reshape(conv, [batch_size, width])
                fc1 = tf.nn.xw_plus_b(flatten, W, b, name='fc1')
#                 fc1 = tf.reshape(fc1, [batch_size, classes, depth])

            self._logits = fc1
        
        # loss function
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self._logits, labels=y, name='cross_entropy')
            self._loss = tf.reduce_mean(cross_entropy)
            # add scalar histogram for loss
            
            self._optimizer = tf.train.AdamOptimizer(self._learning_rate).minimize(self._loss)
        
        # evaluation
        with tf.name_scope('prediction'):
            correct = tf.nn.in_top_k(self._logits, y, 1)
            self._prediction = tf.reduce_mean(tf.cast(correct, tf.float32))
        
    @property
    def inference(self):
        return self._logits
    
    @property
    def optimizer(self):
        return self._optimizer
    
    @property
    def loss(self):
        return self._loss
    
    @property
    def prediction(self):
        return self._prediction


class Dataset1d(object):
    def __init__(self, x, y):
        self._epochs_completed = 0
        self._index_in_epoch = 0
        self._n_samples = x.shape[0]
        self._x = x
        self._y = y
        
    def next_batch(self, batch_size, shuffle=True):
        start = self._index_in_epoch
        
        if self._epochs_completed == 0 and start == 0 and shuffle:
            perm0 = np.arange(self._n_samples)
            np.random.shuffle(perm0)
            self._x = self._x[perm0]
            self._y = self._y[perm0]
            
        if start + batch_size > self._n_samples:
            # finished
            self._epochs_completed += 1
            rest_samples = self._n_samples - start
            x_rest_part = self._x[start:self._n_samples]
            y_rest_part = self._y[start:self._n_samples]
            
            if shuffle:
                perm = np.arange(self._n_samples)
                np.random.shuffle(perm)
                self._x = self._x[perm]
                self._y = self._y[perm]

            # Start next epoch.
            start = 0
            self._index_in_epoch = batch_size - rest_samples
            end = self._index_in_epoch
            x_new_part = self._x[start:end]
            y_new_part = self._y[start:end]
            
            return np.concatenate((x_rest_part, x_new_part), axis=0) , np.concatenate((y_rest_part, y_new_part), axis=0)
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            
            return self._x[start:end], self._y[start:end]
    
    @property
    def x(self):
        return self._x
    
    @property
    def y(self):
        return self._y
    
    @property
    def n_samples(self):
        return self._n_samples
    
    @property
    def epochs_completed(self):
        return self._epochs_completed


def build_1d_dataset(
    width=8,
    depth=1,
    n_samples=100,
    k_value=2,
    train_split=0.8,
    valid_split=0.5,
    verbose=False):
    
    x = np.random.randint(0, k_value, size=[n_samples, width, depth])
    y = np.zeros(n_samples, dtype=int)
    
    # samples, [width, depth]
    for i, board in enumerate(x):
        # count connection length
        connection_length = 0
        # width, depth
        for j, grid in enumerate(board):
            if grid == [1]:
                connection_length += 1 
            else:
                break
                
        if connection_length == width:
            y[i] = 1
        else:
            y[i] = 0
#         y[i] = connection_length

    dataset = namedtuple('Dataset', ['train', 'valid', 'test'])
    
    # Split dataset
    n_train = int(n_samples * train_split)
    n_valid = int((n_samples - n_train) * valid_split)
    
    dataset.train = Dataset1d(x[:n_train], y[:n_train])
    dataset.valid = Dataset1d(x[:n_valid], y[:n_valid])
    dataset.test = Dataset1d(x[:n_valid], y[:n_valid])
    
    return dataset


def train_network(model, 
                  dataset, 
                  batch_size=1, 
                  n_epochs=1):
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)

        writer = tf.summary.FileWriter('./graphs/run1', sess.graph)

        average_loss = .0
        n_batches = int(dataset.train.n_samples / batch_size)
        for epoch in range(n_epochs):
            for batch in range(n_batches):
                x, y = dataset.train.next_batch(batch_size)

                # train
                loss_batch, _, global_step = sess.run([model.loss, model.optimizer, increment_step], 
                                         feed_dict={x_pl: x, y_pl: y} )
                average_loss += loss_batch

                if (global_step) % 1000 == 0:
                    print('Average loss at step {}: {:5.3f}'.format(global_step,
                                                                    average_loss / global_step))
            # validate now and again
            if epoch % 2 == 0:
                total_correct_pred = 0
                for batch in range(n_batches):
                    x_valid, y_valid = dataset.valid.next_batch(batch_size)
                    valid_loss, prediction = sess.run([model.loss, model.prediction], 
                                         feed_dict={x_pl: x_valid, y_pl: y_valid} )
                    total_correct_pred += prediction
                print('Accuracy at step {}: {:5.3f}'.format(global_step,
                                                                    total_correct_pred / dataset.valid.n_samples))
                
        writer.close()
    print('Finished {} steps with avg. loss: {:5.3f}'.format(global_step, average_loss / global_step))


tf.reset_default_graph()

# placeholder [batch, width, depth]
with tf.name_scope('placeholders'):
    x_pl = tf.placeholder(tf.float32, shape=[None, 5, 1], name='x')
    y_pl = tf.placeholder(tf.int32, shape=[None], name='y')

global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
increment_step = global_step.assign_add(1)

dataset = build_1d_dataset(width=5, n_samples=1000000)
batch_size = 128
epochs = 10

model = ca_rnn(x_pl, y_pl, batch_size=batch_size, learning_rate=0.01)
train_network(model, dataset, batch_size, epochs)

Average loss at step 1000: 0.058
Average loss at step 2000: 0.030
Average loss at step 3000: 0.020
Average loss at step 4000: 0.015
Average loss at step 5000: 0.012
Average loss at step 6000: 0.010
Accuracy at step 6250: 0.062
Average loss at step 7000: 0.009
Average loss at step 8000: 0.008
Average loss at step 9000: 0.007
Average loss at step 10000: 0.006
Average loss at step 11000: 0.005
Average loss at step 12000: 0.005
Average loss at step 13000: 0.005
Average loss at step 14000: 0.004
Average loss at step 15000: 0.004
Average loss at step 16000: 0.004
Average loss at step 17000: 0.004
Average loss at step 18000: 0.003
Accuracy at step 18750: 0.062
Average loss at step 19000: 0.003
Average loss at step 20000: 0.003
Average loss at step 21000: 0.003
Average loss at step 22000: 0.003
Average loss at step 23000: 0.003
Average loss at step 24000: 0.003
Average loss at step 25000: 0.002
Average loss at step 26000: 0.002
Average loss at step 27000: 0.002
Average loss at step 28000: 0.00

In [45]:
'''
Example
1d convolution in tensorflow
'''
import tensorflow as tf
import numpy as np

sess = tf.InteractiveSession()

# filter [width, channel in, channel out]
# U = tf.Variable(np.array([.5, .5, .5]).reshape(3, 1, 1), dtype=tf.float32)
U = tf.Variable([1., 1., 1.], dtype=tf.float32)
U = tf.reshape(U, [3, 1, 1])

b = tf.Variable(np.array([0., 0., 0., 0., 0.]).reshape(5, 1), dtype=tf.float32)

# input [batch, length, channels/dims]
s = tf.constant(np.array([[1., 1., 1., 0., 1.], [1., 1., 1., 1., 1.]]).reshape(2, 5, 1), dtype=tf.float32)


# x = tf.matmul(s, U)
# tf.nn.convolution(s, [3, 1, 1, 1], padding='SAME', strides=[1, 1, 1, 1])
x = tf.nn.conv1d(s, U, stride=1, padding='SAME') + b
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(x))

sess.close()

[[[ 2.]
  [ 3.]
  [ 2.]
  [ 2.]
  [ 1.]]

 [[ 2.]
  [ 3.]
  [ 3.]
  [ 3.]
  [ 2.]]]


In [33]:
'''
Example
GRU cell single pass
'''
import tensorflow as tf

sess = tf.InteractiveSession()

# input [batch, length]
state_size = 5
batch = 2
width = 5
depth = 1

# placeholder [batch, width, depth]
x = tf.constant([1, 1, 0, 0, 1, 1, 1, 1, 0, 1], shape=[batch, width, depth], dtype=tf.float32)

# trainable variables
W_u = tf.Variable(tf.ones([state_size, state_size]), dtype=tf.float32)
W_r = tf.Variable(tf.ones([state_size, state_size]), dtype=tf.float32)
W = tf.Variable(tf.ones([state_size, state_size]), dtype=tf.float32)
b = tf.constant([1, 1, 1, 1, 1], shape=[1, 5], dtype=tf.float32)

# GRU
s = tf.reshape(x, [batch, width])
activation = tf.sigmoid

u = activation(tf.matmul(s, W_u) + b)
r = activation(tf.matmul(s, W_r) + b)
c = tf.tanh(tf.matmul(tf.multiply(s, r), W) + b)
h = tf.multiply(s, u) + tf.multiply((1 - u), c)

# convolution
output = tf.reshape(h, [batch, width, channels])
kernel = tf.constant([1., 1., 1.], shape=[3, 1, 1], dtype=tf.float32)
conv = tf.nn.conv1d(output, kernel, stride=1, padding='SAME')

sess.run(tf.global_variables_initializer())
print(sess.run(conv))

sess.close()

[[[ 1.99997318]
  [ 2.017946  ]
  [ 1.03593218]
  [ 1.03593218]
  [ 1.01795936]]

 [[ 1.99999869]
  [ 2.99999809]
  [ 2.00669098]
  [ 2.00669098]
  [ 1.00669158]]]


In [6]:
a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3, 1])
a = tf.reshape(a, [2, 3])
b = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9], shape=[3, 3])
c = tf.matmul(a, b)
print(sess.run([a, b]))

[30 36 42]


In [None]:
        outputs = []
        for t, input_ in enumerate(inputs):
            if t > 0: 
                scope.reuse_variables()
                
            output, state = cell(input_, state)
            outputs.append(output)
            
#                     x = np.array([[1, 0, 0, 0, 1, 1, 1, 1, 0, 1]]).reshape([2, 5, 1])
#                     y = np.array([0, 1]).reshape([2])

In [5]:
debug

> [0;32m/Users/fred/Developer/ca-rnn/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py[0m(1035)[0;36m_do_call[0;34m()[0m
[0;32m   1033 [0;31m        [0;32mexcept[0m [0mKeyError[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m   1034 [0;31m          [0;32mpass[0m[0;34m[0m[0m
[0m[0;32m-> 1035 [0;31m      [0;32mraise[0m [0mtype[0m[0;34m([0m[0me[0m[0;34m)[0m[0;34m([0m[0mnode_def[0m[0;34m,[0m [0mop[0m[0;34m,[0m [0mmessage[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m   1036 [0;31m[0;34m[0m[0m
[0m[0;32m   1037 [0;31m  [0;32mdef[0m [0m_extend_graph[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> cross_entropy.get_shape()
*** NameError: name 'cross_entropy' is not defined
ipdb> node_def
name: "loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits"
op: "SparseSoftmaxCrossEntropyWithLogits"
input: "loss/SparseSoftmaxCrossEntropyWithLogits/Reshape"
input: "loss/SparseSoftmaxCrossEntrop