In [6]:
import tensorflow as tf
import numpy as np
import init_word_representations as iwr

In [6]:
# Load the pre-trained GloVe word vectors through the project helper.
# NOTE(review): the tuple is presumably (vocabulary size, embedding dim) for
# the 50-d glove.6B file -- confirm against init_word_representations.
glove_path = "./data/word_representations/glove.6B.50d.txt"
glove_shape = (400000, 50)
tokens, U = iwr.get_pretrained_wordvec_from_file(glove_path, glove_shape)

In [56]:
class SentCNN(object):
    """
    A CNN that scores an utterance against a set of candidate relations.

    The utterance and every candidate relation are embedded with one shared
    embedding matrix and encoded by the same convolution + max-pooling
    layers. Each candidate's score is the cosine similarity between the
    utterance encoding and that candidate's encoding; the scores are fed as
    logits into a sparse softmax cross-entropy loss.
    """

    def __init__(self,
                 sequence_length,
                 num_classes,
                 init_embeddings,
                 filter_sizes,
                 num_filters,
                 embeddings_trainable=False):
        """
        Build the TensorFlow graph of the model in the current default graph.

        :param sequence_length: The length of our sentences. Utterances and
        relations are both padded to this same length.
        :param num_classes: Number of candidate relations per example, which
        is also the number of classes of the output layer.
        :param init_embeddings: Pre-trained word embeddings or initialized
        values (2-D, one row per token); its second dimension defines
        ``self.embedding_size``.
        :param filter_sizes: The number of words we want our convolutional
        filters to cover. We will have num_filters for each size specified
        here. For example, [3, 4, 5] means that we will have filters that
        slide over 3, 4 and 5 words respectively, for a total of
        3 * num_filters filters.
        :param num_filters: The number of filters per filter size (see above).
        :param embeddings_trainable: Train embeddings or not.
        """

        # Placeholders for the inputs and the gold labels.

        # input_x_u: batch_size x sequence_length
        self.input_x_u = tf.placeholder(tf.int32,
                                        [None, sequence_length],
                                        name="input_x_u")
        # input_x_r: batch_size x num_classes x sequence_length
        self.input_x_r = tf.placeholder(tf.int32,
                                        [None, num_classes, sequence_length],
                                        name="input_x_r")
        # input_y: batch_size, index of the correct relation for each example
        self.input_y = tf.placeholder(tf.int64,
                                      [None],
                                      name="input_y")

        self.embedding_size = np.shape(init_embeddings)[1]

        # Embedding layer (shared by utterances and relations)
        with tf.name_scope("embedding"):
            W = tf.Variable(init_embeddings,
                            trainable=embeddings_trainable,
                            name='W')
            # batch_size x sequence_length x embedding_size
            self.embedded_u = tf.nn.embedding_lookup(W, self.input_x_u)
            print("DEBUG: embedded_u -> %s" % self.embedded_u)
            # batch_size x num_classes x sequence_length x embedding_size
            self.embedded_r = tf.nn.embedding_lookup(W, self.input_x_r)
            print("DEBUG: embedded_r -> %s" % self.embedded_r)
            # conv2d expects a trailing channel dimension:
            # batch_size x sequence_length x embedding_size x 1
            self.embedded_u_expanded = tf.expand_dims(self.embedded_u, -1)
            print("DEBUG: embedded_u_expanded -> %s" % self.embedded_u_expanded)
            # batch_size x num_classes x sequence_length x embedding_size x 1
            self.embedded_r_expanded = tf.expand_dims(self.embedded_r, -1)
            print("DEBUG: embedded_r_expanded -> %s" % self.embedded_r_expanded)

        # Create a convolution + maxpooling layer for each filter size
        pooled_outputs_u = []
        pooled_outputs_r = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s-u" % filter_size):
                # One set of filter weights per filter size, shared between
                # the utterance and every candidate relation.
                filter_shape = [filter_size, self.embedding_size, 1, num_filters]
                conv_W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                     name='W')
                conv_b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                     name='b')
                # Pool over every valid filter position, i.e. the whole
                # convolution output along the sequence axis.
                pool_ksize = [1, sequence_length - filter_size + 1, 1, 1]

                # Utterance branch: batch_size x 1 x 1 x num_filters
                pooled_outputs_u.append(
                    self._conv_relu_maxpool(self.embedded_u_expanded,
                                            conv_W, conv_b, pool_ksize, "u"))

                # Pass each candidate relation through the same layer
                pooled_outputs_r_wclasses = []
                for j in range(num_classes):
                    embedded_r_j = self.embedded_r_expanded[:, j, :, :, :]
                    pooled_outputs_r_wclasses.append(
                        self._conv_relu_maxpool(embedded_r_j,
                                                conv_W, conv_b, pool_ksize,
                                                "r-%s" % j))
                # out_tensor: batch_size x 1 x num_classes x num_filters
                out_tensor = tf.concat(2, pooled_outputs_r_wclasses)
                pooled_outputs_r.append(out_tensor)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        print("DEBUG: pooled_outputs_u -> %s" % pooled_outputs_u)
        self.h_pool_u = tf.concat(3, pooled_outputs_u)
        print("DEBUG: h_pool_u -> %s" % self.h_pool_u)
        # h_pool_flat_u: batch_size x 1 x num_filters_total
        self.h_pool_flat_u = tf.reshape(self.h_pool_u, [-1, 1, num_filters_total])
        print("DEBUG: h_pool_flat_u -> %s" % self.h_pool_flat_u)

        print("DEBUG: pooled_outputs_r -> %s" % pooled_outputs_r)
        self.h_pool_r = tf.concat(3, pooled_outputs_r)
        print("DEBUG: h_pool_r -> %s" % self.h_pool_r)
        # h_pool_flat_r: batch_size x num_classes x num_filters_total.
        # This must flatten the *relation* features (h_pool_r); flattening
        # h_pool_u here would leave the relations out of the score entirely.
        self.h_pool_flat_r = tf.reshape(self.h_pool_r, [-1, num_classes, num_filters_total])
        print("DEBUG: h_pool_flat_r -> %s" % self.h_pool_flat_r)

        # Dropout is currently not applied to the pooled features.

        # cosine layer - final scores and predictions
        with tf.name_scope("cosine_layer"):
            # h_pool_flat_u broadcasts over the num_classes axis, so every
            # candidate relation is compared against the same utterance.
            dot = tf.reduce_sum(tf.mul(self.h_pool_flat_u,
                                       self.h_pool_flat_r), 2)
            print("DEBUG: dot -> %s" % dot)
            sqrt_u = tf.sqrt(tf.reduce_sum(self.h_pool_flat_u**2, 2))
            print("DEBUG: sqrt_u -> %s" % sqrt_u)
            sqrt_r = tf.sqrt(tf.reduce_sum(self.h_pool_flat_r**2, 2))
            print("DEBUG: sqrt_r -> %s" % sqrt_r)
            # NOTE(review): an all-zero pooled vector (possible after ReLU)
            # makes this a division by zero; no epsilon is added here.
            self.cosine = dot / (sqrt_u * sqrt_r)
            print("DEBUG: cosine -> %s" % self.cosine)
            self.predictions = tf.argmax(self.cosine, 1, name="predictions")
            print("DEBUG: predictions -> %s" % self.predictions)

        # softmax regression - the cosine scores are used as logits
        with tf.name_scope("loss"):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(self.cosine, self.input_y)
            self.loss = tf.reduce_mean(losses)

        # Calculate Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, self.input_y)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

    @staticmethod
    def _conv_relu_maxpool(inputs, conv_W, conv_b, pool_ksize, suffix):
        """Run conv2d -> bias + ReLU -> max-pool on ``inputs``; ops are named conv-/relu-/pool-<suffix>."""
        conv = tf.nn.conv2d(
            inputs,
            conv_W,
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="conv-%s" % suffix)
        # Apply nonlinearity
        hidden = tf.nn.relu(tf.nn.bias_add(conv, conv_b), name="relu-%s" % suffix)
        # Maxpooling over outputs
        return tf.nn.max_pool(
            hidden,
            ksize=pool_ksize,
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="pool-%s" % suffix)
        

In [57]:
# Toy embedding table (6 tokens x 5 dimensions) used to check that the graph
# can be constructed without loading the real word vectors.
init_embeddings = [
    [0.111, 0.919, 0.818, 0.717, 0.616],
    [0.212, 0.111, 0.118, 0.919, 0.515],
    [0.313, 0.212, 0.217, 0.811, 0.414],
    [0.414, 0.313, 0.316, 0.712, 0.313],
    [0.515, 0.414, 0.515, 0.613, 0.212],
    [0.616, 0.717, 0.818, 0.919, 0.111],
]

# Small model: 3-token sequences, 4 candidate relations per example and two
# banks of 10 filters, each spanning 2 words.
cnn = SentCNN(
    sequence_length=3,
    num_classes=4,
    init_embeddings=init_embeddings,
    filter_sizes=[2, 2],
    num_filters=10,
    embeddings_trainable=False
)

# Adam optimizer on the model loss; the global_step variable is handed to
# apply_gradients so it is tied to train_op.
global_step = tf.Variable(0, trainable=False, name="global_step")
optimizer = tf.train.AdamOptimizer(1e-4)
grads_and_vars = optimizer.compute_gradients(cnn.loss)
train_op = optimizer.apply_gradients(
    grads_and_vars,
    global_step=global_step
)

DEBUG: embedded_u -> Tensor("embedding_23/embedding_lookup:0", shape=(?, 3, 5), dtype=float32)
DEBUG: embedded_r -> Tensor("embedding_23/embedding_lookup_1:0", shape=(?, 4, 3, 5), dtype=float32)
DEBUG: embedded_u_expanded -> Tensor("embedding_23/ExpandDims:0", shape=(?, 3, 5, 1), dtype=float32)
DEBUG: embedded_r_expanded -> Tensor("embedding_23/ExpandDims_1:0", shape=(?, 4, 3, 5, 1), dtype=float32)
DEBUG: pooled_outputs_u -> [<tf.Tensor 'conv-maxpool-2-u_28/pool-u:0' shape=(?, 1, 1, 10) dtype=float32>, <tf.Tensor 'conv-maxpool-2-u_29/pool-u:0' shape=(?, 1, 1, 10) dtype=float32>]
DEBUG: h_pool_u -> Tensor("concat_18:0", shape=(?, 1, 1, 20), dtype=float32)
DEBUG: h_pool_flat_u -> Tensor("Reshape_13:0", shape=(?, 1, 20), dtype=float32)
DEBUG: pooled_outputs_r -> [<tf.Tensor 'conv-maxpool-2-u_28/concat:0' shape=(?, 1, 4, 10) dtype=float32>, <tf.Tensor 'conv-maxpool-2-u_29/concat:0' shape=(?, 1, 4, 10) dtype=float32>]
DEBUG: h_pool_r -> Tensor("concat_19:0", shape=(?, 1, 4, 20), dtype=float

In [12]:
# Open an interactive TensorFlow session and keep a handle to it in `sess`.
# NOTE(review): the "AssertionError ... ignored" message in this cell's output
# is raised from InteractiveSession.__del__ of another session object;
# presumably the cell was re-run and the previous `sess` was dropped without
# calling close() -- TODO confirm.
sess = tf.InteractiveSession()

Exception AssertionError: AssertionError() in <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x7f7f91083750>> ignored


In [None]:
def train_step(x_u_batch, x_r_batch, y_batch):
    """
    A single training step.

    NOTE(review): in the code visible here this is an unfinished stub -- the
    feed dict is still empty and none of the arguments are used yet.

    :param x_u_batch: presumably the batch fed to ``cnn.input_x_u`` -- TODO confirm.
    :param x_r_batch: presumably the batch fed to ``cnn.input_x_r`` -- TODO confirm.
    :param y_batch: presumably the batch fed to ``cnn.input_y`` -- TODO confirm.
    """
    # TODO: map the model's placeholders to the batch arguments.
    feed_dict = {
        
    }