# This notebook implements the encoder and the combined model on top of the already completed generator

Author: Riaan Zoetmulder

In [5]:
from options import load_arguments
from IO import create_embedding_layer, read_annotations, create_batches
import tensorflow as tf
from models import Generator
from advanced_layers import RCNNCell
import cPickle as pickle
%load_ext autoreload
%autoreload 2
print tf.__version__

# Custom Recurrent Layers

### ExtRCNNCell class

In [2]:
class ExtRCNNCell(RCNNCell):
    """RCNNCell variant whose state update is gated by a mask supplied with the input.

    The cell input is a pair: the first item is handed to the parent RCNNCell,
    the second item is the mask used to blend the new state with the old one.
    """

    def __call__(self, x, hc_tm1):
        """Run the parent cell for one step and blend its state with `hc_tm1`.

        x      : indexable pair; x[0] is the parent cell's input, x[1] the mask.
        hc_tm1 : state passed to the parent cell and carried over wherever the
                 mask contributes 0.

        Returns the parent's first output untouched, plus the blended state.
        """
        step_input = x[0]
        step_mask = x[1]
        prevstate, candidate = super(ExtRCNNCell, self).__call__(step_input, hc_tm1)

        # mask * new_state + (1 - mask) * old_state
        blended = step_mask * candidate + (1 - step_mask) * hc_tm1
        return prevstate, blended

    def copy_params(self, from_obj):
        """Point this cell's `internal_layers` and `bias` at those of `from_obj`."""
        for attr_name in ('internal_layers', 'bias'):
            setattr(self, attr_name, getattr(from_obj, attr_name))

### ExtLSTMCell class

In [3]:
#TODO: Implement

### Standard Layer

In [4]:
def Layer(x, n_classes,  hasbias = True, scope = None, act = tf.nn.sigmoid):
    """Dense layer computing ``act(matmul(x, W_out) [+ B_out])``.

    x         : 2-D tensor; its second dimension sets the number of rows of W_out.
    n_classes : number of output units (columns of W_out / B_out).
    hasbias   : when true, a zero-initialised bias row 'B_out' is added.
    scope     : variable scope name; 'output_layer' is used when falsy.
    act       : callable applied to the affine output (sigmoid by default).

    Returns the activated output tensor.
    """
    with tf.variable_scope(scope or 'output_layer'):
        weights = tf.get_variable(
            'W_out',
            [x.get_shape()[1], n_classes],
            initializer = tf.contrib.layers.initializers.xavier_initializer())
        pre_activation = tf.matmul(x, weights)

        if hasbias:
            bias = tf.get_variable(
                'B_out',
                [1, n_classes],
                initializer = tf.constant_initializer(0.0))
            pre_activation = pre_activation + bias

        # the activation is applied inside the scope so its op lives there too
        activated = act(pre_activation)

    return activated
        
        
        
    

# Encoder Class

In [5]:
class Encoder(object):

    def __init__(self, args, embedding_layer, nclasses, generator):
        self.args = args
        self.emb_layer = embedding_layer
        self.nclasses = nclasses
        self.gen = generator

    def ready(self):
        with tf.variable_scope("Encoder"):
            
            gen = self.gen
            emb_layer = self.emb_layer
            args = self.args
            padding_id = emb_layer.vocab_map["<padding>"]
            training = gen.training

            # variables from the generator
            dropout = gen.dropout
            x = gen.x
            z = gen.zpred
            z = tf.expand_dims(z, 2)

            # input placeholder
            y = self.y = tf.placeholder(tf.float32, [None, self.nclasses], name= 'target_values')

            n_d = args.hidden_dimension
            n_e = emb_layer.n_d

            layers = self.layers = [ ]
            zero_states = self.zero_states = [ ]

            depth = args.depth
            use_all = args.use_all
            layer_type = args.layer.lower()

            # create layers
            for i in xrange(depth):

                # TODO: Include ExtLSTMCell here
                layers.append(
                                ExtRCNNCell(n_d,
                                            idx = 'ExtRCNNCell_%i'%i)
                             )
                zero_states.append(
                                    layers[i].zero_state(x.get_shape()[1])
                                  )


            # TODO: Some stuff missing here!

            # create layers

            h_prev = gen.rnn_inputs
            lst_states = []
            # print 'z outside shape: ', z.get_shape()
            # print 'zero_state outside shape: ', zero_state.get_shape()
            # print 'embs: ', embs.get_shape()

            for idx, layer in enumerate(layers):

                # a bug might occur here because you are using the same names for hnext t and t+1 
                h_next, _=  tf.nn.dynamic_rnn(layer,
                                          (h_prev, z),
                                          initial_state= zero_states[idx], 
                                          time_major = True)
                print 'layer ' + str(idx)+ ' ', h_next.get_shape()

                ############################
                # TODO: if pooling do stuff#
                ############################
                if args.pooling:
                    # do something
                    print 'implement the pooling'
                    raise NotImplemented

                else:
                    lst_states.append(h_next[-1])

                # update next state, apply dropout
                h_prev = tf.cond(training,
                             lambda: tf.nn.dropout(h_next, dropout), 
                             lambda: h_next, name='dropout_h_next')

            # select whether to use all of them or not.
            if args.use_all:
                size = depth * n_d

                # batch * size (i.e. n_d*depth)
                h_final = tf.concat(1, lst_states)
            else:
                size = n_d
                h_final = lst_states[-1]

            # apply dropout to final state
            h_final = tf.cond(training,
                             lambda: tf.nn.dropout(h_final, dropout), 
                             lambda: h_final, name='dropout_h_next')


            print h_final.get_shape()
            # implement final layer
            preds = self.preds = Layer(h_final, self.nclasses)

            print 'preds: ', preds.get_shape()
            loss_mat = self.loss_mat = (preds-y)**2 # batch

            # difference in predicitons
            pred_diff = self.pred_diff = tf.reduce_mean(tf.reduce_max(preds, 1) - tf.reduce_min(preds, 1))

            # get the loss for each class
            if args.aspect < 0:
                loss_vec = tf.reduce_mean(loss_mat, 1)
            else:
                assert args.aspect < self.nclasses

                loss_vec = loss_mat[:,args.aspect]

            self.loss_vec = loss_vec

            # get values from the generator
            zsum = gen.zsum
            zdiff = gen.zdiff
            logpz = gen.logpz


            coherent_factor = args.sparsity * args.coherent
            # total loss
            loss = self.loss = tf.reduce_mean(loss_vec)

            # calculate the sparsity cost
            sparsity_cost = self.sparsity_cost = tf.reduce_mean(zsum) * args.sparsity + \
                                                 tf.reduce_mean(zdiff) * coherent_factor

            # loss function as mentioned in the paper
            cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor

            cost_logpz = tf.reduce_mean(cost_vec * tf.reduce_sum(logpz, 0))
            self.obj = tf.reduce_mean(cost_vec)

        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Encoder')
        
        total_parameters = 0
        for variable in variables:
            sh = variable.get_shape()
            variable_parametes = 1
            for dim in sh:
                variable_parametes *= dim.value
            total_parameters += variable_parametes
        print 'total # Encoder parameters:', total_parameters
        
        
        # theano code
        
        lossL2 = tf.add_n([ tf.nn.l2_loss(v) for v in variables
                    if 'bias' not in v.name ]) * self.args.l2_reg
        
        # generator and encoder loss
        self.cost_g = cost_logpz * 10 + gen.L2_loss
        self.cost_e = loss * 10 + lossL2
        
        print 'initialized!'
        
        

        
        
        
        

# Model Class to be completed

In [6]:

class Model(object):
    """Holds a Generator and an Encoder that share args, embeddings and class count."""

    def __init__(self, args, embedding_layer, nclasses):
        """Remember the configuration; sub-models are only created by `ready()`."""
        self.args, self.embedding_layer, self.nclasses = args, embedding_layer, nclasses

    def ready(self):
        """Create the generator and the encoder, then build them in that order.

        The encoder receives the generator instance at construction time, and
        the generator's `ready()` runs before the encoder's.
        """
        generator = Generator(self.args, self.nclasses, self.embedding_layer)
        self.generator = generator
        self.encoder = Encoder(self.args, self.embedding_layer, self.nclasses, generator)

        for component in (self.generator, self.encoder):
            component.ready()


# Main loop

In [7]:
# Load the pre-trained word embeddings a single time; `embed_layer` is passed to main() further down.
embed_layer = create_embedding_layer('data/review+wiki.filtered.200.txt.gz')

147759 pre-trained embeddings loaded.


In [21]:
def reset_graph():
    """Close a truthy module-level `sess`, if one exists, then reset the default TF graph."""
    session = globals().get('sess')
    if session:
        session.close()
    tf.reset_default_graph()
    

def main(args, embed_layer):
    print 'Parser Arguments' 
    
    for key,value in args.__dict__.iteritems():
        print u'{0}: {1}'.format(key, value)
        
        
    # ensure embeddings exist
    assert args.embedding, "Pre-trained word embeddings required."
    
    max_len = args.max_len
    
    if args.train:
        train_x, train_y = read_annotations(args.train)
        train_x = [ embed_layer.map_to_ids(x)[:max_len] for x in train_x ]
                   
    # TODO: create development and test sets and rationale stuff
            

    if args.train:
        with tf.Session() as sess:
            # initialize Model
            #TODO: create encoder class in model
            
            model = Model(
                        args = args,
                        embedding_layer = embed_layer,
                        nclasses = len(train_y[0])
                    )
            model.ready()
            
            # added this for testing
            # TODO: Remove later
            train_batches_x, train_batches_y = create_batches(
                                train_x, train_y, args.batch, model.generator.padding_id
                            )
            
            feed_dict={model.generator.x: train_batches_x[0],model.encoder.y : train_batches_y[0], model.generator.embedding_placeholder: embed_layer.params[0], 
                      model.generator.dropout: 0.5, model.generator.training: True}
                      
            
            init = tf.initialize_all_variables()
            
            
            sess.run(init)
            print 'past graph initialization'
            
            
            cost_g = sess.run(model.encoder.cost_g , feed_dict)
            
            print cost_g
            


In [22]:
# Load the pickled option object, start from a clean graph and run the model once.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a "data/args.p" that you produced yourself.
with open("data/args.p", "rb") as args_file:
    # the context manager closes the file handle, which the bare open() call left dangling
    args = pickle.load(args_file)
reset_graph()
main(args, embed_layer)

Parser Arguments
layer: rcnn
aspect: -1
dump: 
fix_emb: 1
save_model: 
batch: 256
max_len: 256
use_all: 1
learning: adam
max_epochs: 100
load_rationale: 
l2_reg: 1e-06
sparsity: 0.0003
decay_lr: 1
load_model: 
beta2: 0.999
beta1: 0.9
test: 
activation: tanh
learning_rate: 0.0005
hidden_dimension: 200
coherent: 2.0
train: data/reviews.aspect1.train.txt.gz
dropout: 0.1
eval_period: -1
dev: 
pooling: 0
depth: 2
hidden_dimension2: 30
embedding: data/review+wiki.filtered.200.txt.gz
order: 2
70000 examples loaded from data/reviews.aspect1.train.txt.gz
max text length: 1145
Received dictionary of vocab size 147761 and embedding dim 200.
shape of z: (?, 256, 1)
total #  Generator parameters: 358281
layer 0  (?, 256, 200)
layer 1  (?, 256, 200)
(256, 400)
preds:  (256, 5)
total # Encoder parameters: 322805
initialized!
past graph initialization
-48.0557
