In [1]:
import os
import tensorflow as tf
import numpy as np
import pickle
import logging
import tqdm
import gc
import math
import unicodedata
import itertools
from six.moves import zip_longest
from tensorflow.python.layers.core import Dense

  return f(*args, **kwds)


In [2]:
flags = tf.app.flags

flags.DEFINE_string("rnn_cell", "lstm", "rnn cell")
flags.DEFINE_string("data_file", "Data/CQA_bin.pkl", "data_file")

flags.DEFINE_integer("batch_size", 1, "batch_size")
flags.DEFINE_integer("epochs", 30, "epochs")

flags.DEFINE_integer("decoder_length",100,"decoder_length")

flags.DEFINE_integer("dim_str", 50, "dim_str")
flags.DEFINE_integer("dim_sem", 75, "dim_sem")
flags.DEFINE_integer("dim_output", 150, "dim_output")
flags.DEFINE_float("keep_prob", 0.7, "keep_prob")
flags.DEFINE_string("opt", 'Adagrad', "opt")
flags.DEFINE_float("lr", 0.05, "lr")
flags.DEFINE_float("norm", 1e-4, "norm")
flags.DEFINE_integer("gpu", 0, "gpu")

flags.DEFINE_string("sent_attention", "max", "sent_attention")
flags.DEFINE_string("ans_attention", "max", "ans_attention")
flags.DEFINE_string("doc_attention", "max", "doc_attention")
flags.DEFINE_bool("large_data", True, "large_data")
flags.DEFINE_integer("log_period", 5000, "log_period")


In [3]:
def grouper(iterable, n, fillvalue=None, shorten=False, num_groups=None):
    args = [iter(iterable)] * n
    out = zip_longest(*args, fillvalue=fillvalue)
    out = list(out)
    if num_groups is not None:
        default = (fillvalue,) * n
        assert isinstance(num_groups, int)
        out = list(each for each, _ in zip_longest(out, range(num_groups), fillvalue=default))
    if shorten:
        assert fillvalue is None
        out = (tuple(e for e in each if e is not None) for each in out)
    return out

In [4]:
class Instance:
    def __init__(self):
        self.token_idxs = None
        self.abstract_idxs = None
        self.idx = -1

    def _doc_len(self):
        k = len(self.token_idxs)
        return (k)

    def _abstract_len(self):
        k = len(self.abstract_idxs)
        return k

    def _max_ans_len(self):
        k = max([len(ans) for ans in self.token_idxs])
        return int(k)
    
    def _max_sent_len(self):
        k = max([len(sent) for ans in self.token_idxs for sent in ans ])
        return int(k)

In [5]:
class DataSet:
    def __init__(self, data):
        self.data = data
        self.num_examples = len(self.data)

    def sort(self):
        random.shuffle(self.data)
        self.data = sorted(self.data, key=lambda x: x._max_sent_len())
        self.data = sorted(self.data, key=lambda x: x._max_ans_len())
        self.data = sorted(self.data, key=lambda x: x._doc_len())

    def get_by_idxs(self, idxs):
        return [self.data[idx] for idx in idxs]

    def get_batches(self, batch_size, num_epochs=None, rand = True):
        num_batches_per_epoch = int(math.ceil(self.num_examples / batch_size))
        idxs = list(range(self.num_examples))
        _grouped = lambda: list(grouper(idxs, batch_size))

        if(rand):
            grouped = lambda: random.sample(_grouped(), num_batches_per_epoch)
        else:
            grouped = _grouped
        num_steps = num_epochs*num_batches_per_epoch
        batch_idx_tuples = itertools.chain.from_iterable(grouped() for _ in range(num_epochs))
        for i in range(num_steps):
            batch_idxs = tuple(i for i in next(batch_idx_tuples) if i is not None)
            batch_data = self.get_by_idxs(batch_idxs)
            yield i,batch_data


In [6]:
def LReLu(x, leak=0.01):
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * x + f2 * tf.abs(x)

In [7]:
def dynamicBiRNN(input, seqlen, n_hidden, cell_type, cell_name=''):
    batch_size = tf.shape(input)[0]
    with tf.variable_scope(cell_name + 'fw', initializer=tf.contrib.layers.xavier_initializer(), dtype = tf.float32):
        if(cell_type == 'gru'):
            fw_cell = tf.contrib.rnn.GRUCell(n_hidden)
        elif(cell_type == 'lstm'):
            fw_cell = tf.contrib.rnn.LSTMCell(n_hidden)

        fw_initial_state = fw_cell.zero_state(batch_size, tf.float32)
    with tf.variable_scope(cell_name + 'bw', initializer=tf.contrib.layers.xavier_initializer(), dtype = tf.float32):
        if(cell_type == 'gru'):
            bw_cell = tf.contrib.rnn.GRUCell(n_hidden)
        elif(cell_type == 'lstm'):
            bw_cell = tf.contrib.rnn.LSTMCell(n_hidden)
        bw_initial_state = bw_cell.zero_state(batch_size, tf.float32)
    
    with tf.variable_scope(cell_name):
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, input,
                                                                 initial_state_fw=fw_initial_state,
                                                                 initial_state_bw=bw_initial_state,
                                                                 sequence_length=seqlen)
    return outputs, output_states

In [8]:
def decode(helper, scope, reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=num_units, memory=encoder_outputs,memory_sequence_length=input_lengths)
        cell = tf.contrib.rnn.GRUCell(num_units=num_units)
        attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell, attention_mechanism, attention_layer_size=num_units / 2)
        out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell, vocab_size, reuse=reuse)
        decoder = tf.contrib.seq2seq.BasicDecoder(cell=out_cell, helper=helper,initial_state=out_cell.zero_state(dtype=tf.float32, batch_size=batch_size))#initial_state=encoder_final_state)
        outputs = tf.contrib.seq2seq.dynamic_decode(decoder=decoder, output_time_major=False,impute_finished=True, maximum_iterations=output_max_length)
        return outputs[0]

In [9]:
def get_structure(name, input, max_l, mask_parser_1, mask_parser_2):
    def _getDep(input, mask1, mask2):
        #input: batch_l, sent_l, rnn_size
        with tf.variable_scope("Structure/"+name, reuse=True, dtype=tf.float32):
            w_parser_p = tf.get_variable("w_parser_p")
            w_parser_c = tf.get_variable("w_parser_c")
            b_parser_p = tf.get_variable("bias_parser_p")
            b_parser_c = tf.get_variable("bias_parser_c")

            w_parser_s = tf.get_variable("w_parser_s")
            w_parser_root = tf.get_variable("w_parser_root")

        parent = tf.tanh(tf.tensordot(input, w_parser_p, [[2], [0]]) + b_parser_p)
        child = tf.tanh(tf.tensordot(input, w_parser_c, [[2], [0]])+b_parser_c)
        # rep = LReLu(parent+child)
        temp = tf.tensordot(parent,w_parser_s,[[-1],[0]])
        raw_scores_words_ = tf.matmul(temp,tf.matrix_transpose(child))

        # raw_scores_words_ = tf.squeeze(tf.tensordot(rep, w_parser_s, [[3], [0]]) , [3])
        raw_scores_root_ = tf.squeeze(tf.tensordot(input, w_parser_root, [[2], [0]]) , [2])
        raw_scores_words = tf.exp(raw_scores_words_)
        raw_scores_root = tf.exp(raw_scores_root_)
        tmp = tf.zeros_like(raw_scores_words[:,:,0])
        raw_scores_words = tf.matrix_set_diag(raw_scores_words,tmp)

        str_scores, LL = _getMatrixTree(raw_scores_root, raw_scores_words, mask1, mask2)
        return str_scores

    def _getMatrixTree(r, A, mask1, mask2):
        L = tf.reduce_sum(A, 1)
        L = tf.matrix_diag(L)
        L = L - A

        LL = L[:, 1:, :]
        LL = tf.concat([tf.expand_dims(r, [1]), LL], 1)
        LL_inv = tf.matrix_inverse(LL)  #batch_l, doc_l, doc_l
        d0 = tf.multiply(r, LL_inv[:, :, 0])
        LL_inv_diag = tf.expand_dims(tf.matrix_diag_part(LL_inv), 2)
        tmp1 = tf.matrix_transpose(tf.multiply(tf.matrix_transpose(A), LL_inv_diag))
        tmp2 = tf.multiply(A, tf.matrix_transpose(LL_inv))
        d = mask1 * tmp1 - mask2 * tmp2
        d = tf.concat([tf.expand_dims(d0,[1]), d], 1)
        return d, LL

    str_scores = _getDep(input, mask_parser_1, mask_parser_2)
    return str_scores

In [10]:
class StructureModel():
    def __init__(self, config):
        self.config = config
        t_variables = {}
        t_variables['keep_prob'] = tf.placeholder(tf.float32)
        t_variables['batch_l'] = tf.placeholder(tf.int32)
        
        #Placeholder for answers and abstracts
        t_variables['token_idxs'] = tf.placeholder(tf.int32, [None, None, None, None])
        t_variables['abstract_idxs'] = tf.placeholder(tf.int32, [None,None])

        #Storing length of each heirarchy element
        t_variables['sent_l'] = tf.placeholder(tf.int32, [None, None,None])
        t_variables['ans_l'] = tf.placeholder(tf.int32, [None, None])
        t_variables['doc_l'] = tf.placeholder(tf.int32, [None])
        t_variables['abstract_l'] = tf.placeholder(tf.int32,[None])

        #Storing upper limit of each element length
        t_variables['max_sent_l'] = tf.placeholder(tf.int32)
        t_variables['max_doc_l'] = tf.placeholder(tf.int32)
        t_variables['max_ans_l'] = tf.placeholder(tf.int32)
        t_variables['max_abstract_l'] = tf.placeholder(tf.int32)

        #Masks to limit element sizes
        t_variables['mask_tokens'] = tf.placeholder(tf.float32, [None, None, None,None])
        t_variables['mask_sents'] = tf.placeholder(tf.float32, [None, None,None])
        t_variables['mask_answers']= tf.placeholder(tf.float32,[None,None])
        
        #Parser Masks
        t_variables['mask_parser_1'] = tf.placeholder(tf.float32, [None, None, None])
        t_variables['mask_parser_2'] = tf.placeholder(tf.float32, [None, None, None])

        t_variables['start_tokens'] = tf.placeholder(tf.int32,[None])

        
        self.t_variables = t_variables


    def get_feed_dict(self, batch):
        batch_size = len(batch)
        abstracts_l_matrix = np.zeros([batch_size],np.int32)
        doc_l_matrix = np.zeros([batch_size], np.int32)

        for i, instance in enumerate(batch):
            n_ans = len(instance.token_idxs)
            n_words = len(instance.abstract_idxs)
            doc_l_matrix[i] = n_ans
            abstracts_l_matrix[i] = n_words
        
        max_doc_l = np.max(doc_l_matrix)
        max_ans_l = max([max([len(ans) for ans in doc.token_idxs]) for doc in batch])
        max_sent_l = max([max([max([len(sent) for itr in doc.token_idxs for sent in itr]) for ans in doc.token_idxs]) for doc in batch])
        max_abstract_l = np.max(abstracts_l_matrix)

        ans_l_matrix = np.zeros([batch_size, max_doc_l], np.int32)
        sent_l_matrix = np.zeros([batch_size, max_doc_l, max_ans_l], np.int32)

        token_idxs_matrix = np.zeros([batch_size, max_doc_l, max_ans_l, max_sent_l], np.int32)
        abstract_idx_matrix = np.zeros([batch_size,max_abstract_l], np.int32)

        mask_tokens_matrix = np.ones([batch_size, max_doc_l, max_ans_l, max_sent_l], np.float32)
        mask_sents_matrix = np.ones([batch_size, max_doc_l, max_ans_l], np.float32)
        mask_answers_matrix = np.ones([batch_size, max_doc_l],np.float32)
        mask_abstact_matrix = np.ones([batch_size,max_abstract_l],np.float32)

        for i, instance in enumerate(batch):
            n_answers = len(instance.token_idxs)
            abstract_ = instance.abstract_idxs
            abstract_idx_matrix[i,:len(abstract_)] = np.asarray(abstract_)
            mask_abstact_matrix[i,len(abstract_):] = 0
            abstracts_l_matrix[i] = len(abstract_)

            for j, ans in enumerate(instance.token_idxs):
                for k, sent in enumerate(instance.token_idxs[j]):
                    token_idxs_matrix[i, j, k,:len(sent)] = np.asarray(sent)
                    mask_tokens_matrix[i, j, k,len(sent):] = 0
                    sent_l_matrix[i, j,k] = len(sent)

                mask_sents_matrix[i,j,len(ans):]=0
                ans_l_matrix[i,j] = len(ans)

            mask_answers_matrix[i, n_answers:] = 0
        
        mask_parser_1 = np.ones([batch_size, max_doc_l, max_doc_l], np.float32)
        mask_parser_2 = np.ones([batch_size, max_doc_l, max_doc_l], np.float32)
        mask_parser_1[:, :, 0] = 0
        mask_parser_2[:, 0, :] = 0
        
        feed_dict = {self.t_variables['token_idxs']: token_idxs_matrix,self.t_variables['abstract_idxs']: abstract_idx_matrix,
                     self.t_variables['sent_l']: sent_l_matrix,self.t_variables['ans_l']:ans_l_matrix,self.t_variables['doc_l']: doc_l_matrix, 
                     self.t_variables['mask_tokens']: mask_tokens_matrix, self.t_variables['mask_sents']: mask_sents_matrix, self.t_variables['mask_answers']:mask_answers_matrix,
                     self.t_variables['abstract_l']:abstracts_l_matrix,
                     self.t_variables['max_sent_l']: max_sent_l,self.t_variables['max_ans_l']:max_ans_l, self.t_variables['max_doc_l']: max_doc_l,self.t_variables['max_abstract_l']: max_abstract_l,
                     self.t_variables['mask_parser_1']: mask_parser_1, self.t_variables['mask_parser_2']: mask_parser_2,
                     self.t_variables['batch_l']: batch_size, self.t_variables['keep_prob']:self.config.keep_prob}
        
        return  feed_dict



    def build(self):
        with tf.variable_scope("Embeddings"):
            #Initial embedding placeholders
            self.embeddings = tf.get_variable("emb", [self.config.n_embed, self.config.d_embed], dtype=tf.float32,
                                         initializer=tf.contrib.layers.xavier_initializer())
            embeddings_root = tf.get_variable("emb_root", [1, 1, 2 * self.config.dim_sem], dtype=tf.float32,
                                                  initializer=tf.contrib.layers.xavier_initializer())
            embeddings_root_a = tf.get_variable("emb_root_ans", [1, 1,2* self.config.dim_sem], dtype=tf.float32,
                                                    initializer=tf.contrib.layers.xavier_initializer())
            embeddings_root_s = tf.get_variable("emb_root_s", [1, 1,2* self.config.dim_sem], dtype=tf.float32,
                                                    initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope("Model"):
            #Weights and biases at pooling layers and final softmax for output. (Fianl might not be required)(Semantic combination part)
            w_comb = tf.get_variable("w_comb", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            b_comb = tf.get_variable("bias_comb", [2 * self.config.dim_sem], dtype=tf.float32, initializer=tf.constant_initializer())

            w_comb_a = tf.get_variable("w_comb_a", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            b_comb_a = tf.get_variable("bias_comb_a", [2 * self.config.dim_sem], dtype=tf.float32, initializer=tf.constant_initializer())

            w_comb_s = tf.get_variable("w_comb_s", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            b_comb_s = tf.get_variable("bias_comb_s", [2 * self.config.dim_sem], dtype=tf.float32, initializer=tf.constant_initializer())

            w_softmax = tf.get_variable("w_softmax", [2 * self.config.dim_sem, self.config.dim_output], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            b_softmax = tf.get_variable("bias_softmax", [self.config.dim_output], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope("Structure/doc"):
            #Placeholders for hierarchical model at document level(structural part)
            tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope("Structure/ans"):
            #Placeholders for  hierarchial model at answer level(structural part)
            tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

            tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope("Structure/sent"):
            #Placeholders for hierarchial model at sentence level(structural part)
            tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

            tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
            tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())

        #Variables of dimension batchsize passing length of each vector to architectures
        sent_l = self.t_variables['sent_l']
        ans_l = self.t_variables['ans_l']
        doc_l = self.t_variables['doc_l']
        abstract_l = self.t_variables['abstract_l']
        
        #Maximum lengths of sentences, answers and documents to be processed
        max_sent_l = self.t_variables['max_sent_l']
        max_ans_l = self.t_variables['max_ans_l']
        max_doc_l = self.t_variables['max_doc_l']
        max_abstract_l = self.t_variables['max_abstract_l']

        #batch size
        batch_l = self.t_variables['batch_l']

        #Creating embedding matrices for answers and abstracts corresponding to indexes
        tokens_input = tf.nn.embedding_lookup(self.embeddings, self.t_variables['token_idxs'][:,:max_doc_l, :max_ans_l, :max_sent_l])
        reference_input = tf.nn.embedding_lookup(self.embeddings,self.t_variables['abstract_idxs'][:,:max_abstract_l])
        
        #Dropout on input
        tokens_input = tf.nn.dropout(tokens_input, self.t_variables['keep_prob'])

        #Masking inputs
        mask_tokens = self.t_variables['mask_tokens'][:,:max_doc_l, :max_ans_l, :max_sent_l]
        mask_sents = self.t_variables['mask_sents'][:, :max_doc_l,:max_ans_l]
        mask_answers = self.t_variables['mask_answers'][:,:max_doc_l]


        [_, _, _, _, rnn_size] = tokens_input.get_shape().as_list()
        tokens_input_do = tf.reshape(tokens_input, [batch_l * max_doc_l*max_ans_l, max_sent_l, rnn_size])

        sent_l = tf.reshape(sent_l, [batch_l * max_doc_l* max_ans_l])
        mask_tokens = tf.reshape(mask_tokens, [batch_l * max_doc_l*max_ans_l, -1])

        #Word level input
        tokens_output, _ = dynamicBiRNN(tokens_input_do, sent_l, n_hidden=self.config.dim_hidden,
                                        cell_type=self.config.rnn_cell, cell_name='Model/sent')
        
        tokens_sem = tf.concat([tokens_output[0][:,:,:self.config.dim_sem], tokens_output[1][:,:,:self.config.dim_sem]], 2)
        tokens_str = tf.concat([tokens_output[0][:,:,self.config.dim_sem:], tokens_output[1][:,:,self.config.dim_sem:]], 2)
        
        temp1 = tf.zeros([batch_l * max_doc_l*max_ans_l, max_sent_l,1], tf.float32)
        temp2 = tf.zeros([batch_l * max_doc_l*max_ans_l ,1,max_sent_l], tf.float32)

        mask1 = tf.ones([batch_l * max_doc_l * max_ans_l, max_sent_l, max_sent_l-1], tf.float32)
        mask2 = tf.ones([batch_l * max_doc_l * max_ans_l, max_sent_l-1, max_sent_l], tf.float32)
        
        mask1 = tf.concat([temp1,mask1],2)
        mask2 = tf.concat([temp2,mask2],1)

        str_scores_s_ = get_structure('sent', tokens_str, max_sent_l, mask1, mask2)  # batch_l,  sent_l+1, sent_l
        str_scores_s = tf.matrix_transpose(str_scores_s_)  # soft parent
        tokens_sem_root = tf.concat([tf.tile(embeddings_root_s, [batch_l * max_doc_l *max_ans_l, 1, 1]), tokens_sem], 1)
        tokens_output_ = tf.matmul(str_scores_s, tokens_sem_root)
        tokens_output = LReLu(tf.tensordot(tf.concat([tokens_sem, tokens_output_], 2), w_comb_s, [[2], [0]]) + b_comb_s)

        if (self.config.sent_attention == 'sum'):
            tokens_output = tokens_output * tf.expand_dims(mask_tokens,2)
            tokens_output = tf.reduce_sum(tokens_output, 1)
        elif (self.config.sent_attention == 'mean'):
            tokens_output = tokens_output * tf.expand_dims(mask_tokens,2)
            tokens_output = tf.reduce_sum(tokens_output, 1)/tf.expand_dims(tf.cast(sent_l,tf.float32),1)
        elif (self.config.sent_attention == 'max'):
            tokens_output = tokens_output + tf.expand_dims((mask_tokens-1)*999,2)
            tokens_output = tf.reduce_max(tokens_output, 1)

        #Sentence level RNN
        sents_input = tf.reshape(tokens_output, [batch_l*max_doc_l, max_ans_l,2*self.config.dim_sem])
        ans_l = tf.reshape(ans_l,[batch_l*max_doc_l])
        mask_sents = tf.reshape(mask_sents,[batch_l*max_doc_l,-1])

        sents_output, _ = dynamicBiRNN(sents_input, ans_l, n_hidden=self.config.dim_hidden, cell_type=self.config.rnn_cell, cell_name='Model/ans')

        sents_sem = tf.concat([sents_output[0][:,:,:self.config.dim_sem], sents_output[1][:,:,:self.config.dim_sem]], 2)
        sents_str = tf.concat([sents_output[0][:,:,self.config.dim_sem:], sents_output[1][:,:,self.config.dim_sem:]], 2)

        temp1 = tf.zeros([batch_l * max_doc_l, max_ans_l, 1], tf.float32)
        temp2 = tf.zeros([batch_l * max_doc_l, 1, max_ans_l], tf.float32)

        mask1 = tf.ones([batch_l * max_doc_l , max_ans_l, max_ans_l-1], tf.float32)
        mask2 = tf.ones([batch_l * max_doc_l , max_ans_l-1, max_ans_l], tf.float32)
        
        mask1 = tf.concat([temp1,mask1],2)
        mask2 = tf.concat([temp2,mask2],1)

        str_scores_ = get_structure('ans', sents_str, max_ans_l, mask1,mask2)  #batch_l,  sent_l+1, sent_l
        str_scores = tf.matrix_transpose(str_scores_)  # soft parent
        sents_sem_root = tf.concat([tf.tile(embeddings_root_a, [batch_l*max_doc_l, 1, 1]), sents_sem], 1)
        sents_output_ = tf.matmul(str_scores, sents_sem_root)
        sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_output_], 2), w_comb, [[2], [0]]) + b_comb)

        if (self.config.doc_attention == 'sum'):
            sents_output = sents_output * tf.expand_dims(mask_sents,2)
            sents_output = tf.reduce_sum(sents_output, 1)
        elif (self.config.doc_attention == 'mean'):
            sents_output = sents_output * tf.expand_dims(mask_sents,2)
            sents_output = tf.reduce_sum(sents_output, 1)/tf.expand_dims(tf.cast(ans_l,tf.float32),1)
        elif (self.config.doc_attention == 'max'):
            sents_output = sents_output + tf.expand_dims((mask_sents-1)*999,2)
            sents_output = tf.reduce_max(sents_output, 1)

        #Answer level RNN
        ans_input = tf.reshape(sents_output, [batch_l, max_doc_l,2*self.config.dim_sem])
        ans_output, _ = dynamicBiRNN(ans_input, doc_l, n_hidden=self.config.dim_hidden, cell_type=self.config.rnn_cell, cell_name='Model/doc')

        ans_sem = tf.concat([ans_output[0][:,:,:self.config.dim_sem], ans_output[1][:,:,:self.config.dim_sem]], 2)
        ans_str = tf.concat([ans_output[0][:,:,self.config.dim_sem:], ans_output[1][:,:,self.config.dim_sem:]], 2)

        str_scores_ = get_structure('doc', ans_str, max_doc_l, self.t_variables['mask_parser_1'], self.t_variables['mask_parser_2'])  #batch_l,  sent_l+1, sent_l
        str_scores = tf.matrix_transpose(str_scores_)  # soft parent
        ans_sem_root = tf.concat([tf.tile(embeddings_root, [batch_l, 1, 1]), ans_sem], 1)
        ans_output_ = tf.matmul(str_scores, ans_sem_root)
        ans_output = LReLu(tf.tensordot(tf.concat([ans_sem, ans_output_], 2), w_comb, [[2], [0]]) + b_comb)

        if (self.config.ans_attention == 'sum'):
            ans_output = ans_output * tf.expand_dims(mask_answers,2)
            # ans_output = tf.reduce_sum(ans_output, 1)
        elif (self.config.ans_attention == 'mean'):
            ans_output = ans_output * tf.expand_dims(mask_answers,2)
            ans_output = tf.reduce_sum(ans_output, 1)/tf.expand_dims(tf.cast(doc_l,tf.float32),1)
        elif (self.config.ans_attention == 'max'):
            ans_output = ans_output + tf.expand_dims((mask_answers-1)*999,2)
            ans_output = tf.reduce_max(ans_output, 1)

        print("Encoder pooled ans_output shape", ans_output.shape)
        encoder_output = ans_output

        tgt_vocab_size = self.config.vsize
        learning_rate = self.config.lr
        
        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.config.dim_output)
        helper = tf.contrib.seq2seq.TrainingHelper(reference_input, abstract_l, time_major=True)
        lstm_init = tf.contrib.rnn.LSTMStateTuple(encoder_output,encoder_output)
        projection_layer = tf.layers.Dense(tgt_vocab_size, use_bias=False)
        
        #How to initialize the LSTM?
        decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper,lstm_init,output_layer=projection_layer)
        
        outputs, states, seq_l = tf.contrib.seq2seq.dynamic_decode(decoder)
        logits = outputs.rnn_output

        print("rnn_output.shape=", outputs.rnn_output.shape)
        print("sample_id.shape=", outputs.sample_id.shape)
        print("final_state=", states)
        print("final_sequence_lengths.shape=", seq_l.shape)

        #Returns loss. 
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.t_variables['abstract_idxs'], logits=logits)

        global_step = tf.Variable(0, name='global_step', trainable=False)

        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)

        optimizer = tf.train.AdamOptimizer(learning_rate,epsilon=0.1)
        update_step = optimizer.apply_gradients(zip(clipped_gradients, params),global_step=global_step)

        self.final_output = logits
        self.loss = loss
        self.opt = optimizer.minimize(loss)

#         inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding_decoder,tf.fill([hparams.batch_size], tgt_sos_id), tgt_eos_id)
#         inference_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, inference_helper, initial_state,output_layer=projection_layer)
#         source_sequence_length = hparams.encoder_lengthmaximum_iterations = tf.round(tf.reduce_max(source_sequence_length) * 2)
#         outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder, maximum_iterations=maximum_iterations)translations = outputs.sample_id

#         decoder_initial_state = tf.contrib.seq2seq.tile_batch(initial_state, multiplier=hparams.beam_width)
#         inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(cell=decoder_cell,embedding=embedding_decoder,start_tokens=tf.fill([hparams.batch_size], tgt_sos_id),
#         end_token=tgt_eos_id,initial_state=decoder_initial_state,beam_width=hparams.beam_width,output_layer=projection_layer,
#         length_penalty_weight=0.0)
#         outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder, maximum_iterations=maximum_iterations)
#         translations = outputs.predicted_ids



In [11]:
config = flags.FLAGS
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu)

import random

hash = random.getrandbits(32)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
ah = logging.FileHandler(str(hash)+'.log')
ah.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(message)s')
ah.setFormatter(formatter)
logger.addHandler(ah)

In [12]:
gc.disable()
train, dev, test, embeddings, vocab = pickle.load(open(config.data_file,'rb'))
gc.enable()
print('Data loaded succesfully')



Data loaded succesfully


In [13]:
trainset, devset, testset = DataSet(train), DataSet(dev), DataSet(test)
vocab = dict([(v.index,k) for k,v in vocab.items()])
trainset.sort()
train_batches = trainset.get_batches(config.batch_size, config.epochs, rand=True)
dev_batches = devset.get_batches(config.batch_size, 1, rand=False)
test_batches = testset.get_batches(config.batch_size, 1, rand=False)
dev_batches = [i for i in dev_batches]
test_batches = [i for i in test_batches]

num_examples, train_batches, dev_batches, test_batches, embedding_matrix, vocab = len(train), train_batches, dev_batches, test_batches, embeddings, vocab


In [14]:
config.n_embed, config.d_embed = embedding_matrix.shape
config.vocab = vocab
config.vsize = len(vocab)
config.inv_vocab = {v: k for k, v in vocab.items()}
config.dim_hidden = config.dim_sem+config.dim_str

# print(config.__flags)
logger.critical(str(config.__flags))


In [15]:
model = StructureModel(config)

In [16]:
model.build()

Encoder pooled ans_output shape (?, 150)
rnn_output.shape= (?, ?, 226129)
sample_id.shape= (?, ?)
final_state= LSTMStateTuple(c=<tf.Tensor 'decoder/while/Exit_3:0' shape=(?, 150) dtype=float32>, h=<tf.Tensor 'decoder/while/Exit_4:0' shape=(?, 150) dtype=float32>)
final_sequence_lengths.shape= (?,)


In [17]:
num_batches_per_epoch = int(num_examples / config.batch_size)
num_steps = config.epochs * num_batches_per_epoch

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    gvi = tf.global_variables_initializer()
    sess.run(gvi)
    sess.run(model.embeddings.assign(embedding_matrix.astype(np.float32)))
    loss = 0

    for ct, batch in tqdm.tqdm(train_batches, total=num_steps):
        feed_dict = model.get_feed_dict(batch)
        outputs,_,_loss = sess.run([model.final_output, model.opt, model.loss], feed_dict=feed_dict)
        loss+=_loss
#         if(ct%config.log_period==0):
#             replies = sess.run([translations], feed_dict=feed_dict)
#             print(replies)
#             print('Step: {} Loss: {}\n'.format(ct, loss))
# #             print('Test ACC: {}\n'.format(acc_test))
# #             print('Dev  ACC: {}\n'.format(acc_dev))
#             logger.debug('Step: {} Loss: {}\n'.format(ct, loss))
# #             logger.debug('Test ACC: {}\n'.format(acc_test))
# #             logger.debug('Dev  ACC: {}\n'.format(acc_dev))
#             logger.handlers[0].flush()
#             loss = 0

  0%|          | 0/1826070 [00:00<?, ?it/s]


InvalidArgumentError: Tried to read from index 1 but array size is: 1
	 [[Node: decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3 = TensorArrayReadV3[dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3/Switch, decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3/Switch_1, decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3/Switch_2)]]

Caused by op 'decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3', defined at:
  File "/usr/local/Cellar/python/3.7.2_1/Frameworks/Python.framework/Versions/3.7/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/local/Cellar/python/3.7.2_1/Frameworks/Python.framework/Versions/3.7/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/local/Cellar/python/3.7.2_1/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 539, in run_forever
    self._run_once()
  File "/usr/local/Cellar/python/3.7.2_1/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 1775, in _run_once
    handle._run()
  File "/usr/local/Cellar/python/3.7.2_1/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3191, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-16-2f52417d930d>", line 1, in <module>
    model.build()
  File "<ipython-input-10-f1da5f099d2b>", line 323, in build
    outputs, states, seq_l = tf.contrib.seq2seq.dynamic_decode(decoder)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 286, in dynamic_decode
    swap_memory=swap_memory)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 234, in body
    decoder_finished) = decoder.step(time, inputs, state)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py", line 147, in step
    sample_ids=sample_ids)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/helper.py", line 241, in next_inputs
    lambda: nest.map_structure(read_from_ta, self._input_tas))
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
    return func(*args, **kwargs)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1864, in cond
    orig_res_f, res_f = context_f.BuildCondBranch(false_fn)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1725, in BuildCondBranch
    original_result = fn()
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/helper.py", line 241, in <lambda>
    lambda: nest.map_structure(read_from_ta, self._input_tas))
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/util/nest.py", line 413, in map_structure
    structure[0], [func(*x) for x in entries])
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/util/nest.py", line 413, in <listcomp>
    structure[0], [func(*x) for x in entries])
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/helper.py", line 238, in read_from_ta
    return inp.read(next_time)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/util/tf_should_use.py", line 49, in fn
    return method(self, *args, **kwargs)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/util/tf_should_use.py", line 49, in fn
    return method(self, *args, **kwargs)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 281, in read
    name=name)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 4445, in _tensor_array_read_v3
    dtype=dtype, name=name)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Users/300041707/Personal/myenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Tried to read from index 1 but array size is: 1
	 [[Node: decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3 = TensorArrayReadV3[dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3/Switch, decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3/Switch_1, decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/TensorArrayReadV3/Switch_2)]]
