In [1]:
import numpy as np
import tensorflow as tf
import json
import os
from nltk.tokenize import word_tokenize
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm import tqdm

%cd "E:\NLP\QA"

E:\NLP\QA


In [2]:
# Paragraph corpus and the question/answer training samples.
with open("documents.json") as docs_file:
    doc = json.load(docs_file)
with open("training.json") as train_file:
    train = json.load(train_file)

# Token -> id and character -> id vocabularies written by the cells below.
with open("mapping.json","r") as mapping_file:
    word_dict = json.load(mapping_file)
with open("char_dict.json","r") as char_file:
    char_dict = json.load(char_file)

# Embedding matrix saved with np.save, cast to float32 after loading.
embedding = np.load("embedding.npy").astype("float32")

In [44]:
# Collect the lower-cased vocabulary of every document paragraph and every
# training question.  The set is grown directly instead of first building a
# list holding every token occurrence of the corpus; the printed counts and
# the final `tokens` set are the same.
tokens = set()
for d in doc:
    for paragraph in d["text"]:
        tokens.update(word_tokenize(paragraph.lower()))
# distinct tokens found in the documents alone
print(len(tokens))
for sample in train:
    tokens.update(word_tokenize(sample["question"].lower()))
# distinct tokens after adding the questions
print(len(tokens))

98289
100757


In [67]:
# Keep only the GloVe vectors whose word occurs in the corpus vocabulary
# (`tokens`); word ids are assigned in the order the words appear in the file.
embedding = []
word_dict = dict()
with open("glove.6B.50d.txt","r",encoding="utf-8", errors="ignore") as glove_file:
    for row in glove_file:
        fields = row.split()
        word = fields[0]
        if word not in tokens:
            continue
        embedding.append(np.array([float(value) for value in fields[1:]]))
        word_dict[word] = len(word_dict)

# Two extra rows are appended last: a random vector for unknown words,
# then an all-zero vector for padding.
for special, vector in (("<UNK>", np.random.uniform(-0.5,0.5,50)),
                        ("<PAD>", np.zeros(50))):
    embedding.append(vector)
    word_dict[special] = len(word_dict)

In [68]:
# Persist the word -> id mapping and the embedding rows; np.save appends the
# ".npy" suffix, which is the file name the loading cell reads.
with open("mapping.json","w") as mapping_file:
    json.dump(word_dict, mapping_file)
np.save("embedding", embedding)

In [68]:
# Build the character vocabulary from every key of word_dict (this includes
# the characters of the special "<UNK>" / "<PAD>" keys, as before).
char_set = set()
for token in word_dict.keys():
    char_set.update(token)
print(len(char_set))

# Ids are assigned in sorted order: iterating a set of strings directly gives
# an order that can change between interpreter runs, which would silently
# change the character ids every time this cell is re-executed.
char_dict = dict()
for char in sorted(char_set):
    char_dict[char] = len(char_dict)
# The unknown and padding entries take the last two ids, padding being last.
char_dict["<unk>"] = len(char_dict)
char_dict["<pad>"] = len(char_dict)
print(len(char_dict))
with open("char_dict.json","w") as f:
    json.dump(char_dict, f)

214
216


In [4]:
# Sanity check: the embedding row count should equal the word vocabulary size,
# and the padding entries should hold the last id of their mapping.
print(embedding.shape)
print(len(word_dict))
print(len(char_dict))
print(word_dict["<PAD>"])
print(char_dict["<pad>"])

(72499, 50)
72499
216
72498
215


In [7]:
topk = 3

# Punctuation characters and NLTK's English stopword list, both kept as sets.
punc = set('"\'?.,/<>:;')
stopword = set(stopwords.words('english'))

In [8]:
def unknown_detection(token_list):
    """Return a new list in which every token that is not a key of the
    module-level word_dict is replaced by "<UNK>".

    The input list is left unmodified.
    """
    return [token if token in word_dict else "<UNK>" for token in token_list]

def unknown_detection_char(char_list):
    """Overwrite, in place, every entry of char_list that is not a key of the
    module-level char_dict with "<unk>", and return the same list object.
    """
    for position, char in enumerate(char_list):
        if char not in char_dict:
            char_list[position] = "<unk>"
    return char_list

def generate_char(token_list, max_len=16):
    """Expand tokens into a flat list of per-character symbols.

    Every token contributes exactly ``max_len`` entries: its first
    ``max_len`` characters, right-padded with "<pad>".  The token "<PAD>"
    contributes ``max_len`` "<pad>" entries.  Any entry that is not a key of
    the module-level char_dict is emitted as "<unk>".

    Args:
        token_list: iterable of string tokens.
        max_len: number of character slots per token (default 16, the width
            that used to be hard-coded here).

    Returns:
        A list of ``len(token_list) * max_len`` strings.
    """
    new_list = []
    for token in token_list:
        # A padding token has no characters of its own.
        chars = [] if token == "<PAD>" else list(token[:max_len])
        chars.extend(["<pad>"] * (max_len - len(chars)))
        new_list.extend(c if c in char_dict else "<unk>" for c in chars)
    return new_list

def _find_answer_span(para_tokens, answer_tokens):
    """Return (start, end) of the first complete occurrence of answer_tokens
    inside para_tokens (end exclusive), or None when there is none."""
    span = len(answer_tokens)
    if span == 0:
        # An empty answer cannot be located.
        return None
    for start in range(len(para_tokens) - span + 1):
        if para_tokens[start:start + span] == answer_tokens:
            return (start, start + span)
    return None


def _pad_or_truncate(tokens, length):
    """Return a new list of exactly `length` tokens, cut or right-padded with "<PAD>"."""
    clipped = tokens[:length]
    return clipped + ["<PAD>"] * (length - len(clipped))


# Turn every training sample into fixed-size token / character sequences:
# 30 question tokens, 240 paragraph tokens, 7 answer tokens, 16 characters
# per token.  Samples whose answer cannot be found in the (truncated)
# paragraph are dropped.
padded_train = []
for sample in tqdm(train):
    new_sample = dict()
    docid = sample["docid"]
    # Documents are looked up by list position, so the stored id must agree.
    assert(doc[docid]["docid"] == docid)

    question = word_tokenize(sample["question"].lower())
    answer = word_tokenize(sample["text"].lower())
    answer_para = sample["answer_paragraph"]
    para = word_tokenize(doc[docid]["text"][answer_para].lower())[:240]

    # Locate the answer tokens in the paragraph.  Only a complete match
    # counts: the previous while/else scan also accepted an answer that was
    # cut off by the end of the truncated paragraph, and raised IndexError
    # for an empty answer.
    answer_idx = _find_answer_span(para, answer)
    # ignore samples that no answer can be found
    if answer_idx is None:
        continue

    para = _pad_or_truncate(para, 240)
    content_char = generate_char(para)
    content = unknown_detection(para)

    answer = _pad_or_truncate(answer, 7)
    answer_char = generate_char(answer)
    answer = unknown_detection(answer)

    padded_question = _pad_or_truncate(question, 30)
    question_char = generate_char(padded_question)
    padded_question = unknown_detection(padded_question)

    new_sample["question"] = padded_question
    new_sample["q_char"] = question_char
    new_sample["content"] = content
    new_sample["c_char"] = content_char
    new_sample["answer"] = answer
    new_sample["answer_char"] = answer_char
    new_sample["answer_idx"] = answer_idx

    # Guard the fixed sizes the network's placeholders rely on.
    assert len(padded_question) == 30
    assert len(question_char) == 480
    assert len(content) == 240
    assert len(content_char) == 3840
    assert len(answer) == 7
    assert len(answer_char) == 112
    assert len(answer_idx) == 2

    padded_train.append(new_sample)

100%|███████████████████████████████████████████████████████████████████████████| 43379/43379 [01:55<00:00, 373.97it/s]


In [9]:
# Inspect which fields a preprocessed sample carries.
padded_train[888].keys()

dict_keys(['question', 'answer', 'q_char', 'c_char', 'content', 'answer_char', 'answer_idx'])

In [10]:
# Spot-check one sample: slicing the padded paragraph with the stored
# (start, end) span should reproduce the non-padding answer tokens.
curr = padded_train[1]
start, end = curr["answer_idx"]
print(curr["content"][start:end])
print(curr["answer"])
print(list(curr["answer_idx"]))

['cylinder']
['cylinder', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
[76, 77]


In [11]:
def generate_training_data(padded):
    """Map padded samples to integer id arrays.

    Each sample is a dict with the keys "question", "content", "q_char",
    "c_char", "answer" and "answer_idx".  Tokens are mapped through the
    module-level word_dict and characters through char_dict.  Samples whose
    answer (the tokens before its first "<PAD>") consists only of "<UNK>"
    are skipped; an answer with no tokens at all is skipped as well.

    Args:
        padded: iterable of sample dicts as described above.

    Returns:
        Tuple of numpy arrays ``(c, c_chars, q, q_chars, a_idx)``: content
        token ids, content character ids, question token ids, question
        character ids and the stored answer spans, one row per kept sample.
    """
    c, c_chars, q, q_chars, a_idx = [], [], [], [], []

    for sample in tqdm(padded):
        question = sample["question"]
        content = sample["content"]
        q_char = sample["q_char"]
        c_char = sample["c_char"]
        aidx = sample["answer_idx"]
        answer = sample["answer"]

        # ignore answer only contains <UNK>; the real answer tokens are the
        # ones in front of the first padding token
        idx = answer.index("<PAD>") if "<PAD>" in answer else len(answer)
        if all(t == "<UNK>" for t in answer[:idx]):
            continue

        c.append([word_dict[t] for t in content])
        q.append([word_dict[t] for t in question])
        c_chars.append([char_dict[ch] for ch in c_char])
        q_chars.append([char_dict[ch] for ch in q_char])
        a_idx.append(aidx)

    return np.array(c), np.array(c_chars), np.array(q), np.array(q_chars), np.array(a_idx)
 

In [12]:
def f_score(pred_s, pred_e, true_s, true_e, context):
    """Average token-level F-measure of predicted spans over a batch.

    For sample i the predicted span is ``context[i][pred_s[i]:pred_e[i]+1]``
    and the gold span is ``context[i][true_s[i]:true_e[i]+1]`` (both ends
    inclusive).  A sample contributes 0 when the predicted end precedes the
    predicted start or when the two spans share no token.

    Args:
        pred_s, pred_e: predicted start / end positions, one per sample.
        true_s, true_e: gold start / end positions, one per sample.
        context: per-sample token sequences the positions index into.

    Returns:
        Sum of the per-sample F values divided by the batch size; 0.0 for an
        empty batch.
    """
    l = len(pred_s)
    if l == 0:
        # Nothing to average; avoids dividing by zero below.
        return 0.0
    f_sum = 0
    for i in range(l):
        if pred_e[i] < pred_s[i]:
            continue
        guess = context[i][pred_s[i]:pred_e[i]+1]
        true = context[i][true_s[i]:true_e[i]+1]
        TP = sum(1 for token in guess if token in true)
        FP = len(guess) - TP
        FN = sum(1 for token in true if token not in guess)
        if TP == 0:
            # Precision and recall are both zero (or undefined when the gold
            # span is empty), so the sample adds nothing to the sum.
            continue
        precision = TP/(TP+FP)
        recall = TP/(TP+FN)
        f_sum += 2*precision*recall/(precision+recall+1e-8)
    return f_sum/l

In [30]:
# Convert the padded samples into the id arrays that feed the input pipeline.
c, c_char, q, q_char, a_idx = generate_training_data(padded_train)

100%|██████████████████████████████████████████████████████████████████████████| 41802/41802 [00:17<00:00, 2431.38it/s]


In [108]:
# Peek at one stored answer span.
print(a_idx[30000])

[24 26]


In [109]:
tf.reset_default_graph()

# One placeholder per training array, shaped like the full array; the data is
# supplied when the iterator is initialised.
c_ph = tf.placeholder(tf.int32, c.shape)
c_char_ph = tf.placeholder(tf.int32, c_char.shape)
q_ph = tf.placeholder(tf.int32, q.shape)
q_char_ph = tf.placeholder(tf.int32, q_char.shape)
aidx_ph = tf.placeholder(tf.int32, a_idx.shape)

dataset = tf.data.Dataset.from_tensor_slices((c_ph, c_char_ph, q_ph, q_char_ph, aidx_ph))

# Number of passes over the data and number of samples per batch.
epoch, batch = 20, 20

def make_dataset(dataset):
    """Shuffle (buffer 20000), repeat `epoch` times, batch by `batch`,
    prefetch, and return an initializable iterator over the result."""
    pipeline = (dataset.shuffle(20000)
                       .repeat(epoch)
                       .batch(batch)
                       .prefetch(batch*2))
    return pipeline.make_initializable_iterator()

train_iter = make_dataset(dataset)
next_batch = train_iter.get_next()

In [110]:
# todo: depthwise separable convolutions
# todo: position encoding
# todo: multihead attention(maybe)
# todo: regularization(dropout)

def embedding_encoder_block(scope, inputs):
    """Encoder block: four width-7 convolutions, dot-product self-attention
    and a position-wise feed-forward layer, with 128 output channels.

    Args:
        scope: variable-scope name; opened with reuse=tf.AUTO_REUSE.
        inputs: rank-3 tensor fed to conv1d.

    Returns:
        The output of the final residual addition.

    Relies on the module-level placeholder `dp` (drop probability), which
    must exist when this function is called; dropout is applied with
    keep probability 1-dp.

    NOTE(review): the conv / layer-norm layers are created without explicit
    names, so confirm that calling this twice with the same scope really
    shares the variables rather than creating a second set -- TODO verify.
    """
    with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):
        #convolution block
        # The first convolution maps the input to 128 channels; its output
        # is the base that the later residual additions build on.
        residual1 = tf.layers.conv1d(inputs, 128, 7, padding="same",activation=tf.nn.relu)
        
        norm1 = tf.contrib.layers.layer_norm(residual1)
        norm1 = tf.nn.dropout(norm1, 1-dp)
        conv2 = tf.layers.conv1d(norm1, 128, 7, padding="same",activation=tf.nn.relu)
        conv2 = tf.nn.dropout(conv2, 1-dp)
        residual2 = tf.add(residual1, conv2)
        
        norm2 = tf.contrib.layers.layer_norm(residual2)
        norm2 = tf.nn.dropout(norm2, 1-dp)
        conv3 = tf.layers.conv1d(norm2, 128, 7, padding="same",activation=tf.nn.relu)
        conv3 = tf.nn.dropout(conv3, 1-dp)
        residual3 = tf.add(residual2, conv3)
        
        norm3 = tf.contrib.layers.layer_norm(residual3)
        norm3 = tf.nn.dropout(norm3, 1-dp)
        conv4 = tf.layers.conv1d(norm3, 128, 7, padding="same",activation=tf.nn.relu)
        conv4 = tf.nn.dropout(conv4, 1-dp)
        residual4 = tf.add(residual3, conv4)
        
        # self-attention block
        # Single-head scaled dot-product attention in which the normalised
        # input serves as query, key and value (no learned projections).
        norm4 = tf.contrib.layers.layer_norm(residual4)
        attention = tf.matmul(norm4, norm4, transpose_b=True)
        dk = tf.cast(tf.shape(norm4)[-1], dtype=tf.float32)
        scaled = tf.divide(attention, tf.sqrt(dk))
        attention = tf.nn.softmax(scaled, axis=-1)
        attention_out = tf.matmul(attention, norm4)
        residual5 = tf.add(residual4, attention_out)
        
        # feed-forward layer (two kernel-size-1 convolutions)
        norm5 = tf.contrib.layers.layer_norm(residual5)
        norm5 = tf.nn.dropout(norm5, 1-dp)
        ffn1 = tf.layers.conv1d(norm5, 128, 1, activation=tf.nn.relu)
        ffn1 = tf.nn.dropout(ffn1, 1-dp)
        ffn2 = tf.layers.conv1d(ffn1, 128, 1)
        ffn2 = tf.nn.dropout(ffn2, 1-dp)
        residual6 = tf.add(residual5, ffn2)
    return residual6

def model_encoder_block(scope, inputs):
    """Stack of seven sub-blocks, each made of a width-7 convolution,
    dot-product self-attention and a feed-forward layer, every one wrapped
    in a residual connection.

    Args:
        scope: variable-scope name; opened with reuse=tf.AUTO_REUSE.
        inputs: rank-3 tensor; it is added to 128-channel convolution
            outputs, so its last dimension has to be compatible with 128.

    Returns:
        The output of the seventh sub-block.

    Relies on the module-level placeholder `dp` (drop probability); dropout
    is applied with keep probability 1-dp.

    NOTE(review): the layers inside each sub-scope have no explicit names,
    so confirm that repeated calls with the same scope actually share
    weights -- TODO verify.
    """
    with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):
        outputs = inputs
        for i in range(7):
            # Layer norm -> dropout -> conv -> dropout, added back to the input.
            with tf.variable_scope("conv_block{}".format(i),reuse=tf.AUTO_REUSE):
                norm1 = tf.contrib.layers.layer_norm(outputs)
                norm1 = tf.nn.dropout(norm1, 1-dp)
                conv1 = tf.layers.conv1d(norm1, 128, 7, padding="same", activation=tf.nn.relu)
                conv1 = tf.nn.dropout(conv1, 1-dp)
                residual1 = tf.add(outputs, conv1)
            
            # Single-head scaled dot-product attention; the normalised input
            # is used as query, key and value.
            with tf.variable_scope("self_attention{}".format(i),reuse=tf.AUTO_REUSE):
                norm2 = tf.contrib.layers.layer_norm(residual1)
                attention = tf.matmul(norm2, norm2, transpose_b=True)
                dk = tf.cast(tf.shape(norm2)[-1], dtype=tf.float32)
                scaled = tf.divide(attention, tf.sqrt(dk))
                attention = tf.nn.softmax(scaled, axis=-1)
                attention_out = tf.matmul(attention, norm2)
                residual2 = tf.add(residual1, attention_out)
            
            # Position-wise feed-forward: two kernel-size-1 convolutions.
            with tf.variable_scope("feedforward{}".format(i),reuse=tf.AUTO_REUSE):
                norm3 = tf.contrib.layers.layer_norm(residual2)
                norm3 = tf.nn.dropout(norm3, 1-dp)
                ffn1 = tf.layers.conv1d(norm3, 128, 1, activation=tf.nn.relu)
                ffn1 = tf.nn.dropout(ffn1, 1-dp)
                ffn2 = tf.layers.conv1d(ffn1, 128, 1)
                ffn2 = tf.nn.dropout(ffn2, 1-dp)
                outputs = tf.add(residual2, ffn2)
    return outputs

def highway(scope, inputs):
    """Two stacked highway layers that keep the channel size of `inputs`.

    Each layer computes a sigmoid gate T and a linear transform H (both
    kernel-size-1 convolutions) and returns T*H + x*(1-T).  The gate bias
    starts at -1.  Dropout with keep probability 1-dp is applied to H, where
    `dp` is the module-level drop-probability placeholder.

    Args:
        scope: variable-scope name; opened with reuse=tf.AUTO_REUSE.
        inputs: rank-3 tensor with a statically known last dimension.

    Returns:
        The output of the second highway layer.
    """
    size = inputs.shape.as_list()[-1]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        T1 = tf.layers.conv1d(inputs, size, 1, activation=tf.nn.sigmoid, bias_initializer=tf.constant_initializer(-1))
        H1 = tf.layers.conv1d(inputs, size, 1)
        H1 = tf.nn.dropout(H1, 1-dp)
        highway1 = T1 * H1 + inputs * (1.0 - T1)
        T2 = tf.layers.conv1d(highway1, size, 1, activation=tf.nn.sigmoid, bias_initializer=tf.constant_initializer(-1))
        H2 = tf.layers.conv1d(highway1, size, 1)
        H2 = tf.nn.dropout(H2, 1-dp)
        highway2 = T2 * H2 + highway1 * (1.0 - T2)
    return highway2

In [111]:
#tf.reset_default_graph()

# Graph inputs.  Sequence lengths are fixed by preprocessing: 30 question
# tokens, 240 paragraph tokens and 16 characters per token (480 = 30*16,
# 3840 = 240*16).  start_mask / end_mask carry the gold start and end
# positions (one integer per sample) that are gathered from the predicted
# distributions below; dp is the drop probability used by every dropout call.
with tf.name_scope("inputs"):
    q_input = tf.placeholder(tf.int32, [None, 30], name="q")
    q_char_input = tf.placeholder(tf.int32, [None, 480], name="q_char")
    c_input = tf.placeholder(tf.int32, [None, 240], name="c")
    c_char_input = tf.placeholder(tf.int32, [None, 3840], name="c_char")
    start_mask = tf.placeholder(tf.int32, [None], name="start_mask")
    end_mask = tf.placeholder(tf.int32, [None], name="end_mask")
    batch_size = tf.placeholder(tf.int32, (), name="batch_size")
    dp = tf.placeholder(tf.float32, (), name="drop_prob")

# Five tensors are registered for inference, in this order; code that reads
# the collection back has to expect all five.
tf.add_to_collection("infer_input", q_input)
tf.add_to_collection("infer_input", q_char_input)
tf.add_to_collection("infer_input", c_input)
tf.add_to_collection("infer_input", c_char_input)
tf.add_to_collection("infer_input", dp)

with tf.variable_scope("Input_Embedding_Layer"):
    # input embedding layer
    with tf.variable_scope("W_Embedding"):
        # The word table is assembled from three pieces so that only the
        # unknown-word row is trained: frozen pretrained rows (all but the
        # last two rows of `embedding`), one trainable "<UNK>" row and one
        # frozen all-zero "<PAD>" row, in the same order as the word ids.
        pretrained_embedding = tf.get_variable("w_embedding",
                                               shape=[72497, 50],
                                               initializer=tf.constant_initializer(embedding[:-2,:]),
                                               trainable=False)
        unknown_embedding = tf.get_variable("unknown",
                                            shape=[1, 50],
                                            initializer=tf.random_uniform_initializer(-0.5,0.5),
                                            trainable=True)
        padding_embedding = tf.get_variable("padding",
                                            shape=[1, 50],
                                            initializer=tf.zeros_initializer(),
                                            trainable=False)
        word_embedding = tf.concat([pretrained_embedding, unknown_embedding, padding_embedding], 0)
        q_embed = tf.nn.embedding_lookup(word_embedding, q_input)
        c_embed = tf.nn.embedding_lookup(word_embedding, c_input)

    with tf.variable_scope("C_Embedding"):
        # 215 trainable character rows followed by a frozen zero row for the
        # padding character (id 215, the last entry of char_dict).
        char_embedding = tf.get_variable("c_embedding",
                                         shape=[215, 200],
                                         initializer=tf.random_uniform_initializer(-0.5,0.5),
                                         trainable=True)
        padding = tf.get_variable("padding",
                                  shape=[1, 200],
                                  initializer=tf.zeros_initializer(),
                                  trainable=False)
        char_combined = tf.concat([char_embedding, padding], 0, name="char_embedding")
        q_char_embed = tf.nn.embedding_lookup(char_combined, q_char_input)
        c_char_embed = tf.nn.embedding_lookup(char_combined, c_char_input)
        # Max-pool over each group of 16 consecutive characters (pool size
        # and stride 16) to get one 200-d vector per word.
        squeeze_to_word_q = tf.layers.max_pooling1d(q_char_embed, 16, 16)
        squeeze_to_word_c = tf.layers.max_pooling1d(c_char_embed, 16, 16)
        
    with tf.variable_scope("embedding_output"):
        # Word and character features are concatenated (50 + 200 channels).
        # NOTE(review): the question uses keep probability 1-dp while the
        # context uses 1-dp*0.5 -- confirm the difference is intentional.
        q_embed_out = tf.concat([q_embed, squeeze_to_word_q], 2)
        q_embed_out = tf.nn.dropout(q_embed_out, 1-dp)
        c_embed_out = tf.concat([c_embed, squeeze_to_word_c], 2)
        c_embed_out = tf.nn.dropout(c_embed_out, 1-dp*0.5)
        q_embed_out = highway("highway", q_embed_out)
        c_embed_out = highway("highway", c_embed_out)

with tf.variable_scope("Embedding_Encoder_Layer"):
    # embedding encoder layer
    q_encoded = embedding_encoder_block("encoder_block", q_embed_out)
    c_encoded = embedding_encoder_block("encoder_block", c_embed_out)
    print(q_encoded.shape, c_encoded.shape)
    
with tf.variable_scope("Context_Query_Attention_Layer"):
    # context_query attention layer
    # first compute similarity matrix between context and query
    # S_tj = w * [C_t; Q_j; C_t*Q_j]
    # Both encodings are tiled to [batch, 240, 30, 128] so that every
    # (context position, question position) pair can be scored.
    c_expand = tf.expand_dims(c_encoded, 2)
    c_expand = tf.tile(c_expand, [1,1,30,1])
    q_expand = tf.expand_dims(q_encoded, 1)
    q_expand = tf.tile(q_expand, [1,240,1,1])
    qc_mul = tf.multiply(c_expand, q_expand)
    concat = tf.concat([c_expand,q_expand,qc_mul], 3)
    # 384 = 3 * 128 concatenated channels.
    w = tf.get_variable("s_w", [384,1])
    
    # similarity matrix S (logits)
    S = tf.einsum("abcde,ef->abcdf", tf.expand_dims(concat,3),w)
    S = tf.squeeze(S,[-2,-1])
    # S_: softmax over rows
    S_ = tf.nn.softmax(S)
    # S__T: transpose of softmax over columns
    S__T = tf.transpose(tf.nn.softmax(S, axis=1),[0,2,1])
    # context_query attention
    A = tf.matmul(S_, q_encoded)
    # query_context attention
    B = tf.matmul(tf.matmul(S_, S__T), c_encoded)
    
    # layer output
    G = tf.concat([c_encoded, A, tf.multiply(c_encoded,A), tf.multiply(c_encoded,B)],2)
    print(G.shape)

with tf.variable_scope("Model_Encoder_Layer"):
    # model encoder layer
    # G is first reduced from 512 to 128 channels, then the same
    # "model_encoder" scope is applied three times in sequence.
    G_conv = tf.layers.conv1d(G, 128, 7, padding="same", activation=tf.nn.relu)
    model_encoder1 = model_encoder_block("model_encoder", G_conv)
    model_encoder2 = model_encoder_block("model_encoder", model_encoder1)
    model_encoder3 = model_encoder_block("model_encoder", model_encoder2)
    print(model_encoder1.shape,model_encoder2.shape,model_encoder3.shape)

global_step = tf.Variable(0,dtype=tf.int32,trainable=False,name='global_step')

with tf.variable_scope("Output_Layer"):
    # output layer
    # Start / end distributions over the 240 context positions.
    p1_input = tf.concat([model_encoder1, model_encoder2],2)
    p2_input = tf.concat([model_encoder2, model_encoder3],2)
    p1_prob = tf.nn.softmax(tf.squeeze(tf.layers.conv1d(p1_input, 1, 1),-1))
    p2_prob = tf.nn.softmax(tf.squeeze(tf.layers.conv1d(p2_input, 1, 1),-1))
    pred_s = tf.argmax(p1_prob, axis=1)
    pred_e = tf.argmax(p2_prob, axis=1)
    # (row, position) index pairs that pick the probability assigned to the
    # gold start / end of every sample; 1e-15 keeps the later log finite.
    s_pairs = tf.concat([tf.expand_dims(tf.range(batch_size),1), tf.expand_dims(start_mask,1)],1)
    e_pairs = tf.concat([tf.expand_dims(tf.range(batch_size),1), tf.expand_dims(end_mask,1)],1)
    yhat_p1 = tf.add(tf.gather_nd(p1_prob, s_pairs), 1e-15)
    yhat_p2 = tf.add(tf.gather_nd(p2_prob, e_pairs), 1e-15)

tf.add_to_collection("predictions", p1_prob)
tf.add_to_collection("predictions", p2_prob)
    
with tf.variable_scope("Optimizer"):
    # add l2 weight decay to every trainable variable without 'bias' in its name
    trainables = tf.trainable_variables()
    loss_l2 = tf.add_n([ tf.nn.l2_loss(v) for v in trainables if 'bias' not in v.name ]) * 3e-7
    # Negative log-likelihood of the gold start and end positions.
    # NOTE(review): the checkpoint inspected at the end of this notebook
    # contains NaN weights; taking the log of a softmax output instead of
    # using a logits-based cross-entropy is one candidate cause -- TODO confirm.
    loss = -tf.reduce_mean(tf.log(yhat_p1) + tf.log(yhat_p2)) + loss_l2
    
    # learning-rate warm-up: logarithmic ramp from 0 that is capped at 0.001
    # (reached at step 998), followed by gradient clipping to global norm 5.0
    lr = tf.minimum(0.001, 0.001 / tf.log(999.) * tf.log(tf.cast(global_step, tf.float32) + 1))
    optimizer = tf.train.AdamOptimizer(lr, beta1=0.8,epsilon=1e-7)
    gradients, variables = zip(*optimizer.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    opt_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step)
    
    # apply exponential moving average; the averages are updated after each
    # optimizer step, so running train_step performs both
    ema = tf.train.ExponentialMovingAverage(decay=0.9999)
    with tf.control_dependencies([opt_op]):
        train_step = ema.apply(trainables)

tf.add_to_collection("train_step", train_step)
        
tf.summary.scalar("loss", loss)
# f_measure only exists so that a value computed in Python can be logged: the
# training loop feeds the previous batch's F-score for it.
f_measure = tf.get_variable("f_measure", ())
tf.summary.scalar("f_measure", f_measure)
print(yhat_p1.shape, yhat_p2.shape)

(?, 30, 128) (?, 240, 128)
(?, 240, 512)
(?, 240, 128) (?, 240, 128) (?, 240, 128)
(?,) (?,)


In [None]:
# Train until the input pipeline is exhausted (`epoch` passes over the data),
# logging a summary every step and writing a checkpoint every 1500 batches.
with tf.device("/gpu:0"):
    config = tf.ConfigProto(allow_soft_placement = True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(train_iter.initializer, feed_dict={c_ph: c,
                                                    c_char_ph: c_char,
                                                    q_ph: q,
                                                    q_char_ph: q_char,
                                                    aidx_ph: a_idx})
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter("logs/", sess.graph)
        saver = tf.train.Saver(max_to_keep=3)
        cnt = 0
        f = 0
        # Last global step seen; stays None if not a single batch was trained.
        step = None
        while True:
            try:
                next_c, next_c_char, next_q, next_q_char, next_mask = sess.run(next_batch)
            except tf.errors.OutOfRangeError:
                # The iterator is exhausted: write the final checkpoint and
                # leave the loop.  Without the break the loop never ended,
                # re-raising and re-saving forever.
                if step is not None:
                    saver.save(sess, "model/naive", global_step=step)
                break
            cnt += 1
            # Stored spans are (start, end) with an exclusive end; the model
            # is trained on the inclusive end position.
            next_smask = next_mask[:,0]
            next_emask = next_mask[:,1]-1
            feed_dict = {q_input: next_q,
                         q_char_input: next_q_char,
                         c_input: next_c,
                         c_char_input: next_c_char,
                         start_mask: next_smask,
                         end_mask: next_emask,
                         # F-score of the previous batch, logged via the summary
                         f_measure:f,
                         batch_size: len(next_c),
                         dp:0.1}
            _, ps, pe, step, s = sess.run([train_step, pred_s, pred_e, global_step, merged],feed_dict=feed_dict)
            f = f_score(ps, pe, next_smask, next_emask, next_c)
            writer.add_summary(s, step)
            if cnt % 1500 == 0:
                print(cnt)
                saver.save(sess, "model/naive", global_step=step)
        # Flush pending summaries to disk.
        writer.close()
        print("done!")

In [152]:
# Scratch cell: runs one training step on freshly initialised variables and
# fetches the (row, position) index pairs used to gather the gold probabilities.
# NOTE(review): em, eq, eqc, ec and ecc are not defined in the visible part of
# this notebook; batch_size is taken from next_c (a leftover of the training
# loop) rather than from ec; and the dp placeholder is not fed -- confirm
# before re-running.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    next_smask = em[:,0]
    next_emask = em[:,1]-1
    feed_dict = {q_input: eq,
                 q_char_input: eqc,
                 c_input: ec,
                 c_char_input: ecc,
                 start_mask: next_smask,
                 end_mask: next_emask,
                 f_measure:f,
                 batch_size: len(next_c)}
    _, ps, pe, step = sess.run([train_step, s_pairs, e_pairs, global_step],feed_dict=feed_dict)
    #f = f_score(ps, pe, next_smask, next_emask, next_c)

In [16]:
import os

In [31]:
# Rebuild the trained graph from its meta file and restore the weights of the
# most recent checkpoint listed in model/checkpoint.
tf.reset_default_graph()

sess = tf.Session()
ckpt = tf.train.get_checkpoint_state(os.path.dirname('model/checkpoint'))
saver = tf.train.import_meta_graph("model/naive-13500.meta", clear_devices=True)
saver.restore(sess, ckpt.model_checkpoint_path)

# The model-building cell registers five tensors under "infer_input" (the four
# sequence inputs followed by the drop-probability placeholder), so unpacking
# the whole collection into four names raises ValueError.  Take the first
# four explicitly and pick up the drop-probability placeholder when present.
infer_inputs = tf.get_collection("infer_input")
qi, qci, ci, cci = infer_inputs[:4]
dp_in = infer_inputs[4] if len(infer_inputs) > 4 else None
p1, p2 = tf.get_collection("predictions")

INFO:tensorflow:Restoring parameters from model\naive-13500


In [62]:
# Names of the graph nodes under "Embedding_Encoder_Layer" (outside the
# optimizer) whose name contains "Add_4"; the unpacking requires that exactly
# two nodes match.
test1, test2 = [tensor.name for tensor in tf.get_default_graph().as_graph_def().node 
 if "Embedding_Encoder_Layer" in tensor.name and 
 "Optimizer" not in tensor.name and "Add_4" in tensor.name]

In [86]:
# Evaluate everything in `l`, feeding the first training sample.
# `l` is assigned in a cell that appears further down (In [85]) but was
# executed before this one (In [86]).
o = sess.run(l, feed_dict={qi:q[:1,:],qci:q_char[:1,:],ci:c[:1,:],cci:c_char[:1,:]})

In [74]:
# Operations of the embedding layer (outside the optimizer) that belong to a
# highway scope and whose name contains "add_1"; the unpacking requires that
# exactly two operations match.
test1, test2 = [tensor for tensor in tf.get_default_graph().get_operations() 
 if "Embedding_Layer" in tensor.name and 
 "Optimizer" not in tensor.name and "highway" in tensor.name and "add_1" in tensor.name]

In [85]:
# Global variables whose name contains "Embedding_Layer", leaving out the
# copies created under the optimizer scope.
l = [t for t in tf.global_variables() if "Embedding_Layer" in t.name and "Optimizer" not in t.name]

In [104]:
# Minimum of every fetched value; np.min yields nan as soon as an array
# contains a NaN, so this shows which restored variables are corrupted.
[np.min(x) for x in o]

  return umr_minimum(a, axis, None, out, keepdims)


[-4.9027, nan, 0.0, nan, 0.0, nan, nan, nan, nan, nan, nan, nan, nan]

In [105]:
# Identify the second inspected variable.
l[1]

<tf.Variable 'Input_Embedding_Layer/W_Embedding/unknown:0' shape=(1, 50) dtype=float32_ref>

In [106]:
# Its restored value.
o[1]

array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
      dtype=float32)

In [107]:
# Release the session that was opened for checkpoint inspection.
sess.close()