In [1]:
import json
import os
import shutil
from functools import reduce
from operator import mul
from pprint import pprint

import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops.rnn_cell import _linear

import flag as fg
from my.tensorflow import get_initializer
from read_data import read_data, get_squad_data_filter, update_config

# Build the run configuration object from the project's `flag` module.
# NOTE(review): `_` is IPython's "previous output" variable, so the argument depends on
# kernel state. Presumably fg.main ignores its argument (tf.app.run-style signature) —
# confirm, and pass an explicit value if it does not.
config = fg.main(_)

In [2]:
# Every run writes below <out_base_dir>/<model_name>/<run_id zero-padded to 2 digits>.
config.out_dir = os.path.join(config.out_base_dir, config.model_name, str(config.run_id).zfill(2))

# Any mode other than 'train' has to load an existing model.
assert config.load or config.mode == 'train', "config.load must be True if not training"

# A run that does not load previous state starts from an empty output directory.
if not config.load and os.path.exists(config.out_dir):
    shutil.rmtree(config.out_dir)

config.save_dir = os.path.join(config.out_dir, "save")
config.log_dir = os.path.join(config.out_dir, "log")
config.eval_dir = os.path.join(config.out_dir, "eval")
config.answer_dir = os.path.join(config.out_dir, "answer")

# Create the run directory and its sub-directories; existing ones are left untouched,
# which keeps this cell safe to re-run.
for run_dir in (config.out_dir, config.save_dir, config.log_dir, config.answer_dir, config.eval_dir):
    os.makedirs(run_dir, exist_ok=True)

In [3]:
# A single filter, derived from the config, is shared by both splits.
data_filter = get_squad_data_filter(config)

# Load 'train' first and 'dev' second, with identical arguments apart from the split name.
train_data, dev_data = [
    read_data(config, split, False, data_filter=data_filter)
    for split in ('train', 'dev')
]

# Hand both datasets to update_config; its return value is not used, so it presumably
# adjusts `config` in place — confirm in read_data.py.
update_config(config, [train_data, dev_data])

Loaded 87507/87599 examples from train
Loaded 10544/10570 examples from dev


In [4]:
# Pick the pre-trained vector table that matches the configured casing.
word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
word2idx_dict = train_data.shared['word2idx']

# Re-key the pre-trained vectors by vocabulary index, keeping only words that have an index.
idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}

# One row per vocabulary index. Indices without a pre-trained vector get an i.i.d.
# standard-normal vector: this is the same distribution as
# multivariate_normal(zeros, identity) but does not factorise a
# word_emb_size x word_emb_size covariance matrix for every missing word.
# NOTE: no random seed is set in this notebook, so these rows differ between runs.
emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                    else np.random.standard_normal(config.word_emb_size)
                    for idx in range(config.word_vocab_size)])
config.emb_mat = emb_mat

# pprint(config.__flags, indent=2)

In [5]:
# Short aliases for the config sizes; the same letters are used in the
# tensor-shape comments of the cells below.

# Batch and sequence extents
N = config.batch_size         # batch size
M = config.max_num_sents      # maximum number of sentences
JX = config.max_sent_size     # maximum sentence size (context)
JQ = config.max_ques_size     # maximum question size
W = config.max_word_size      # maximum word size (characters)

# Vocabulary sizes
VW = config.word_vocab_size   # word vocabulary
VC = config.char_vocab_size   # character vocabulary

# Feature widths
d = config.hidden_size        # hidden size
dw = config.word_emb_size     # word embedding size
dc = config.char_emb_size     # character embedding size
dco = config.char_out_size    # character-level output size

In [6]:
# Graph inputs. The leading (batch) axis is fixed to N; axes given as None are unspecified.

# Context inputs: word ids, character ids (W characters per word) and a boolean mask.
x = tf.placeholder(tf.int32, shape=[N, None, None], name='x')
cx = tf.placeholder(tf.int32, shape=[N, None, None, W], name='cx')
x_mask = tf.placeholder(tf.bool, shape=[N, None, None], name='x_mask')

# Question inputs: the same three tensors with one axis fewer.
q = tf.placeholder(tf.int32, shape=[N, None], name='q')
cq = tf.placeholder(tf.int32, shape=[N, None, W], name='cq')
q_mask = tf.placeholder(tf.bool, shape=[N, None], name='q_mask')

# Boolean targets with the same rank as x_mask.
# NOTE(review): presumably y / y2 mark the answer start / end positions — confirm against the feed code.
y = tf.placeholder(tf.bool, shape=[N, None, None], name='y')
y2 = tf.placeholder(tf.bool, shape=[N, None, None], name='y2')

# Scalar boolean used by the dropout helpers to switch between training and inference behaviour.
is_train = tf.placeholder(tf.bool, shape=[], name='is_train')

# Additional word vectors fed at run time; each row has config.word_emb_size entries.
new_emb_mat = tf.placeholder(tf.float32, shape=[None, config.word_emb_size], name='new_emb_mat')

In [7]:
# Non-trainable int32 scalar variable, initialised to 0.
global_step = tf.get_variable(
    'global_step',
    shape=[],
    dtype=tf.int32,
    initializer=tf.constant_initializer(0),
    trainable=False,
)

In [8]:

# NOTE: this local definition shadows the `get_initializer` imported from `my.tensorflow`
# in the import cell.
def get_initializer(matrix):
    """Wrap a precomputed value so it can be passed as a variable initializer.

    Args:
        matrix: the value every call of the returned initializer hands back.

    Returns:
        A callable taking ``shape`` plus optional ``dtype``, ``partition_info`` and
        arbitrary extra keyword arguments. All of them are ignored; the callable
        always returns ``matrix`` itself (no copy, no shape or dtype check).
    """
    return lambda shape, dtype=None, partition_info=None, **kwargs: matrix


In [9]:
def dropout(x, keep_prob, is_train, noise_shape=None, seed=None, name=None):
    """Apply dropout to `x` only while `is_train` evaluates to true.

    Args:
        x: tensor to regularise.
        keep_prob: Python number; when it is not below 1.0, `x` is returned untouched.
        is_train: boolean predicate passed to tf.cond.
        noise_shape, seed: forwarded unchanged to tf.nn.dropout.
        name: optional name scope; "dropout" is used when it is falsy.

    Returns:
        `x` itself when keep_prob is not below 1.0; otherwise a tf.cond that selects the
        dropped-out tensor when `is_train` is true and `x` when it is false.
    """
    scope_name = name or "dropout"
    with tf.name_scope(scope_name):
        if keep_prob < 1.0:
            # The dropout op is created here, outside the cond branches; the branches
            # only choose between the two already-built tensors.
            dropped = tf.nn.dropout(x, keep_prob, noise_shape=noise_shape, seed=seed)
            return tf.cond(is_train, lambda: dropped, lambda: x)
        return x

In [10]:
def conv1d(in_, filter_size, height, padding, is_train=None, keep_prob=1.0, scope=None):
    """Convolve along axis 2 of a 4-D input, apply ReLU, then max-pool over that axis.

    Args:
        in_: 4-D tensor whose last axis holds the input channels.
        filter_size: number of output channels.
        height: extent of the filter along axis 2 (the filter has extent 1 along axis 1).
        padding: padding mode forwarded to tf.nn.conv2d.
        is_train: boolean predicate for dropout; dropout is skipped when it is None.
        keep_prob: dropout keep probability; dropout is skipped unless it is below 1.0.
        scope: variable scope name; "conv1d" when falsy.

    Returns:
        The ReLU-activated convolution reduced with max over axis 2, i.e. a 3-D tensor
        whose last axis has `filter_size` entries.
    """
    with tf.variable_scope(scope or "conv1d"):
        in_channels = in_.get_shape()[-1]
        kernel = tf.get_variable("filter", shape=[1, height, in_channels, filter_size], dtype='float')
        offset = tf.get_variable("bias", shape=[filter_size], dtype='float')

        # Dropout is applied to the input only when a training flag is supplied.
        use_dropout = is_train is not None and keep_prob < 1.0
        conv_input = dropout(in_, keep_prob, is_train) if use_dropout else in_

        # Unit strides everywhere; the result keeps axis 1 and slides over axis 2.
        activations = tf.nn.relu(tf.nn.conv2d(conv_input, kernel, [1, 1, 1, 1], padding) + offset)
        return tf.reduce_max(activations, 2)

In [11]:
def multi_conv1d(in_, filter_sizes, heights, padding, is_train=None, keep_prob=1.0, scope=None):
    """Run several `conv1d` branches over the same input and concatenate them on axis 2.

    Args:
        in_: input tensor passed unchanged to every branch.
        filter_sizes: output channel count per branch; a branch with 0 channels is skipped.
        heights: filter extent per branch, paired element-wise with `filter_sizes`.
        padding, is_train, keep_prob: forwarded to each `conv1d` call.
        scope: variable scope name; "multi_conv1d" when falsy.

    Returns:
        The branch outputs concatenated along axis 2.

    Raises:
        AssertionError: if `filter_sizes` and `heights` differ in length.
    """
    with tf.variable_scope(scope or "multi_conv1d"):
        assert len(filter_sizes) == len(heights)
        # Each branch lives in a sub-scope named after its filter extent.
        branch_outputs = [
            conv1d(in_, num_filters, extent, padding, is_train=is_train, keep_prob=keep_prob,
                   scope="conv1d_{}".format(extent))
            for num_filters, extent in zip(filter_sizes, heights)
            if num_filters != 0
        ]
        return tf.concat(branch_outputs, 2)

In [12]:
# Build the input representations of the context (xx) and the question (qq) from
# character-level and/or word-level embeddings, depending on the config switches.
with tf.variable_scope("emb"):
    if config.use_char_emb:
        with tf.variable_scope("char"):

            # Character embedding table: VC rows of width dc.
            char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')
    
            Acx = tf.nn.embedding_lookup(char_emb_mat, cx)  # [N, M, JX, W, dc]
            Acq = tf.nn.embedding_lookup(char_emb_mat, cq)  # [N, JQ, W, dc]
            # Collapse the leading axes so both tensors are 4-D inputs for the convolution.
            Acx = tf.reshape(Acx, [-1, JX, W, dc])
            Acq = tf.reshape(Acq, [-1, JQ, W, dc])
            
            # Both settings are comma-separated integer lists; multi_conv1d asserts that
            # they have the same length.
            filter_sizes = list(map(int, config.out_channel_dims.split(',')))
            heights = list(map(int, config.filter_heights.split(',')))
            
            with tf.variable_scope("conv"):
                xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", is_train, config.keep_prob, scope="xx")
                # Switch the "conv" scope to reuse mode so the second call, which uses the
                # same sub-scope name "xx", shares the filters created by the first call.
                tf.get_variable_scope().reuse_variables()
                qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", is_train, config.keep_prob, scope="xx")

                # Restore the sentence axis of the context.
                # NOTE(review): assumes the concatenated conv output width equals dco
                # (config.char_out_size) — confirm against out_channel_dims.
                xx = tf.reshape(xx, [-1, M, JX, dco])
                qq = tf.reshape(qq, [-1, JQ, dco])
            
            
    if config.use_word_emb:
        # NOTE(review): "word" is a name_scope, not a variable_scope like "char" above.
        # tf.get_variable ignores name scopes, so the variable below is created directly
        # under "emb" — confirm this naming is intended before changing it.
        with tf.name_scope("word"):
            
            if config.mode == 'train':
                # Training: initialise the table from config.emb_mat.
                word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
            else:
                # Other modes: no explicit initializer (presumably the values are restored
                # from a checkpoint — confirm).
                word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
            
            if config.use_glove_for_unk:
                # Append the rows fed through the new_emb_mat placeholder below the stored rows.
                word_emb_mat = tf.concat([word_emb_mat, new_emb_mat], 0)

            Ax = tf.nn.embedding_lookup(word_emb_mat, x)  # [N, M, JX, dw]
            Aq = tf.nn.embedding_lookup(word_emb_mat, q)  # [N, JQ, dw]
            
        if config.use_char_emb:
            # Join character-level and word-level features along the last axis.
            xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
            qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
        else:
            # Word embeddings only.
            xx = Ax
            qq = Aq

In [13]:
def flatten(tensor, keep):
    """Merge all axes except the last `keep` into one leading axis.

    Args:
        tensor: tensor of known rank.
        keep: number of trailing axes to leave as they are. It must be smaller than the
            rank; otherwise the product over an empty list raises TypeError.

    Returns:
        `tensor` reshaped to ``[product of the leading axes] + trailing axes``.
    """
    static_dims = tensor.get_shape().as_list()
    rank = len(static_dims)
    split = rank - keep

    def dim(axis):
        # Use the static size when it is known; otherwise look it up at run time.
        return static_dims[axis] or tf.shape(tensor)[axis]

    leading = reduce(mul, [dim(axis) for axis in range(split)])
    trailing = [dim(axis) for axis in range(split, rank)]
    return tf.reshape(tensor, [leading] + trailing)

In [14]:
def reconstruct(tensor, ref, keep):
    """Reshape `tensor` so its leading axes match those of `ref`.

    Args:
        tensor: tensor to reshape; its last `keep` axes are preserved.
        ref: reference tensor; all of its axes except the last `keep` become the
            leading axes of the result.
        keep: number of trailing axes taken from `tensor` (and dropped from `ref`).

    Returns:
        `tensor` reshaped to ``ref's leading axes + tensor's trailing axes``.
    """
    ref_dims = ref.get_shape().as_list()
    own_dims = tensor.get_shape().as_list()
    num_leading = len(ref_dims) - keep
    first_kept = len(own_dims) - keep

    # Static sizes are used where known; unknown ones are read from the runtime shape.
    leading = []
    for axis in range(num_leading):
        leading.append(ref_dims[axis] or tf.shape(ref)[axis])

    trailing = []
    for axis in range(first_kept, len(own_dims)):
        trailing.append(own_dims[axis] or tf.shape(tensor)[axis])

    return tf.reshape(tensor, leading + trailing)

In [15]:
def linear(args, output_size, scope=None, is_train=None, input_keep_prob=1.0):
    """Affine map over the last axis of one or more tensors.

    Every input is flattened to 2-D, optionally passed through dropout, joined along
    the feature axis (when there is more than one), multiplied by a single weight
    matrix and shifted by a bias. The result gets the leading axes of the first input.

    Args:
        args: a tensor, or a non-empty list/tuple of tensors that agree on every axis
            except the last. The last axis of each must have a static size.
        output_size: size of the last axis of the result.
        scope: variable scope name; "linear" when falsy.
        is_train: boolean predicate for dropout; required when input_keep_prob < 1.0.
        input_keep_prob: keep probability for dropout on the flattened inputs.

    Returns:
        Tensor shaped like ``args[0]`` with its last axis replaced by `output_size`.

    Raises:
        ValueError: if `args` is an empty sequence.
        AssertionError: if dropout is requested without `is_train`.
    """
    if not isinstance(args, (list, tuple)):
        args = [args]
    if not args:
        raise ValueError("`args` must contain at least one tensor")

    flat_args = [flatten(arg, 1) for arg in args]

    # The weight matrix spans the features of all inputs, not just the first one.
    input_sizes = [arg.get_shape().as_list()[-1] for arg in args]
    input_size = input_sizes[0] if len(input_sizes) == 1 else sum(input_sizes)

    if input_keep_prob < 1.0:
        assert is_train is not None
        # tf.cond invokes both lambdas right away, so `arg` is read while it still
        # refers to the current element.
        flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
                     for arg in flat_args]

    with tf.variable_scope(scope or "linear"):
        W = tf.get_variable("W", [output_size, input_size], dtype=args[0].dtype)
        b = tf.get_variable("b", [output_size], dtype=args[0].dtype)

        # A single input is used directly (no extra op in the graph); several inputs
        # are joined on the feature axis so that all of them contribute.
        flat_in = flat_args[0] if len(flat_args) == 1 else tf.concat(flat_args, 1)
        flat_out = tf.matmul(flat_in, tf.transpose(W)) + b
        return reconstruct(flat_out, args[0], 1)

In [16]:
def highway_layer(arg, scope=None, input_keep_prob=1.0, is_train=None):
    """One highway layer: a sigmoid gate blends a ReLU transform with the raw input.

    Args:
        arg: input tensor; the layer keeps the size of its last axis.
        scope: variable scope name; "highway_layer" when falsy.
        input_keep_prob, is_train: forwarded to both `linear` calls.

    Returns:
        ``gate * transform + (1 - gate) * arg``.
    """
    with tf.variable_scope(scope or "highway_layer"):
        width = arg.get_shape()[-1]
        shared = dict(input_keep_prob=input_keep_prob, is_train=is_train)

        # The transform branch is built before the gate branch.
        candidate = tf.nn.relu(linear([arg], width, scope='trans', **shared))
        gate = tf.nn.sigmoid(linear([arg], width, scope='gate', **shared))

        return gate * candidate + (1 - gate) * arg


def highway_network(arg, num_layers, input_keep_prob=1.0, is_train=None):
    """Stack `num_layers` highway layers on top of `arg`.

    Args:
        arg: input tensor.
        num_layers: number of highway layers; layer i is built in scope "layer_<i>".
        input_keep_prob, is_train: forwarded to every `highway_layer` call.

    Returns:
        The output of the last layer, or `arg` itself when `num_layers` is 0.
    """
    with tf.variable_scope("highway_network"):
        out = arg
        for layer_idx in range(num_layers):
            out = highway_layer(out, scope="layer_{}".format(layer_idx),
                                input_keep_prob=input_keep_prob, is_train=is_train)
        # With zero layers the loop body never runs and the input passes through unchanged.
        return out

In [17]:
# Context and question go through the same highway stack: after the first call the
# enclosing scope is switched to reuse mode, so the second call picks up the variables
# created by the first instead of creating new ones.
# NOTE: highway_network opens its own "highway_network" scope as well, so the variables
# end up under "highway_network/highway_network/...".
with tf.variable_scope("highway_network") as highway_scope:
    xx = highway_network(xx, config.highway_num_layers, is_train=is_train)
    highway_scope.reuse_variables()
    qq = highway_network(qq, config.highway_num_layers, is_train=is_train)

In [18]:
# Display the context-side highway output to check its graph name and static shape.
xx

<tf.Tensor 'highway_network/highway_network/layer_1/add:0' shape=(60, 1, 400, 200) dtype=float32>