In [1]:
import itertools
import json
import math
import os
import random
import shutil
from functools import reduce
from operator import mul
from pprint import pprint

import numpy as np
import tensorflow as tf
from tqdm import tqdm

from read_data import read_data, get_squad_data_filter, update_config
import flag as fg
from helper import get_initializer, dropout, conv1d, multi_conv1d
from helper import flatten, reconstruct, linear, highway_layer, highway_network, mask, exp_mask, softmax
from helper import grouper
from utils import index
from read_data import DataSet

# Build the run configuration.
# NOTE(review): `_` is the interactive shell's "last result" variable, so this
# call depends on kernel state; presumably fg.main ignores its argument
# (argv-style entry point) -- confirm and pass an explicit value if so.
config = fg.main(_)
config.model_name = 'basic_60'
# Output tree: <out_base_dir>/<model_name>/<run_id zero-padded to 2 digits>
config.out_dir = os.path.join(config.out_base_dir, config.model_name, str(config.run_id).zfill(2))

assert config.load or config.mode == 'train', "config.load must be True if not training"
# A run that does not resume from a checkpoint starts from an empty output tree.
# (shutil.rmtree needs `import shutil`, which the original import cell lacked.)
if not config.load and os.path.exists(config.out_dir):
    shutil.rmtree(config.out_dir)

config.save_dir = os.path.join(config.out_dir, "save")
config.log_dir = os.path.join(config.out_dir, "log")
config.eval_dir = os.path.join(config.out_dir, "eval")
config.answer_dir = os.path.join(config.out_dir, "answer")

# Create the output tree; exist_ok makes the cell safe to re-run and avoids the
# check-then-create race of separate os.path.exists / os.mkdir calls.
for out_subdir in (config.out_dir, config.save_dir, config.log_dir, config.answer_dir, config.eval_dir):
    os.makedirs(out_subdir, exist_ok=True)

# Load both splits through the same example filter, then let update_config
# adjust `config` from the loaded data sets.
data_filter = get_squad_data_filter(config)
train_data = read_data(config, 'train', False, data_filter=data_filter)
dev_data = read_data(config, 'dev', False, data_filter=data_filter)
update_config(config, [train_data, dev_data])

# Pick the pretrained-vector table that matches the casing option.
if config.lower_word:
    word2vec_dict = train_data.shared['lower_word2vec']
else:
    word2vec_dict = train_data.shared['word2vec']
word2idx_dict = train_data.shared['word2idx']

# Re-key the pretrained vectors by vocabulary index, keeping only in-vocabulary words.
idx2vec_dict = {}
for word, vec in word2vec_dict.items():
    if word in word2idx_dict:
        idx2vec_dict[word2idx_dict[word]] = vec

# One row per vocabulary index: the pretrained vector when there is one,
# otherwise a draw from a standard multivariate normal of the same width.
emb_rows = []
for idx in range(config.word_vocab_size):
    if idx in idx2vec_dict:
        emb_rows.append(idx2vec_dict[idx])
    else:
        emb_rows.append(np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                      np.eye(config.word_emb_size)))
emb_mat = np.array(emb_rows)
config.emb_mat = emb_mat

# pprint(config.__flags, indent=2)

Loaded 87507/87599 examples from train
Loaded 10544/10570 examples from dev


In [2]:
# Override whatever batch size `config` carried; the graph-building cell reads
# this value as N and fixes the placeholder batch dimension to it.
config.batch_size = 60

In [3]:
# Context and Ques Parameters
# Short aliases for the size settings in `config`; the graph code and the feed
# builder below use these names in every shape.
N = config.batch_size  # examples per batch (fixed placeholder batch dimension)
M = config.max_num_sents  # max sentences per context
JX = config.max_sent_size  # max words per context sentence
JQ = config.max_ques_size  # max words per question
VW = config.word_vocab_size  # rows of the trainable word embedding matrix
VC = config.char_vocab_size  # rows of the character embedding matrix
W = config.max_word_size  # max characters kept per word
d =  config.hidden_size  # unit count of every BasicLSTMCell below
dc = config.char_emb_size  # character embedding width
dw = config.word_emb_size  # word embedding width
dco = config.char_out_size  # width the char-CNN output is reshaped to

with tf.device('/device:GPU:3'):

    # Placeholders
    # All of these are fed per step by get_feed_dict; the batch axis is fixed to N,
    # the sentence/word axes are left dynamic (fed as M, JX and JQ sized arrays).

    x = tf.placeholder('int32', [N, None, None], name='x')  # context word ids
    cx = tf.placeholder('int32', [N, None, None, W], name='cx')  # context character ids
    x_mask = tf.placeholder('bool', [N, None, None], name='x_mask')  # True where a context word was written
    q = tf.placeholder('int32', [N, None], name='q')  # question word ids
    cq = tf.placeholder('int32', [N, None, W], name='cq')  # question character ids
    q_mask = tf.placeholder('bool', [N, None], name='q_mask')  # True where a question word was written
    y1 = tf.placeholder('bool', [N, None, None], name='y1')  # one-hot answer start position
    y2 = tf.placeholder('bool', [N, None, None], name='y2')  # one-hot answer end position (inclusive)
    is_train = tf.placeholder('bool', [], name='is_train')  # forwarded to the helper layers' is_train argument
    new_emb_mat = tf.placeholder('float', [None, config.word_emb_size], name='new_emb_mat')  # extra rows appended after word_emb_mat

    # Non-trainable step counter; incremented by optimizer.apply_gradients.
    global_step = tf.get_variable('global_step', shape=[], dtype='int32', initializer=tf.constant_initializer(0), trainable=False)
    # Registry of intermediate tensors and variables, keyed by short name.
    tensor_dict = {}

    # Embedding layer: builds xx (context) and qq (question) token representations
    # from character-CNN features and/or word embeddings, depending on the config switches.
    with tf.variable_scope("embedding_layer"):
        if config.use_char_emb:
            with tf.variable_scope("char"):

                char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                Acx = tf.nn.embedding_lookup(char_emb_mat, cx)  # [N, M, JX, W, dc]
                Acq = tf.nn.embedding_lookup(char_emb_mat, cq)  # [N, JQ, W, dc]
                # Fold the batch (and sentence) axes together so the conv helper sees rank-4 input.
                Acx = tf.reshape(Acx, [-1, JX, W, dc])
                Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                # Both settings are comma-separated integer lists.
                filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                heights = list(map(int, config.filter_heights.split(',')))

                with tf.variable_scope("conv"):
                    # Same scope name ("xx") plus reuse_variables(): the question branch
                    # shares the filters created for the context branch.
                    xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", is_train, config.keep_prob, scope="xx")
                    tf.get_variable_scope().reuse_variables()
                    qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", is_train, config.keep_prob, scope="xx")

                    # Restore the batch/sentence axes; assumes multi_conv1d yields dco features per word -- TODO confirm.
                    xx = tf.reshape(xx, [-1, M, JX, dco])
                    qq = tf.reshape(qq, [-1, JQ, dco])


        if config.use_word_emb:
            # name_scope (not variable_scope): tf.get_variable ignores name scopes, so the
            # variable is created as "embedding_layer/word_emb_mat". Changing this would
            # rename the variable and break existing checkpoints.
            with tf.name_scope("word"):

                # Only training seeds the matrix from the pretrained vectors; otherwise the
                # values are expected to come from a restored checkpoint.
                if config.mode == 'train':
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                else:
                    word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')


                # Append the fed (non-trainable) rows; ids >= VW index into new_emb_mat.
                word_emb_mat = tf.concat([word_emb_mat, new_emb_mat], 0)

                Ax = tf.nn.embedding_lookup(word_emb_mat, x)  # [N, M, JX, dw]
                Aq = tf.nn.embedding_lookup(word_emb_mat, q)  # [N, JQ, dw]

                tensor_dict['x'] = Ax
                tensor_dict['q'] = Aq

            # Note: this branch is nested under use_word_emb, so xx/qq stay char-only
            # when word embeddings are disabled.
            if config.use_char_emb:
                xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
            else:
                xx = Ax
                qq = Aq

    # Highway layer: context and question go through the same highway network
    # (the second call reuses the variables created by the first).
    with tf.variable_scope("highway_network_layer"):
        xx = highway_network(xx, config.highway_num_layers, is_train=is_train)
        tf.get_variable_scope().reuse_variables()
        qq = highway_network(qq, config.highway_num_layers, is_train=is_train)

        tensor_dict['xx'] = xx
        tensor_dict['qq'] = qq

    # True sequence lengths, obtained by counting the True entries of the masks.
    x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2)  # [N, M]
    q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1)  # [N]

    # int64 lengths for the dynamic RNNs. x_len/q_len are always tensors here, so the
    # `is None` branches are never taken. flatten(..., 0) presumably collapses all
    # axes into one -- TODO confirm against helper.flatten.
    flat_len_q = None if q_len is None else tf.cast(flatten(q_len, 0), 'int64')
    flat_len_x = None if x_len is None else tf.cast(flatten(x_len, 0), 'int64')

    # Contextual layer: a bidirectional LSTM run over the question, then (with reused
    # variables under the same 'lstm' scope) over every context sentence.
    with tf.variable_scope("contextual_layer"):
        # The same cell object is passed as both the forward and the backward cell.
        # NOTE(review): whether the two directions end up with shared or separate weights
        # depends on the installed TF version's RNNCell scoping -- confirm before changing,
        # since the saved checkpoints depend on the resulting variable set.
        cell=tf.nn.rnn_cell.BasicLSTMCell(d,state_is_tuple=True);

        # flatten/reconstruct presumably collapse and restore the leading axes so the RNN
        # sees [batch, time, features] -- TODO confirm against helper.
        flat_qq = flatten(qq, 2)  
        (flat_fwu_outputs, flat_bwu_outputs), _ = tf.nn.bidirectional_dynamic_rnn(cell, cell, flat_qq, sequence_length=flat_len_q, dtype='float', scope='lstm')
        fw_u = reconstruct(flat_fwu_outputs, qq, 2)
        bw_u = reconstruct(flat_bwu_outputs, qq, 2)
        u = tf.concat([fw_u, bw_u], 2)  # question encoding, forward and backward outputs side by side

        # From here on, variable lookups in this scope reuse what the question pass created.
        tf.get_variable_scope().reuse_variables()

        flat_xx = flatten(xx, 2)  
        (flat_fwh_outputs, flat_bwh_outputs), _ = tf.nn.bidirectional_dynamic_rnn(cell, cell, flat_xx, sequence_length=flat_len_x, dtype='float', scope='lstm')
        fw_h = reconstruct(flat_fwh_outputs, xx, 2)
        bw_h = reconstruct(flat_bwh_outputs, xx, 2)
        h = tf.concat([fw_h, bw_h], 3)  # context encoding, concatenated on the feature axis

        tensor_dict['u'] = u
        tensor_dict['h'] = h

    # Attention layer: scores every (context word, question word) pair, then derives
    # context-to-query (u_a) and query-to-context (h_a) summaries and merges them into g.
    with tf.variable_scope("attention_layer"):
        # Broadcast h over the question axis and u over the sentence/word axes so both
        # have shape [N, M, JX, JQ, features].
        h_aug = tf.tile(tf.expand_dims(h, 3), [1, 1, 1, JQ, 1])
        u_aug = tf.tile(tf.expand_dims(tf.expand_dims(u, 1), 1), [1, M, JX, 1, 1])
        h_mask_aug = tf.tile(tf.expand_dims(x_mask, 3), [1, 1, 1, JQ])
        u_mask_aug = tf.tile(tf.expand_dims(tf.expand_dims(q_mask, 1), 1), [1, M, JX, 1])
        # A pair is valid only when both the context word and the question word are real.
        hu_mask = h_mask_aug & u_mask_aug

        h_u = h_aug * u_aug

        with tf.variable_scope("similarity"):
            # One scalar score per pair from [h; u; h*u]; the trailing size-1 axis is squeezed away.
            sim = linear([tf.concat([h_aug, u_aug, h_u], -1)], 1, is_train=is_train, scope="sim")
            sim = tf.squeeze(sim, [len(sim.get_shape().as_list())-1])
            # exp_mask presumably pushes invalid pairs to a very negative score so softmax ignores them -- TODO confirm.
            sim = exp_mask(sim, hu_mask)

            # Tensor Dict
            # Attention weights recorded for inspection only; the summaries below
            # recompute them with the helper `softmax`.
            a_u = tf.nn.softmax(sim)  
            a_h = tf.nn.softmax(tf.reduce_max(sim, 3))
            tensor_dict['a_u'] = a_u
            tensor_dict['a_h'] = a_h

        with tf.variable_scope("context_2_query"):
            # Weighted sum of question vectors for each context word (reduces the JQ axis).
            a = softmax(sim)
            rank_u = len(u_aug.get_shape().as_list())
            u_a = tf.reduce_sum(tf.expand_dims(a, -1) * u_aug, rank_u-2)

        with tf.variable_scope("query_2_context"):
            # Weight each context word by its best-matching question word, sum over the
            # word axis, then repeat the resulting vector for every word position.
            b = softmax(tf.reduce_max(sim, 3))
            rank_h = len(h.get_shape().as_list())
            h_a = tf.reduce_sum(tf.expand_dims(b, -1) * h, rank_h-2)
            h_a = tf.tile(tf.expand_dims(h_a, 2), [1, 1, JX, 1])

        with tf.variable_scope("final"):
            # Query-aware context representation.
            g = tf.concat([h, u_a, h * u_a, h * h_a], 3)


    # Modeling layer: two stacked bidirectional LSTMs (scopes 'g0' and 'g1') over g.
    # As in the contextual layer, each BiLSTM is given one cell object for both directions.
    with tf.variable_scope("modeling_layer"):
        flat_g = flatten(g, 2)  
        cell1 = tf.nn.rnn_cell.BasicLSTMCell(d,state_is_tuple=True);
        (flat_fw_g0_outputs, flat_bw_g0_outputs), _ =tf.nn.bidirectional_dynamic_rnn(cell1, cell1, flat_g, sequence_length=flat_len_x, dtype='float', scope='g0')
        fw_g0 = reconstruct(flat_fw_g0_outputs, g, 2)
        bw_g0 = reconstruct(flat_bw_g0_outputs, g, 2)

        g0 = tf.concat([fw_g0, bw_g0], 3)  # first-level output, both directions concatenated

        flat_g0 = flatten(g0, 2)
        cell2 = tf.nn.rnn_cell.BasicLSTMCell(d,state_is_tuple=True);

        (flat_fw_g1_outputs, flat_bw_g1_outputs), _ =tf.nn.bidirectional_dynamic_rnn(cell2, cell2, flat_g0, sequence_length=flat_len_x, dtype='float', scope='g1')
        fw_g1 = reconstruct(flat_fw_g1_outputs, g0, 2)
        bw_g1 = reconstruct(flat_bw_g1_outputs, g0, 2)

        g1 = tf.concat([fw_g1, bw_g1], 3)  # second-level output, input to the answer-start scorer

    # Output layer: scores every context position as answer start (logits1) and,
    # after one more BiLSTM conditioned on the start distribution, as answer end (logits2).
    with tf.variable_scope("output_layer"):
        # Start scores from [g1; g]; the size-1 output axis is squeezed and padding masked.
        logits1 = linear([tf.concat([g1, g], -1)], 1, input_keep_prob=config.input_keep_prob, is_train=is_train, scope="logits1")
        logits1 = tf.squeeze(logits1, [len(logits1.get_shape().as_list())-1])
        logits1 = exp_mask(logits1, x_mask)

        # Start-weighted summary of g1 over all M*JX positions, repeated at every position.
        a = softmax(tf.reshape(logits1, [N, M * JX]))
        g1_reshaped = tf.reshape(g1, [N, M * JX, 2 * d])
        rank_g1 = len(g1_reshaped.get_shape().as_list())
        a1i = tf.reduce_sum(tf.expand_dims(a, -1) * g1_reshaped, rank_g1-2)
        a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), [1, M, JX, 1])

        g2_input = tf.concat([g, g1, a1i, g1 * a1i], 3)
        flat_input = flatten(g2_input, 2)  
        # Rebinds the name `cell` (previously the contextual-layer cell) to a new LSTM cell.
        cell = tf.nn.rnn_cell.BasicLSTMCell(d,state_is_tuple=True);
        (flat_fw_g2_outputs, flat_bw_g2_outputs), _ =tf.nn.bidirectional_dynamic_rnn(cell, cell, flat_input, sequence_length=flat_len_x, dtype='float', scope='g2')
        # `g` is passed as the reference tensor; presumably only its leading axes are
        # used to restore the shape -- TODO confirm against helper.reconstruct.
        fw_g2 = reconstruct(flat_fw_g2_outputs, g, 2)
        bw_g2 = reconstruct(flat_bw_g2_outputs, g, 2)

        g2 = tf.concat([fw_g2, bw_g2], 3)

        # End scores from [g2; g], masked the same way as the start scores.
        logits2 = linear([tf.concat([g2, g], -1)], 1, input_keep_prob=config.input_keep_prob, is_train=is_train, scope="logits2")
        logits2 = tf.squeeze(logits2, [len(logits2.get_shape().as_list())-1])
        logits2 = exp_mask(logits2, x_mask)

        # Flatten to one distribution over all M*JX positions; yp1/yp2 are the
        # probabilities reshaped back to [batch, M, JX].
        logits1 = tf.reshape(logits1, [-1, M * JX])
        flat_yp1 = tf.nn.softmax(logits1) 
        yp1 = tf.reshape(flat_yp1, [-1, M, JX])
        logits2 = tf.reshape(logits2, [-1, M * JX])
        flat_yp2 = tf.nn.softmax(logits2)
        yp2 = tf.reshape(flat_yp2, [-1, M, JX])

        tensor_dict['g1'] = g1
        tensor_dict['g2'] = g2


    # Loss
    # loss_mask is 1.0 for rows whose question has at least one real token and 0.0 for
    # all-padding rows (get_feed zero-pads batches smaller than N). It is applied to
    # BOTH cross-entropy terms; previously only the start loss was masked.
    loss_mask = tf.reduce_max(tf.cast(q_mask, 'float'), 1)
    start_labels = tf.cast(tf.reshape(y1, [-1, M * JX]), 'float')
    end_labels = tf.cast(tf.reshape(y2, [-1, M * JX]), 'float')
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=start_labels)
    ce_loss1 = tf.reduce_mean(loss_mask * losses)
    losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=end_labels)
    ce_loss2 = tf.reduce_mean(loss_mask * losses2)
    tf.add_to_collection('losses', ce_loss1)
    tf.add_to_collection("losses", ce_loss2)

    # Total loss = sum of everything registered in the 'losses' collection.
    loss = tf.add_n(tf.get_collection('losses'), name='loss')
    tf.summary.scalar(loss.op.name, loss)
    tf.add_to_collection('ema/scalar', loss)

    # Expose every global variable through tensor_dict, keyed by variable name.
    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name)
    for var in variables:
        tensor_dict[var.name] = var

    # Moving average of the trainable variables. Its update op is kept under its own
    # name so the train-mode branch below cannot overwrite it (previously `ema_op` was
    # reassigned there and the variable averages were never updated during training).
    var_ema = tf.train.ExponentialMovingAverage(config.var_decay)
    var_ema_op = var_ema.apply(tf.trainable_variables())
    ema_op = var_ema_op  # legacy name: outside training it refers to the variable EMA update
    update_ops = [var_ema_op]

    if config.mode == 'train':
        # Moving averages of the monitored tensors, for smoother summaries.
        ema = tf.train.ExponentialMovingAverage(config.decay)
        ema_scalars = tf.get_collection("ema/scalar")
        ema_vectors = tf.get_collection("ema/vector")
        # Track the vector collection too: ema.average() returns None for tensors
        # that were never passed to apply().
        ema_op = ema.apply(ema_scalars + ema_vectors)
        update_ops.append(ema_op)

        for var in ema_scalars:
            ema_var = ema.average(var)
            tf.summary.scalar(ema_var.op.name, ema_var)
        for var in ema_vectors:
            ema_var = ema.average(var)
            tf.summary.histogram(ema_var.op.name, ema_var)

    # Evaluating `loss` now also runs every moving-average update.
    with tf.control_dependencies(update_ops):
        loss = tf.identity(loss)

    # Single merged summary op (the earlier merge_all() result was immediately overwritten).
    summary = tf.summary.merge(tf.get_collection("summaries"))

    # NOTE(review): the logged loss in this notebook stays between roughly 13.5 and 16.4
    # for 15k steps. If config.init_lr still holds a value tuned for another optimizer
    # (e.g. an Adadelta-style 0.5), it is far too large for Adam -- confirm the setting.
    optimizer = tf.train.AdamOptimizer(config.init_lr)
    grads = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads, global_step=global_step)

In [4]:
def get_feed(batch, is_train_cond):
    """Convert one DataSet batch into the padded numpy arrays the graph is fed with.

    Args:
        batch: a DataSet; reads batch.data['x'|'cx'|'y'|'q'|'cq'] and
            batch.shared['word2idx'|'char2idx'|'new_emb_mat'] (plus
            'new_word2idx' when config.use_glove_for_unk is set).
        is_train_cond: accepted for interface compatibility; not used here.

    Returns:
        Tuple (x, cx, x_mask, q, cq, q_mask, y1, y2, new_emb_mat). All arrays are
        zero-initialised with sizes N, M, JX, JQ, W, so rows beyond the batch and
        positions beyond each sequence stay 0 / False.

    Side effects:
        With config.single, batch.data['x'] and batch.data['cx'] are modified in
        place so each example keeps only the sentence containing the answer start.
        One answer span per example is picked with random.choice.
    """
    assert isinstance(batch, DataSet)

    word_ids = np.zeros([N, M, JX], dtype='int32')
    char_ids = np.zeros([N, M, JX, W], dtype='int32')
    word_mask = np.zeros([N, M, JX], dtype='bool')
    ques_ids = np.zeros([N, JQ], dtype='int32')
    ques_char_ids = np.zeros([N, JQ, W], dtype='int32')
    ques_mask = np.zeros([N, JQ], dtype='bool')
    extra_emb = batch.shared['new_emb_mat']

    contexts = batch.data['x']
    char_contexts = batch.data['cx']

    start_onehot = np.zeros([N, M, JX], dtype='bool')
    end_onehot = np.zeros([N, M, JX], dtype='bool')

    # --- answer labels -------------------------------------------------------
    for row, (sents, char_sents, spans) in enumerate(zip(contexts, char_contexts, batch.data['y'])):
        # Each span is ((start_sent, start_word), (stop_sent, stop_word)).
        (sent_start, word_start), (sent_stop, word_stop) = random.choice(spans)
        if config.single:
            # Keep only the answer-start sentence (in-place on the batch data).
            contexts[row] = [sents[sent_start]]
            char_contexts[row] = [char_sents[sent_start]]
            sent_start, sent_stop = 0, 0
        if config.squash:
            # All sentences become one: shift word offsets by the preceding sentence lengths.
            word_start = word_start + sum(len(s) for s in sents[:sent_start])
            sent_start = 0
            word_stop = word_stop + sum(len(s) for s in sents[:sent_stop])
            sent_stop = 0
        start_onehot[row, sent_start, word_start] = True
        # The stop word index is exclusive, so the last answer word is at word_stop - 1.
        end_onehot[row, sent_stop, word_stop - 1] = True

    def _get_word(word):
        """Map a word to its id, trying several casings; 1 when nothing matches."""
        vocab = batch.shared['word2idx']
        variants = (word, word.lower(), word.capitalize(), word.upper())
        hit = next((v for v in variants if v in vocab), None)
        if hit is not None:
            return vocab[hit]
        if config.use_glove_for_unk:
            # Ids of the extra vocabulary are offset past the main vocabulary.
            extra_vocab = batch.shared['new_word2idx']
            hit = next((v for v in variants if v in extra_vocab), None)
            if hit is not None:
                return extra_vocab[hit] + len(vocab)
        return 1

    def _get_char(char):
        """Map a character to its id; 1 when it is not in the table."""
        table = batch.shared['char2idx']
        return table[char] if char in table else 1

    # --- context words -------------------------------------------------------
    for row, sents in enumerate(contexts):
        if config.squash:
            sents = [list(itertools.chain(*sents))]
        # islice truncates to the configured maxima, like the explicit index checks it replaces.
        for s_idx, sent in enumerate(itertools.islice(sents, config.max_num_sents)):
            for w_idx, token in enumerate(itertools.islice(sent, config.max_sent_size)):
                word_id = _get_word(token)
                assert isinstance(word_id, int), word_id
                word_ids[row, s_idx, w_idx] = word_id
                word_mask[row, s_idx, w_idx] = True

    # --- context characters --------------------------------------------------
    for row, char_sents in enumerate(char_contexts):
        if config.squash:
            char_sents = [list(itertools.chain(*char_sents))]
        for s_idx, char_sent in enumerate(itertools.islice(char_sents, config.max_num_sents)):
            for w_idx, chars in enumerate(itertools.islice(char_sent, config.max_sent_size)):
                for c_idx, ch in enumerate(itertools.islice(chars, config.max_word_size)):
                    char_ids[row, s_idx, w_idx, c_idx] = _get_char(ch)

    # --- question words (not truncated here) ---------------------------------
    for row, ques in enumerate(batch.data['q']):
        for w_idx, token in enumerate(ques):
            ques_ids[row, w_idx] = _get_word(token)
            ques_mask[row, w_idx] = True

    # --- question characters -------------------------------------------------
    for row, char_ques in enumerate(batch.data['cq']):
        for w_idx, chars in enumerate(char_ques):
            for c_idx, ch in enumerate(chars):
                ques_char_ids[row, w_idx, c_idx] = _get_char(ch)
                # Stop after the last slot that fits in the character axis.
                if c_idx + 1 == config.max_word_size:
                    break

    return (word_ids, char_ids, word_mask, ques_ids, ques_char_ids, ques_mask,
            start_onehot, end_onehot, extra_emb)
def get_feed_dict(data_set, is_train_cond=True):
    """Build the placeholder -> value mapping for one session run.

    Args:
        data_set: the DataSet batch to convert (passed straight to get_feed).
        is_train_cond: value fed to the `is_train` placeholder. Defaults to
            True, which is what this function always fed before the parameter
            existed; pass False to build a feed for evaluation.

    Returns:
        dict mapping every graph placeholder to its numpy array / value.
    """
    (temp_x, temp_cx, temp_x_mask, temp_q, temp_cq, temp_q_mask,
     temp_y1, temp_y2, temp_new_emb_mat) = get_feed(data_set, is_train_cond)

    feed_dict = {
        x: temp_x,
        cx: temp_cx,
        x_mask: temp_x_mask,
        q: temp_q,
        cq: temp_cq,
        q_mask: temp_q_mask,
        y1: temp_y1,
        y2: temp_y2,
        is_train: is_train_cond,
        new_emb_mat: temp_new_emb_mat,
    }
    return feed_dict


def step(batch, get_summary=False):
    """Run one optimisation step on `batch`.

    Args:
        batch: a 2-tuple whose second element is the DataSet to train on
            (the first element is ignored).
        get_summary: when True, the merged summary op is evaluated as well.

    Returns:
        (loss value, serialized summary or None, result of running train_op).
    """
    _unused_idxs, data_set = batch
    feed_dict = get_feed_dict(data_set)

    if not get_summary:
        temp_loss, temp_train_op = sess.run([loss, train_op], feed_dict=feed_dict)
        return temp_loss, None, temp_train_op

    temp_loss, temp_summary, temp_train_op = sess.run(
        [loss, summary, train_op], feed_dict=feed_dict)
    return temp_loss, temp_summary, temp_train_op

In [5]:
# Checkpoint handler for the graph built above, and the path prefix checkpoints are written to.
saver = tf.train.Saver()
save_path = os.path.join(config.save_dir, config.model_name)

# Soft placement lets TensorFlow fall back to another device for ops that
# cannot run on the device the graph was pinned to.
session_config = tf.ConfigProto(allow_soft_placement=True)
sess = tf.Session(config=session_config)
sess.run(tf.global_variables_initializer())

# Summary writer for the log directory; the graph definition is written with it.
writer = tf.summary.FileWriter(config.log_dir, graph=tf.get_default_graph())
   

In [6]:
# Resume from the newest checkpoint only when the run is configured to load one.
# Without config.load the setup cell wipes the output directory, so there is nothing
# to restore and the freshly initialised variables are kept.
checkpoint = tf.train.latest_checkpoint(config.save_dir) if config.load else None
if checkpoint is not None:
    saver.restore(sess, checkpoint)
elif config.load:
    # latest_checkpoint returns None when the directory holds no checkpoint;
    # fail with a clear message instead of passing None to saver.restore.
    raise FileNotFoundError("No checkpoint found in {}".format(config.save_dir))

INFO:tensorflow:Restoring parameters from out/basic_60/00/save/basic_60


In [7]:
# Number of training iterations: used as the batch count requested from
# train_data.get_batches and as the tqdm total in the training loop.
num_steps = config.num_steps

In [8]:
batch_size = config.batch_size

batches = train_data.get_batches(batch_size, num_batches=num_steps, shuffle=True)

# Wrap each (idxs, data_set) batch as a tuple of per-device (idxs, data_set) pairs.
# Only one group is requested, so the loop below always uses element 0.
multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=1),
                         data_set.divide(1))) for idxs, data_set in batches)

unsaved_progress = False  # True while steps have run since the last checkpoint
for batch in tqdm(multi_batches, total=num_steps):
    temp_global_step = sess.run(global_step) + 1  # +1 because all calculations are done after step

    get_summary = temp_global_step % config.log_period == 0
    temp_loss, temp_summary, temp_train_op = step(batch[0], get_summary=get_summary)
    unsaved_progress = True

    if get_summary:
        print("Steps:{}".format(temp_global_step), ", Loss: {}".format(temp_loss))
        writer.add_summary(temp_summary, temp_global_step)

    # occasional saving
    if temp_global_step % config.save_period == 0:
        saver.save(sess, save_path=save_path)
        unsaved_progress = False

# Persist whatever was trained after the last periodic save, and push buffered
# summaries to disk; otherwise the tail of the run is lost when the kernel stops.
if unsaved_progress:
    saver.save(sess, save_path=save_path)
writer.flush()


  0%|          | 100/20000 [06:46<22:43:52,  4.11s/it]

Steps:5100 , Loss: 15.23134994506836


  1%|          | 200/20000 [13:08<20:10:01,  3.67s/it]

Steps:5200 , Loss: 14.505146980285645


  2%|▏         | 300/20000 [19:24<20:20:59,  3.72s/it]

Steps:5300 , Loss: 15.062029838562012


  2%|▏         | 400/20000 [25:40<20:45:41,  3.81s/it]

Steps:5400 , Loss: 15.184408187866211


  2%|▏         | 499/20000 [31:59<21:56:24,  4.05s/it]

Steps:5500 , Loss: 15.189064025878906


  3%|▎         | 600/20000 [38:29<20:34:58,  3.82s/it]

Steps:5600 , Loss: 14.065592765808105


  4%|▎         | 700/20000 [45:01<22:32:40,  4.21s/it]

Steps:5700 , Loss: 14.31540298461914


  4%|▍         | 800/20000 [51:46<21:18:03,  3.99s/it]

Steps:5800 , Loss: 14.272640228271484


  4%|▍         | 900/20000 [58:31<21:19:57,  4.02s/it]

Steps:5900 , Loss: 14.373470306396484


  5%|▍         | 999/20000 [1:05:05<22:21:13,  4.24s/it]

Steps:6000 , Loss: 14.63029670715332


  6%|▌         | 1100/20000 [1:11:56<20:46:51,  3.96s/it]

Steps:6100 , Loss: 15.130757331848145


  6%|▌         | 1200/20000 [1:18:33<20:37:35,  3.95s/it]

Steps:6200 , Loss: 14.371471405029297


  6%|▋         | 1300/20000 [1:25:19<20:09:47,  3.88s/it]

Steps:6300 , Loss: 14.554465293884277


  7%|▋         | 1400/20000 [1:32:03<20:49:18,  4.03s/it]

Steps:6400 , Loss: 14.131684303283691


  7%|▋         | 1499/20000 [1:38:43<21:04:46,  4.10s/it]

Steps:6500 , Loss: 15.059643745422363


  8%|▊         | 1600/20000 [1:45:37<21:47:04,  4.26s/it]

Steps:6600 , Loss: 15.38571834564209


  8%|▊         | 1700/20000 [1:52:18<20:07:34,  3.96s/it]

Steps:6700 , Loss: 14.068506240844727


  9%|▉         | 1800/20000 [1:59:09<19:51:26,  3.93s/it]

Steps:6800 , Loss: 14.29962158203125


 10%|▉         | 1900/20000 [2:05:36<20:19:36,  4.04s/it]

Steps:6900 , Loss: 15.896406173706055


 10%|▉         | 1999/20000 [2:12:12<19:51:26,  3.97s/it]

Steps:7000 , Loss: 14.76878547668457


 10%|█         | 2100/20000 [2:19:07<19:29:08,  3.92s/it]

Steps:7100 , Loss: 14.76662826538086


 11%|█         | 2200/20000 [2:25:50<19:28:29,  3.94s/it]

Steps:7200 , Loss: 15.228290557861328


 12%|█▏        | 2300/20000 [2:32:37<19:42:32,  4.01s/it]

Steps:7300 , Loss: 15.862361907958984


 12%|█▏        | 2400/20000 [2:39:23<19:53:19,  4.07s/it]

Steps:7400 , Loss: 13.540534973144531


 12%|█▏        | 2499/20000 [2:46:04<19:15:59,  3.96s/it]

Steps:7500 , Loss: 15.271758079528809


 13%|█▎        | 2600/20000 [2:52:54<18:19:29,  3.79s/it]

Steps:7600 , Loss: 15.431468963623047


 14%|█▎        | 2700/20000 [2:59:35<20:01:42,  4.17s/it]

Steps:7700 , Loss: 14.090729713439941


 14%|█▍        | 2800/20000 [3:06:26<19:23:54,  4.06s/it]

Steps:7800 , Loss: 14.657939910888672


 14%|█▍        | 2900/20000 [3:13:13<19:15:35,  4.05s/it]

Steps:7900 , Loss: 15.604558944702148


 15%|█▍        | 2999/20000 [3:19:55<19:31:35,  4.13s/it]

Steps:8000 , Loss: 15.315155029296875


 16%|█▌        | 3100/20000 [3:26:48<18:32:23,  3.95s/it]

Steps:8100 , Loss: 14.701061248779297


 16%|█▌        | 3200/20000 [3:33:31<18:36:12,  3.99s/it]

Steps:8200 , Loss: 15.093060493469238


 16%|█▋        | 3300/20000 [3:40:23<17:52:00,  3.85s/it]

Steps:8300 , Loss: 14.71125316619873


 17%|█▋        | 3400/20000 [3:47:08<19:41:22,  4.27s/it]

Steps:8400 , Loss: 15.423852920532227


 17%|█▋        | 3499/20000 [3:53:50<18:13:32,  3.98s/it]

Steps:8500 , Loss: 15.10849380493164


 18%|█▊        | 3600/20000 [4:00:45<18:10:11,  3.99s/it]

Steps:8600 , Loss: 15.223134994506836


 18%|█▊        | 3700/20000 [4:07:26<18:10:21,  4.01s/it]

Steps:8700 , Loss: 14.3543701171875


 19%|█▉        | 3800/20000 [4:14:18<18:59:01,  4.22s/it]

Steps:8800 , Loss: 15.002613067626953


 20%|█▉        | 3900/20000 [4:21:02<18:05:10,  4.04s/it]

Steps:8900 , Loss: 14.649711608886719


 20%|█▉        | 3999/20000 [4:27:48<18:09:07,  4.08s/it]

Steps:9000 , Loss: 14.864869117736816


 20%|██        | 4100/20000 [4:34:43<18:14:49,  4.13s/it]

Steps:9100 , Loss: 15.157808303833008


 21%|██        | 4200/20000 [4:41:32<18:03:27,  4.11s/it]

Steps:9200 , Loss: 15.018840789794922


 22%|██▏       | 4300/20000 [4:48:24<17:27:13,  4.00s/it]

Steps:9300 , Loss: 14.518464088439941


 22%|██▏       | 4400/20000 [4:55:12<17:11:50,  3.97s/it]

Steps:9400 , Loss: 14.315210342407227


 22%|██▏       | 4499/20000 [5:01:51<16:57:47,  3.94s/it]

Steps:9500 , Loss: 14.073043823242188


 23%|██▎       | 4600/20000 [5:08:46<17:37:34,  4.12s/it]

Steps:9600 , Loss: 15.045110702514648


 24%|██▎       | 4700/20000 [5:15:30<16:17:34,  3.83s/it]

Steps:9700 , Loss: 14.907819747924805


 24%|██▍       | 4800/20000 [5:22:18<17:23:31,  4.12s/it]

Steps:9800 , Loss: 15.457945823669434


 24%|██▍       | 4900/20000 [5:28:59<16:46:05,  4.00s/it]

Steps:9900 , Loss: 15.630388259887695


 25%|██▍       | 4999/20000 [5:35:40<16:37:54,  3.99s/it]

Steps:10000 , Loss: 14.560498237609863


 26%|██▌       | 5100/20000 [5:42:27<17:02:49,  4.12s/it]

Steps:10100 , Loss: 15.061397552490234


 26%|██▌       | 5200/20000 [5:49:13<16:46:52,  4.08s/it]

Steps:10200 , Loss: 14.157416343688965


 26%|██▋       | 5300/20000 [5:56:00<15:58:52,  3.91s/it]

Steps:10300 , Loss: 14.831853866577148


 27%|██▋       | 5400/20000 [6:02:46<16:45:30,  4.13s/it]

Steps:10400 , Loss: 14.796993255615234


 27%|██▋       | 5499/20000 [6:09:23<15:41:45,  3.90s/it]

Steps:10500 , Loss: 15.347439765930176


 28%|██▊       | 5600/20000 [6:16:18<16:29:46,  4.12s/it]

Steps:10600 , Loss: 15.072898864746094


 28%|██▊       | 5700/20000 [6:23:06<15:39:10,  3.94s/it]

Steps:10700 , Loss: 14.303107261657715


 29%|██▉       | 5800/20000 [6:29:56<15:43:16,  3.99s/it]

Steps:10800 , Loss: 15.361550331115723


 30%|██▉       | 5900/20000 [6:36:46<15:49:11,  4.04s/it]

Steps:10900 , Loss: 14.121033668518066


 30%|██▉       | 5999/20000 [6:43:24<15:04:16,  3.88s/it]

Steps:11000 , Loss: 15.581768989562988


 30%|███       | 6100/20000 [6:50:18<15:08:07,  3.92s/it]

Steps:11100 , Loss: 15.369640350341797


 31%|███       | 6200/20000 [6:57:05<15:54:20,  4.15s/it]

Steps:11200 , Loss: 16.44722557067871


 32%|███▏      | 6300/20000 [7:03:58<15:47:17,  4.15s/it]

Steps:11300 , Loss: 15.042375564575195


 32%|███▏      | 6400/20000 [7:10:44<15:28:29,  4.10s/it]

Steps:11400 , Loss: 15.49410629272461


 32%|███▏      | 6499/20000 [7:17:25<15:30:18,  4.13s/it]

Steps:11500 , Loss: 15.491521835327148


 33%|███▎      | 6600/20000 [7:24:26<15:32:41,  4.18s/it]

Steps:11600 , Loss: 14.27178955078125


 34%|███▎      | 6700/20000 [7:31:11<14:39:03,  3.97s/it]

Steps:11700 , Loss: 13.779086112976074


 34%|███▍      | 6800/20000 [7:38:03<15:03:13,  4.11s/it]

Steps:11800 , Loss: 13.960472106933594


 34%|███▍      | 6900/20000 [7:44:48<14:35:50,  4.01s/it]

Steps:11900 , Loss: 14.675827980041504


 35%|███▍      | 6999/20000 [7:51:26<14:39:59,  4.06s/it]

Steps:12000 , Loss: 15.164560317993164


 36%|███▌      | 7100/20000 [7:58:23<15:17:30,  4.27s/it]

Steps:12100 , Loss: 15.134659767150879


 36%|███▌      | 7200/20000 [8:05:10<14:56:21,  4.20s/it]

Steps:12200 , Loss: 14.817209243774414


 36%|███▋      | 7300/20000 [8:12:05<14:14:40,  4.04s/it]

Steps:12300 , Loss: 13.952064514160156


 37%|███▋      | 7400/20000 [8:18:49<13:59:18,  4.00s/it]

Steps:12400 , Loss: 14.08247184753418


 37%|███▋      | 7499/20000 [8:25:35<14:09:21,  4.08s/it]

Steps:12500 , Loss: 14.250460624694824


 38%|███▊      | 7600/20000 [8:32:28<14:31:31,  4.22s/it]

Steps:12600 , Loss: 14.214300155639648


 38%|███▊      | 7700/20000 [8:39:19<14:02:05,  4.11s/it]

Steps:12700 , Loss: 14.278118133544922


 39%|███▉      | 7800/20000 [8:46:11<13:57:59,  4.12s/it]

Steps:12800 , Loss: 14.34700870513916


 40%|███▉      | 7900/20000 [8:52:57<13:17:40,  3.96s/it]

Steps:12900 , Loss: 15.360479354858398


 40%|███▉      | 7999/20000 [8:59:42<13:40:12,  4.10s/it]

Steps:13000 , Loss: 14.785917282104492


 40%|████      | 8100/20000 [9:06:35<13:47:23,  4.17s/it]

Steps:13100 , Loss: 14.01404094696045


 41%|████      | 8200/20000 [9:13:22<13:15:41,  4.05s/it]

Steps:13200 , Loss: 14.691221237182617


 42%|████▏     | 8300/20000 [9:20:14<12:56:02,  3.98s/it]

Steps:13300 , Loss: 14.739660263061523


 42%|████▏     | 8400/20000 [9:25:59<8:46:30,  2.72s/it] 

Steps:13400 , Loss: 14.618082046508789


 42%|████▏     | 8499/20000 [9:30:21<8:33:42,  2.68s/it]

Steps:13500 , Loss: 15.164386749267578


 43%|████▎     | 8600/20000 [9:34:57<8:17:12,  2.62s/it] 

Steps:13600 , Loss: 13.933832168579102


 44%|████▎     | 8700/20000 [9:39:21<8:09:03,  2.60s/it]

Steps:13700 , Loss: 14.508267402648926


 44%|████▍     | 8800/20000 [9:43:56<8:13:22,  2.64s/it] 

Steps:13800 , Loss: 15.44676399230957


 44%|████▍     | 8900/20000 [9:48:23<8:22:42,  2.72s/it]

Steps:13900 , Loss: 15.354537963867188


 45%|████▍     | 8999/20000 [9:52:48<7:51:31,  2.57s/it]

Steps:14000 , Loss: 14.53687858581543


 46%|████▌     | 9100/20000 [9:57:21<8:03:36,  2.66s/it] 

Steps:14100 , Loss: 14.47445297241211


 46%|████▌     | 9200/20000 [10:01:49<8:04:49,  2.69s/it]

Steps:14200 , Loss: 14.122238159179688


 46%|████▋     | 9300/20000 [10:06:21<7:58:23,  2.68s/it] 

Steps:14300 , Loss: 14.800617218017578


 47%|████▋     | 9400/20000 [10:10:47<7:44:44,  2.63s/it]

Steps:14400 , Loss: 15.17135238647461


 47%|████▋     | 9499/20000 [10:15:10<7:39:17,  2.62s/it]

Steps:14500 , Loss: 14.24393081665039


 48%|████▊     | 9600/20000 [10:19:46<7:42:14,  2.67s/it] 

Steps:14600 , Loss: 14.606843948364258


 48%|████▊     | 9700/20000 [10:24:14<7:46:13,  2.72s/it]

Steps:14700 , Loss: 14.958858489990234


 49%|████▉     | 9800/20000 [10:28:49<7:37:01,  2.69s/it] 

Steps:14800 , Loss: 15.191873550415039


 50%|████▉     | 9900/20000 [10:33:15<7:35:41,  2.71s/it]

Steps:14900 , Loss: 14.778716087341309


 50%|████▉     | 9999/20000 [10:37:45<8:03:32,  2.90s/it]

Steps:15000 , Loss: 14.824642181396484


 50%|█████     | 10100/20000 [10:42:24<7:18:29,  2.66s/it] 

Steps:15100 , Loss: 13.624814987182617


 51%|█████     | 10200/20000 [10:46:53<7:09:00,  2.63s/it]

Steps:15200 , Loss: 13.764241218566895


 52%|█████▏    | 10300/20000 [10:51:29<7:26:47,  2.76s/it] 

Steps:15300 , Loss: 15.093403816223145


 52%|█████▏    | 10400/20000 [10:55:56<7:29:50,  2.81s/it]

Steps:15400 , Loss: 15.245136260986328


 52%|█████▏    | 10499/20000 [11:00:23<7:11:56,  2.73s/it]

Steps:15500 , Loss: 14.531217575073242


 53%|█████▎    | 10600/20000 [11:05:05<6:47:48,  2.60s/it] 

Steps:15600 , Loss: 14.68780517578125


 54%|█████▎    | 10700/20000 [11:10:00<7:45:50,  3.01s/it]

Steps:15700 , Loss: 14.64454174041748


 54%|█████▍    | 10800/20000 [11:14:52<7:05:41,  2.78s/it] 

Steps:15800 , Loss: 14.972594261169434


 55%|█████▍    | 10900/20000 [11:19:25<6:43:44,  2.66s/it]

Steps:15900 , Loss: 14.215225219726562


 55%|█████▍    | 10999/20000 [11:23:54<6:49:13,  2.73s/it]

Steps:16000 , Loss: 13.968769073486328


 56%|█████▌    | 11100/20000 [11:28:29<6:36:13,  2.67s/it] 

Steps:16100 , Loss: 14.517799377441406


 56%|█████▌    | 11200/20000 [11:32:58<6:38:08,  2.71s/it]

Steps:16200 , Loss: 14.933653831481934


 56%|█████▋    | 11300/20000 [11:37:34<6:48:11,  2.82s/it] 

Steps:16300 , Loss: 14.794271469116211


 57%|█████▋    | 11400/20000 [11:42:10<6:21:47,  2.66s/it]

Steps:16400 , Loss: 14.323554992675781


 57%|█████▋    | 11499/20000 [11:46:35<6:16:42,  2.66s/it]

Steps:16500 , Loss: 14.677517890930176


 58%|█████▊    | 11600/20000 [11:51:10<6:13:37,  2.67s/it]

Steps:16600 , Loss: 14.385706901550293


 58%|█████▊    | 11700/20000 [11:55:39<6:13:27,  2.70s/it]

Steps:16700 , Loss: 15.557685852050781


 59%|█████▉    | 11800/20000 [12:00:12<6:06:41,  2.68s/it]

Steps:16800 , Loss: 14.126219749450684


 60%|█████▉    | 11900/20000 [12:04:40<5:57:31,  2.65s/it]

Steps:16900 , Loss: 14.646833419799805


 60%|█████▉    | 11999/20000 [12:09:06<6:00:01,  2.70s/it]

Steps:17000 , Loss: 14.915512084960938


 60%|██████    | 12100/20000 [12:13:42<6:00:45,  2.74s/it]

Steps:17100 , Loss: 14.867250442504883


 61%|██████    | 12200/20000 [12:18:09<5:52:18,  2.71s/it]

Steps:17200 , Loss: 15.336478233337402


 62%|██████▏   | 12300/20000 [12:22:44<5:35:43,  2.62s/it]

Steps:17300 , Loss: 14.593650817871094


 62%|██████▏   | 12400/20000 [12:27:13<5:36:28,  2.66s/it]

Steps:17400 , Loss: 13.880107879638672


 62%|██████▏   | 12499/20000 [12:31:39<5:47:23,  2.78s/it]

Steps:17500 , Loss: 15.064801216125488


 63%|██████▎   | 12600/20000 [12:36:13<5:30:00,  2.68s/it]

Steps:17600 , Loss: 15.078417778015137


 64%|██████▎   | 12700/20000 [12:40:38<4:50:21,  2.39s/it]

Steps:17700 , Loss: 15.358346939086914


 64%|██████▍   | 12800/20000 [12:44:39<4:41:47,  2.35s/it]

Steps:17800 , Loss: 14.232908248901367


 64%|██████▍   | 12900/20000 [12:48:35<4:45:48,  2.42s/it]

Steps:17900 , Loss: 15.165191650390625


 65%|██████▍   | 12999/20000 [12:52:26<4:34:30,  2.35s/it]

Steps:18000 , Loss: 15.57406997680664


 66%|██████▌   | 13100/20000 [12:56:27<4:21:30,  2.27s/it]

Steps:18100 , Loss: 14.07095718383789


 66%|██████▌   | 13200/20000 [13:00:26<4:42:56,  2.50s/it]

Steps:18200 , Loss: 15.285772323608398


 66%|██████▋   | 13300/20000 [13:04:37<4:36:00,  2.47s/it]

Steps:18300 , Loss: 14.789139747619629


 67%|██████▋   | 13400/20000 [13:08:46<4:31:33,  2.47s/it]

Steps:18400 , Loss: 16.247711181640625


 67%|██████▋   | 13499/20000 [13:12:55<4:34:02,  2.53s/it]

Steps:18500 , Loss: 15.211629867553711


 68%|██████▊   | 13600/20000 [13:17:15<4:21:54,  2.46s/it]

Steps:18600 , Loss: 14.20438003540039


 68%|██████▊   | 13700/20000 [13:21:28<4:28:47,  2.56s/it]

Steps:18700 , Loss: 14.033635139465332


 69%|██████▉   | 13800/20000 [13:25:44<4:24:36,  2.56s/it]

Steps:18800 , Loss: 14.375838279724121


 70%|██████▉   | 13900/20000 [13:29:56<4:08:26,  2.44s/it]

Steps:18900 , Loss: 14.833932876586914


 70%|██████▉   | 13999/20000 [13:34:03<4:12:42,  2.53s/it]

Steps:19000 , Loss: 15.790282249450684


 70%|███████   | 14100/20000 [13:38:20<4:05:29,  2.50s/it]

Steps:19100 , Loss: 15.329586029052734


 71%|███████   | 14200/20000 [13:42:32<4:10:26,  2.59s/it]

Steps:19200 , Loss: 15.378355979919434


 72%|███████▏  | 14300/20000 [13:46:48<4:02:00,  2.55s/it]

Steps:19300 , Loss: 14.40875244140625


 72%|███████▏  | 14400/20000 [13:50:54<3:38:47,  2.34s/it]

Steps:19400 , Loss: 14.213653564453125


 72%|███████▏  | 14499/20000 [13:54:57<3:45:58,  2.46s/it]

Steps:19500 , Loss: 15.48906135559082


 73%|███████▎  | 14600/20000 [13:59:12<3:38:44,  2.43s/it]

Steps:19600 , Loss: 14.137927055358887


 74%|███████▎  | 14700/20000 [14:03:17<3:37:38,  2.46s/it]

Steps:19700 , Loss: 16.02526092529297


 74%|███████▍  | 14800/20000 [14:07:31<3:36:08,  2.49s/it]

Steps:19800 , Loss: 15.355169296264648


 74%|███████▍  | 14900/20000 [14:11:36<3:29:06,  2.46s/it]

Steps:19900 , Loss: 13.877995491027832


 75%|███████▍  | 14999/20000 [14:15:41<3:26:56,  2.48s/it]

Steps:20000 , Loss: 15.221321105957031


 76%|███████▌  | 15100/20000 [14:19:55<3:19:49,  2.45s/it]

Steps:20100 , Loss: 15.028512001037598


 76%|███████▌  | 15200/20000 [14:24:02<3:20:14,  2.50s/it]

Steps:20200 , Loss: 15.175687789916992


 76%|███████▋  | 15300/20000 [14:28:14<3:09:17,  2.42s/it]

Steps:20300 , Loss: 14.299673080444336


 77%|███████▋  | 15400/20000 [14:32:18<3:00:16,  2.35s/it]

Steps:20400 , Loss: 14.65815258026123


 77%|███████▋  | 15499/20000 [14:36:20<3:03:32,  2.45s/it]

Steps:20500 , Loss: 15.396820068359375


 78%|███████▊  | 15600/20000 [14:40:36<3:12:38,  2.63s/it]

Steps:20600 , Loss: 14.474602699279785


 78%|███████▊  | 15700/20000 [14:44:43<2:57:08,  2.47s/it]

Steps:20700 , Loss: 14.521370887756348


 79%|███████▉  | 15800/20000 [14:48:55<2:51:34,  2.45s/it]

Steps:20800 , Loss: 14.56252670288086


 80%|███████▉  | 15900/20000 [14:53:04<3:03:43,  2.69s/it]

Steps:20900 , Loss: 15.123163223266602


 80%|███████▉  | 15999/20000 [14:57:06<2:40:30,  2.41s/it]

Steps:21000 , Loss: 14.181221961975098


 80%|████████  | 16100/20000 [15:01:20<2:37:50,  2.43s/it]

Steps:21100 , Loss: 14.334107398986816


 81%|████████  | 16200/20000 [15:05:16<2:29:26,  2.36s/it]

Steps:21200 , Loss: 14.65679931640625


 82%|████████▏ | 16300/20000 [15:09:26<2:31:00,  2.45s/it]

Steps:21300 , Loss: 14.903881072998047


 82%|████████▏ | 16400/20000 [15:13:29<2:31:36,  2.53s/it]

Steps:21400 , Loss: 13.891475677490234


 82%|████████▏ | 16499/20000 [15:17:30<2:19:56,  2.40s/it]

Steps:21500 , Loss: 14.458420753479004


 83%|████████▎ | 16600/20000 [15:21:43<2:11:28,  2.32s/it]

Steps:21600 , Loss: 15.103397369384766


 84%|████████▎ | 16700/20000 [15:25:46<2:16:57,  2.49s/it]

Steps:21700 , Loss: 14.857398986816406


 84%|████████▍ | 16800/20000 [15:30:02<2:09:31,  2.43s/it]

Steps:21800 , Loss: 14.95412540435791


 84%|████████▍ | 16900/20000 [15:34:13<2:04:58,  2.42s/it]

Steps:21900 , Loss: 15.057619094848633


 85%|████████▍ | 16999/20000 [15:38:16<2:00:32,  2.41s/it]

Steps:22000 , Loss: 15.077006340026855


 86%|████████▌ | 17100/20000 [15:42:32<1:59:01,  2.46s/it]

Steps:22100 , Loss: 14.856897354125977


 86%|████████▌ | 17200/20000 [15:46:38<1:56:05,  2.49s/it]

Steps:22200 , Loss: 15.495718955993652


 86%|████████▋ | 17300/20000 [15:50:55<1:51:20,  2.47s/it]

Steps:22300 , Loss: 14.523073196411133


 87%|████████▋ | 17400/20000 [15:55:04<1:44:07,  2.40s/it]

Steps:22400 , Loss: 14.676665306091309


 87%|████████▋ | 17499/20000 [15:59:04<1:48:39,  2.61s/it]

Steps:22500 , Loss: 13.953422546386719


 88%|████████▊ | 17600/20000 [16:03:15<1:36:10,  2.40s/it]

Steps:22600 , Loss: 15.073321342468262


 88%|████████▊ | 17700/20000 [16:07:11<1:29:39,  2.34s/it]

Steps:22700 , Loss: 14.6736478805542


 89%|████████▉ | 17800/20000 [16:11:12<1:26:28,  2.36s/it]

Steps:22800 , Loss: 14.918957710266113


 90%|████████▉ | 17900/20000 [16:15:07<1:22:22,  2.35s/it]

Steps:22900 , Loss: 14.643200874328613


 90%|████████▉ | 17999/20000 [16:18:59<1:19:20,  2.38s/it]

Steps:23000 , Loss: 14.90592098236084


 90%|█████████ | 18100/20000 [16:23:02<1:17:06,  2.43s/it]

Steps:23100 , Loss: 15.484819412231445


 91%|█████████ | 18200/20000 [16:27:00<1:11:35,  2.39s/it]

Steps:23200 , Loss: 15.101150512695312


 92%|█████████▏| 18300/20000 [16:31:00<1:05:13,  2.30s/it]

Steps:23300 , Loss: 15.789478302001953


 92%|█████████▏| 18400/20000 [16:34:58<1:02:14,  2.33s/it]

Steps:23400 , Loss: 14.140653610229492


 92%|█████████▏| 18499/20000 [16:38:52<1:01:19,  2.45s/it]

Steps:23500 , Loss: 14.70889949798584


 93%|█████████▎| 18600/20000 [16:42:56<55:43,  2.39s/it]  

Steps:23600 , Loss: 14.486968040466309


 94%|█████████▎| 18700/20000 [16:46:51<50:32,  2.33s/it]

Steps:23700 , Loss: 15.430623054504395


 94%|█████████▍| 18800/20000 [16:50:52<46:52,  2.34s/it]  

Steps:23800 , Loss: 14.109142303466797


 94%|█████████▍| 18900/20000 [16:54:50<44:15,  2.41s/it]

Steps:23900 , Loss: 14.7577486038208


 95%|█████████▍| 18999/20000 [16:58:46<39:16,  2.35s/it]

Steps:24000 , Loss: 15.15677261352539


 96%|█████████▌| 19100/20000 [17:02:50<35:51,  2.39s/it]  

Steps:24100 , Loss: 14.963796615600586


 96%|█████████▌| 19200/20000 [17:06:48<32:56,  2.47s/it]

Steps:24200 , Loss: 14.083731651306152


 96%|█████████▋| 19300/20000 [17:10:52<28:55,  2.48s/it]

Steps:24300 , Loss: 15.340648651123047


 97%|█████████▋| 19400/20000 [17:14:52<23:46,  2.38s/it]

Steps:24400 , Loss: 13.951799392700195


 97%|█████████▋| 19499/20000 [17:18:50<19:55,  2.39s/it]

Steps:24500 , Loss: 14.896650314331055


 98%|█████████▊| 19600/20000 [17:22:56<15:29,  2.32s/it]

Steps:24600 , Loss: 13.385324478149414


 98%|█████████▊| 19700/20000 [17:26:54<11:42,  2.34s/it]

Steps:24700 , Loss: 14.283834457397461


 99%|█████████▉| 19800/20000 [17:31:00<07:58,  2.39s/it]

Steps:24800 , Loss: 14.646588325500488


100%|█████████▉| 19900/20000 [17:34:58<03:55,  2.36s/it]

Steps:24900 , Loss: 14.398000717163086


100%|█████████▉| 19999/20000 [17:38:54<00:02,  2.36s/it]

Steps:25000 , Loss: 14.2553071975708


100%|██████████| 20000/20000 [17:39:00<00:00,  3.65s/it]


In [None]:
# NOTE(review): every line in this cell is commented out, so executing the cell does nothing.
# It holds three disabled scratch fragments:
#   1. a draft `get_evaluation(batch)` helper,
#   2. a one-off evaluation of a single batch taken from `bttt[0]`,
#   3. a periodic train/dev evaluation block gated on `config.eval_period`.
# None of the names used below (sess, yp, yp1, tensor_dict, get_feed_dict, evaluator,
# graph_handler, bttt, batch_size, Evaluation) are defined in this cell; presumably they
# come from earlier cells -- verify before re-enabling anything here.
# Consider deleting the cell, or moving the parts worth keeping into a tested helper.

# --- Fragment 1: draft helper -------------------------------------------------------
# NOTE(review): as written this would not run if uncommented:
#   * the fetched values are bound to temp_global_step / temp_yp / temp_vals, but the
#     following lines read `yp`, `vals` and `global_step` instead;
#   * assigning to `yp` and `tensor_dict` inside the function makes them local names,
#     so the earlier reads of them in the sess.run(...) line would raise UnboundLocalError;
#   * `self.tensor_dict` is referenced although the function takes no `self`.
# def get_evaluation(batch):
#         idxs, data_set = batch
#         feed_dict = get_feed_dict(data_set)
#         temp_global_step, temp_yp, temp_vals = sess.run([global_step, yp, list(tensor_dict.values())], feed_dict=feed_dict)
#         yp = yp[:data_set.num_examples]
#         tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
#         e = Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), tensor_dict=tensor_dict)
#         return e

# --- Fragment 2: single-batch check -------------------------------------------------
# Runs the graph once on the first element of `bttt` and trims the `yp1` result to
# data_set.num_examples rows. NOTE(review): presumably the trim drops padding rows added
# to fill the batch -- confirm against the batching code.
# idxs, data_set = bttt[0]
# feed_dict = get_feed_dict(data_set)
# temp_global_step, temp_yp1, temp_vals = sess.run([global_step, yp1, list(tensor_dict.values())], feed_dict=feed_dict)
# temp_yp1 = temp_yp1[:data_set.num_examples]

# --- Fragment 3: periodic train/dev evaluation --------------------------------------
# Caps the number of evaluation steps at config.val_num_batches when that value is
# positive and smaller than ceil(dev_data.num_examples / config.batch_size).
# NOTE(review): inconsistencies to resolve before re-enabling:
#   * num_steps is computed with `config.batch_size`, while the batches below use a bare
#     `batch_size`;
#   * `dev_batches` / `dev_multi_batches` are built but never used -- e_dev is computed
#     from dev_data.get_multi_batches(...) instead;
#   * the train batch count is derived from dev_data.num_examples, not train_data.
# if global_step % config.eval_period == 0:
#     num_steps = math.ceil(dev_data.num_examples / (config.batch_size))
#     if 0 < config.val_num_batches < num_steps:
#         num_steps = config.val_num_batches
    
#     train_batches = train_data.get_batches(batch_size, num_batches=num_steps, shuffle=True)

#     train_multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=1),
#                          data_set.divide(1))) for idxs, data_set in train_batches)    
    
#     e_train = evaluator.get_evaluation_from_batches(
#         sess, tqdm(train_multi_batches, total=num_steps))
    
#     graph_handler.add_summaries(e_train.summaries, global_step)
    
#     dev_batches = dev_data.get_batches(batch_size, num_batches=num_steps, shuffle=True)

#     dev_multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=1),
#                          data_set.divide(1))) for idxs, data_set in dev_batches)

#     e_dev = evaluator.get_evaluation_from_batches(
#         sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
    
#     graph_handler.add_summaries(e_dev.summaries, global_step)

#     if config.dump_eval:
#         graph_handler.dump_eval(e_dev)
#     if config.dump_answer:
#         graph_handler.dump_answer(e_dev)