In [1]:
import itertools
import json
import math
import os
import random
import shutil
from functools import reduce
from operator import mul
from pprint import pprint

import numpy as np
import tensorflow as tf
from tqdm import tqdm

import flag as fg
from helper import get_initializer, dropout, conv1d, multi_conv1d
from helper import flatten, reconstruct, linear, highway_layer, highway_network, mask, exp_mask, softmax
from helper import grouper
from read_data import read_data, get_squad_data_filter, update_config
from read_data import DataSet
from utils import index

# Build the run configuration. `_` is IPython's "last output" variable and is
# passed only as a placeholder argument.
# NOTE(review): presumably fg.main ignores its argument (tf.app.run-style
# signature) — confirm in flag.py.
config = fg.main(_)
config.model_name = 'basic'
config.out_dir = os.path.join(config.out_base_dir, config.model_name, str(config.run_id).zfill(2))

# Any mode other than training needs an existing checkpoint to load.
assert config.load or config.mode == 'train', "config.load must be True if not training"
# A run that does not load a checkpoint starts from a clean output directory.
if not config.load and os.path.exists(config.out_dir):
    shutil.rmtree(config.out_dir)

config.save_dir = os.path.join(config.out_dir, "save")
config.log_dir = os.path.join(config.out_dir, "log")
config.eval_dir = os.path.join(config.out_dir, "eval")
config.answer_dir = os.path.join(config.out_dir, "answer")
# Create the output tree; exist_ok makes this safe to re-run.
for directory in (config.out_dir, config.save_dir, config.log_dir,
                  config.answer_dir, config.eval_dir):
    os.makedirs(directory, exist_ok=True)

data_filter = get_squad_data_filter(config)

train_data = read_data(config, 'train', False, data_filter=data_filter)
dev_data = read_data(config, 'dev', False, data_filter=data_filter)

# Lets update_config adjust the size fields of `config` from both data sets.
update_config(config, [train_data, dev_data])

word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
word2idx_dict = train_data.shared['word2idx']

# Initial word-embedding matrix: the pretrained vector where one exists for
# the vocabulary index, otherwise a draw from N(0, I).
idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
# standard_normal draws the same distribution as multivariate_normal with a
# zero mean and identity covariance, without decomposing the covariance
# matrix once per missing word.
emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                    else np.random.standard_normal(config.word_emb_size)
                    for idx in range(config.word_vocab_size)])
config.emb_mat = emb_mat

Loaded 87507/87599 examples from train
Loaded 10544/10570 examples from dev


In [2]:
# Checkpoint interval in global steps: the training loop saves whenever the
# global step is a multiple of this value.
config.save_period = 250

In [3]:
# Size constants read once from the run configuration.
N = config.batch_size        # examples per batch
M = config.max_num_sents     # sentences per context
JX = config.max_sent_size    # words per context sentence
JQ = config.max_ques_size    # words per question
VW = config.word_vocab_size  # word vocabulary size
VC = config.char_vocab_size  # character vocabulary size
W = config.max_word_size     # characters per word
d = config.hidden_size       # units of each LSTM cell
dc = config.char_emb_size    # character embedding width
dw = config.word_emb_size    # word embedding width
dco = config.char_out_size   # width of the character-CNN output

# Graph inputs. The batch dimension is fixed to N; the sentence and word
# dimensions are left dynamic.
x = tf.placeholder(tf.int32, shape=[N, None, None], name='x')            # context word ids
cx = tf.placeholder(tf.int32, shape=[N, None, None, W], name='cx')       # context char ids
x_mask = tf.placeholder(tf.bool, shape=[N, None, None], name='x_mask')   # True on real context words
q = tf.placeholder(tf.int32, shape=[N, None], name='q')                  # question word ids
cq = tf.placeholder(tf.int32, shape=[N, None, W], name='cq')             # question char ids
q_mask = tf.placeholder(tf.bool, shape=[N, None], name='q_mask')         # True on real question words
y1 = tf.placeholder(tf.bool, shape=[N, None, None], name='y1')           # one-hot answer start
y2 = tf.placeholder(tf.bool, shape=[N, None, None], name='y2')           # one-hot answer end
is_train = tf.placeholder(tf.bool, shape=[], name='is_train')
# Extra embedding rows that are appended below the trainable word matrix.
new_emb_mat = tf.placeholder(tf.float32, shape=[None, dw], name='new_emb_mat')

# Non-trainable step counter, incremented by the optimizer.
global_step = tf.get_variable(
    'global_step',
    shape=[],
    dtype=tf.int32,
    initializer=tf.constant_initializer(0),
    trainable=False,
)
# Registry of intermediate tensors, keyed by a short name.
tensor_dict = {}

# Embedding layer: builds the per-word input representations `xx` (context)
# and `qq` (question) from character-CNN features and/or word embeddings.
# NOTE(review): if both config.use_char_emb and config.use_word_emb are
# False, `xx` and `qq` are never assigned and the next layer fails.
with tf.variable_scope("embedding_layer"):
    if config.use_char_emb:
        with tf.variable_scope("char"):

            char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

            Acx = tf.nn.embedding_lookup(char_emb_mat, cx)  # [N, M, JX, W, dc]
            Acq = tf.nn.embedding_lookup(char_emb_mat, cq)  # [N, JQ, W, dc]
            # Fold the batch and sentence axes together so context and
            # question tensors have the same rank for the shared CNN.
            Acx = tf.reshape(Acx, [-1, JX, W, dc])
            Acq = tf.reshape(Acq, [-1, JQ, W, dc])

            # Comma-separated config strings -> lists of ints (output channel
            # counts and filter heights handed to multi_conv1d).
            filter_sizes = list(map(int, config.out_channel_dims.split(',')))
            heights = list(map(int, config.filter_heights.split(',')))

            with tf.variable_scope("conv"):
                # Both calls use scope "xx"; reuse_variables() in between makes
                # the question CNN share the filters created for the context.
                xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", is_train, config.keep_prob, scope="xx")
                tf.get_variable_scope().reuse_variables()
                qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", is_train, config.keep_prob, scope="xx")

                # Restore the separate sentence axis for the context.
                xx = tf.reshape(xx, [-1, M, JX, dco])
                qq = tf.reshape(qq, [-1, JQ, dco])


    if config.use_word_emb:
        # tf.name_scope does not prefix tf.get_variable names, so the variable
        # below is created directly under "embedding_layer".
        with tf.name_scope("word"):

            if config.mode == 'train':
                # Training starts from the matrix assembled in the setup cell.
                word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
            else:
                # No explicit initializer here.
                # NOTE(review): presumably the values come from a restored
                # checkpoint in this mode — confirm.
                word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')


            # Append the fed rows after the VW trainable rows; ids >= VW
            # therefore address rows of new_emb_mat.
            word_emb_mat = tf.concat([word_emb_mat, new_emb_mat], 0)

            Ax = tf.nn.embedding_lookup(word_emb_mat, x)  # [N, M, JX, dw]
            Aq = tf.nn.embedding_lookup(word_emb_mat, q)  # [N, JQ, dw]

            tensor_dict['x'] = Ax
            tensor_dict['q'] = Aq

        # Concatenate character and word features along the last axis when
        # both are enabled; otherwise the word embeddings are used alone.
        if config.use_char_emb:
            xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
            qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
        else:
            xx = Ax
            qq = Aq

with tf.variable_scope("highway_network_layer"):
    # Context and question go through the same highway network: the first
    # call creates the weights and reuse_variables() makes the second call
    # share them.
    xx = highway_network(xx, config.highway_num_layers, is_train=is_train)
    tf.get_variable_scope().reuse_variables()
    qq = highway_network(qq, config.highway_num_layers, is_train=is_train)

    tensor_dict['xx'] = xx
    tensor_dict['qq'] = qq

# Number of unmasked tokens per context sentence / per question.
x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2)  # [N, M]
q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1)  # [N]

# Flattened int64 copies used as `sequence_length` for the dynamic RNNs.
# x_len and q_len are always tensors here, so no None handling is needed.
flat_len_q = tf.cast(flatten(q_len, 0), 'int64')
flat_len_x = tf.cast(flatten(x_len, 0), 'int64')

# Contextual layer: runs one bidirectional LSTM over the question and then,
# with variables reused, over the context. Produces `u` (question) and `h`
# (context), each the concatenation of forward and backward outputs.
with tf.variable_scope("contextual_layer"):
    # The same cell object is passed for the forward and backward direction.
    cell=tf.nn.rnn_cell.BasicLSTMCell(d,state_is_tuple=True);

    # presumably flatten(t, 2) merges all leading axes and keeps the last
    # two, and reconstruct() restores them from the reference tensor — TODO
    # confirm in helper.py
    flat_qq = flatten(qq, 2)  
    (flat_fwu_outputs, flat_bwu_outputs), _ = tf.nn.bidirectional_dynamic_rnn(cell, cell, flat_qq, sequence_length=flat_len_q, dtype='float', scope='lstm')
    fw_u = reconstruct(flat_fwu_outputs, qq, 2)
    bw_u = reconstruct(flat_bwu_outputs, qq, 2)
    u = tf.concat([fw_u, bw_u], 2)

    # From here on the scope reuses variables, so the second RNN call under
    # scope 'lstm' shares the weights created by the first.
    tf.get_variable_scope().reuse_variables()
    
    flat_xx = flatten(xx, 2)  
    (flat_fwh_outputs, flat_bwh_outputs), _ = tf.nn.bidirectional_dynamic_rnn(cell, cell, flat_xx, sequence_length=flat_len_x, dtype='float', scope='lstm')
    fw_h = reconstruct(flat_fwh_outputs, xx, 2)
    bw_h = reconstruct(flat_bwh_outputs, xx, 2)
    # The context keeps its sentence axis, so the feature axis is 3 here.
    h = tf.concat([fw_h, bw_h], 3)
    
    tensor_dict['u'] = u
    tensor_dict['h'] = h
    
with tf.variable_scope("attention_layer"):
    # Pair every context position with every question position: h gets a
    # new question axis, u gets new sentence and word axes, and both are
    # tiled to a common 5-D shape. The masks are expanded the same way.
    h_aug = tf.tile(tf.expand_dims(h, axis=3), multiples=[1, 1, 1, JQ, 1])
    u_aug = tf.tile(
        tf.expand_dims(tf.expand_dims(u, axis=1), axis=1),
        multiples=[1, M, JX, 1, 1])
    h_mask_aug = tf.tile(tf.expand_dims(x_mask, axis=3), multiples=[1, 1, 1, JQ])
    u_mask_aug = tf.tile(
        tf.expand_dims(tf.expand_dims(q_mask, axis=1), axis=1),
        multiples=[1, M, JX, 1])
    # A (context word, question word) pair is valid only if both are real.
    hu_mask = h_mask_aug & u_mask_aug

    # Element-wise interaction feature.
    h_u = h_aug * u_aug

    with tf.variable_scope("similarity"):
        # One linear unit over [h; u; h*u] gives a score per pair.
        sim_features = tf.concat([h_aug, u_aug, h_u], axis=-1)
        sim = linear([sim_features], 1, is_train=is_train, scope="sim")
        # Drop the trailing size-1 axis produced by the linear layer.
        last_axis = len(sim.get_shape().as_list()) - 1
        sim = tf.squeeze(sim, [last_axis])
        sim = exp_mask(sim, hu_mask)

        # Attention maps kept for inspection only.
        a_u = tf.nn.softmax(sim)
        a_h = tf.nn.softmax(tf.reduce_max(sim, axis=3))
        tensor_dict['a_u'] = a_u
        tensor_dict['a_h'] = a_h

    with tf.variable_scope("context_2_query"):
        # Question summary for each context word: weights over the question
        # axis, then a weighted sum of u along that axis.
        a = softmax(sim)
        rank_u = len(u_aug.get_shape().as_list())
        weighted_u = tf.expand_dims(a, axis=-1) * u_aug
        u_a = tf.reduce_sum(weighted_u, axis=rank_u - 2)

    with tf.variable_scope("query_2_context"):
        # Context summary: weights from the best question match of each
        # context word, a weighted sum of h over the word axis, and the
        # result repeated for every word position.
        b = softmax(tf.reduce_max(sim, axis=3))
        rank_h = len(h.get_shape().as_list())
        weighted_h = tf.expand_dims(b, axis=-1) * h
        h_a = tf.reduce_sum(weighted_h, axis=rank_h - 2)
        h_a = tf.tile(tf.expand_dims(h_a, axis=2), multiples=[1, 1, JX, 1])

    with tf.variable_scope("final"):
        # Query-aware context representation passed to the modeling layer.
        g = tf.concat([h, u_a, h * u_a, h * h_a], axis=3)
            

with tf.variable_scope("modeling_layer"):
    # First bidirectional LSTM (scope 'g0') over the attention output g.
    flat_g = flatten(g, 2)
    cell1 = tf.nn.rnn_cell.BasicLSTMCell(d, state_is_tuple=True)
    g0_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell1, cell1, flat_g,
        sequence_length=flat_len_x, dtype='float', scope='g0')
    flat_fw_g0_outputs, flat_bw_g0_outputs = g0_outputs
    fw_g0 = reconstruct(flat_fw_g0_outputs, g, 2)
    bw_g0 = reconstruct(flat_bw_g0_outputs, g, 2)

    g0 = tf.concat([fw_g0, bw_g0], 3)

    # Second bidirectional LSTM (scope 'g1') stacked on the first.
    flat_g0 = flatten(g0, 2)
    cell2 = tf.nn.rnn_cell.BasicLSTMCell(d, state_is_tuple=True)
    g1_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell2, cell2, flat_g0,
        sequence_length=flat_len_x, dtype='float', scope='g1')
    flat_fw_g1_outputs, flat_bw_g1_outputs = g1_outputs
    fw_g1 = reconstruct(flat_fw_g1_outputs, g0, 2)
    bw_g1 = reconstruct(flat_bw_g1_outputs, g0, 2)

    g1 = tf.concat([fw_g1, bw_g1], 3)

# Output layer: produces per-position logits and probabilities for the two
# answer pointers. logits1/yp1 are trained against y1 and logits2/yp2
# against y2 in the loss section.
with tf.variable_scope("output_layer"):
    # First pointer: one linear unit over [g1; g], trailing size-1 axis
    # squeezed away, then masked with x_mask.
    logits1 = linear([tf.concat([g1, g], -1)], 1, input_keep_prob=config.input_keep_prob, is_train=is_train, scope="logits1")
    logits1 = tf.squeeze(logits1, [len(logits1.get_shape().as_list())-1])
    logits1 = exp_mask(logits1, x_mask)
    
    # Summarise g1 with the first pointer's distribution over all M * JX
    # positions and repeat that summary at every position.
    # Note: these reshapes use the static batch size N, while the reshapes
    # further down use -1. `a` rebinds the name used in the attention layer.
    a = softmax(tf.reshape(logits1, [N, M * JX]))
    g1_reshaped = tf.reshape(g1, [N, M * JX, 2 * d])
    rank_g1 = len(g1_reshaped.get_shape().as_list())
    a1i = tf.reduce_sum(tf.expand_dims(a, -1) * g1_reshaped, rank_g1-2)
    a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), [1, M, JX, 1])
    
    # Third bidirectional LSTM (scope 'g2') over [g; g1; a1i; g1 * a1i].
    # `cell` rebinds the name used in the contextual layer; `g` is passed to
    # reconstruct() only as the reference tensor.
    g2_input = tf.concat([g, g1, a1i, g1 * a1i], 3)
    flat_input = flatten(g2_input, 2)  
    cell = tf.nn.rnn_cell.BasicLSTMCell(d,state_is_tuple=True);
    (flat_fw_g2_outputs, flat_bw_g2_outputs), _ =tf.nn.bidirectional_dynamic_rnn(cell, cell, flat_input, sequence_length=flat_len_x, dtype='float', scope='g2')
    fw_g2 = reconstruct(flat_fw_g2_outputs, g, 2)
    bw_g2 = reconstruct(flat_bw_g2_outputs, g, 2)

    g2 = tf.concat([fw_g2, bw_g2], 3)
    
    # Second pointer: same recipe as the first, over [g2; g].
    logits2 = linear([tf.concat([g2, g], -1)], 1, input_keep_prob=config.input_keep_prob, is_train=is_train, scope="logits2")
    logits2 = tf.squeeze(logits2, [len(logits2.get_shape().as_list())-1])
    logits2 = exp_mask(logits2, x_mask)
    
    # Flatten sentence and word axes so the softmax runs over the whole
    # context, then reshape the probabilities back to [*, M, JX].
    logits1 = tf.reshape(logits1, [-1, M * JX])
    flat_yp1 = tf.nn.softmax(logits1) 
    yp1 = tf.reshape(flat_yp1, [-1, M, JX])
    logits2 = tf.reshape(logits2, [-1, M * JX])
    flat_yp2 = tf.nn.softmax(logits2)
    yp2 = tf.reshape(flat_yp2, [-1, M, JX])
    
    tensor_dict['g1'] = g1
    tensor_dict['g2'] = g2
    

# Loss
# loss_mask is 1.0 for an example that has at least one real question token
# and 0.0 otherwise; it is applied to both pointer losses.
loss_mask = tf.reduce_max(tf.cast(q_mask, 'float'), 1)
losses = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits1, labels=tf.cast(tf.reshape(y1, [-1, M * JX]), 'float'))
ce_loss1 = tf.reduce_mean(loss_mask * losses)
losses2 = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits2, labels=tf.cast(tf.reshape(y2, [-1, M * JX]), 'float'))
ce_loss2 = tf.reduce_mean(loss_mask * losses2)
tf.add_to_collection('losses', ce_loss1)
tf.add_to_collection("losses", ce_loss2)

# Total loss = sum of everything registered in the 'losses' collection.
loss = tf.add_n(tf.get_collection('losses'), name='loss')
tf.summary.scalar(loss.op.name, loss)
tf.add_to_collection('ema/scalar', loss)

# Expose every variable through tensor_dict under its graph name.
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name)
for var in variables:
    tensor_dict[var.name] = var

# Moving averages of the trainable variables. The update op is collected in
# ema_ops so that a later op cannot replace it.
var_ema = tf.train.ExponentialMovingAverage(config.var_decay)
ema_op = var_ema.apply(tf.trainable_variables())
ema_ops = [ema_op]

if config.mode == 'train':
    # Moving averages of the monitored tensors, used for smoothed summaries.
    # Both collections are passed to apply() because ema.average() only
    # knows tensors it was applied to.
    ema = tf.train.ExponentialMovingAverage(config.decay)
    ema_tensors = tf.get_collection("ema/scalar") + tf.get_collection("ema/vector")
    ema_op = ema.apply(ema_tensors)
    ema_ops.append(ema_op)

    for var in tf.get_collection("ema/scalar"):
        ema_var = ema.average(var)
        tf.summary.scalar(ema_var.op.name, ema_var)
    for var in tf.get_collection("ema/vector"):
        ema_var = ema.average(var)
        tf.summary.histogram(ema_var.op.name, ema_var)

# Evaluating `loss` now also runs every moving-average update.
with tf.control_dependencies(ema_ops):
    loss = tf.identity(loss)

# merge_all() merges everything in the default summaries collection.
summary = tf.summary.merge_all()

# NOTE(review): the training log captured in this notebook shows the loss
# staying in the ~24-32 range for 16k steps. If config.init_lr was chosen for
# a different optimizer it may be too large for Adam — verify before the next
# run. The optimizer is left unchanged so existing checkpoints still restore.
optimizer = tf.train.AdamOptimizer(config.init_lr)
grads = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(grads, global_step=global_step)

In [4]:
def get_feed(batch, is_train_cond):
    """Turn one DataSet batch into the dense numpy arrays the placeholders expect.

    Every array is allocated at the full static size (N examples, M sentences,
    JX words per sentence, JQ question words, W characters per word) and is
    zero-padded. The boolean masks mark the word slots that were filled.
    Input that exceeds one of those sizes is truncated.

    Args:
        batch: DataSet whose ``data`` provides 'x', 'cx', 'y', 'q' and 'cq'
            and whose ``shared`` provides 'word2idx', 'char2idx' and
            'new_emb_mat' (plus 'new_word2idx' when
            ``config.use_glove_for_unk`` is set).
        is_train_cond: not used by this function; kept so existing callers
            keep working.

    Returns:
        Tuple ``(x, cx, x_mask, q, cq, q_mask, y1, y2, new_emb_mat)``.

    Note:
        With ``config.single`` set, ``batch.data['x']`` and
        ``batch.data['cx']`` are modified in place: each example is reduced
        to the sentence that holds the sampled answer.
        NOTE(review): other code may rely on that side effect, so it is
        deliberately preserved — confirm before removing it.
    """
    assert isinstance(batch, DataSet)
    temp_x = np.zeros([N, M, JX], dtype='int32')
    temp_cx = np.zeros([N, M, JX, W], dtype='int32')
    temp_x_mask = np.zeros([N, M, JX], dtype='bool')
    temp_q = np.zeros([N, JQ], dtype='int32')
    temp_cq = np.zeros([N, JQ, W], dtype='int32')
    temp_q_mask = np.zeros([N, JQ], dtype='bool')
    temp_new_emb_mat = batch.shared['new_emb_mat']

    X = batch.data['x']
    CX = batch.data['cx']

    temp_y1 = np.zeros([N, M, JX], dtype='bool')
    temp_y2 = np.zeros([N, M, JX], dtype='bool')

    # Look the vocabularies up once instead of once per token.
    word2idx = batch.shared['word2idx']
    char2idx = batch.shared['char2idx']

    def _get_word(word):
        """Return the id of `word`, trying several casings; 1 if unknown."""
        candidates = (word, word.lower(), word.capitalize(), word.upper())
        for each in candidates:
            if each in word2idx:
                return word2idx[each]
        if config.use_glove_for_unk:
            # Ids from the secondary vocabulary are shifted past the primary
            # one.
            new_word2idx = batch.shared['new_word2idx']
            for each in candidates:
                if each in new_word2idx:
                    return new_word2idx[each] + len(word2idx)
        return 1

    def _get_char(char):
        """Return the id of `char`; 1 if unknown."""
        if char in char2idx:
            return char2idx[char]
        return 1

    # Answer labels. Each entry of y is a list of (start, stop) pairs, each a
    # (sentence index, word index); one pair is sampled per example.
    for i, (xi, cxi, yi) in enumerate(zip(X, CX, batch.data['y'])):
        start_idx, stop_idx = random.choice(yi)
        j, k = start_idx
        j2, k2 = stop_idx
        if config.single:
            # Keep only the answer sentence (mutates the batch, see Note).
            X[i] = [xi[j]]
            CX[i] = [cxi[j]]
            j, j2 = 0, 0
        if config.squash:
            # All sentences are merged into one, so word indices are shifted
            # by the length of the preceding sentences.
            offset = sum(map(len, xi[:j]))
            j, k = 0, k + offset
            offset = sum(map(len, xi[:j2]))
            j2, k2 = 0, k2 + offset
        temp_y1[i, j, k] = True
        # The end label is placed one position before the stop index.
        temp_y2[i, j2, k2-1] = True

    # Context words.
    for i, xi in enumerate(X):
        if config.squash:
            xi = [list(itertools.chain(*xi))]
        for j, xij in enumerate(xi):
            if j == config.max_num_sents:
                break
            for k, xijk in enumerate(xij):
                if k == config.max_sent_size:
                    break
                each = _get_word(xijk)
                assert isinstance(each, int), each
                temp_x[i, j, k] = each
                temp_x_mask[i, j, k] = True

    # Context characters.
    for i, cxi in enumerate(CX):
        if config.squash:
            cxi = [list(itertools.chain(*cxi))]
        for j, cxij in enumerate(cxi):
            if j == config.max_num_sents:
                break
            for k, cxijk in enumerate(cxij):
                if k == config.max_sent_size:
                    break
                for l, cxijkl in enumerate(cxijk):
                    if l == config.max_word_size:
                        break
                    temp_cx[i, j, k, l] = _get_char(cxijkl)

    # Question words, truncated like the context instead of raising
    # IndexError on an over-long question.
    for i, qi in enumerate(batch.data['q']):
        for j, qij in enumerate(qi):
            if j == config.max_ques_size:
                break
            temp_q[i, j] = _get_word(qij)
            temp_q_mask[i, j] = True

    # Question characters.
    for i, cqi in enumerate(batch.data['cq']):
        for j, cqij in enumerate(cqi):
            if j == config.max_ques_size:
                break
            for k, cqijk in enumerate(cqij):
                if k == config.max_word_size:
                    break
                temp_cq[i, j, k] = _get_char(cqijk)

    return temp_x, temp_cx, temp_x_mask, temp_q, temp_cq, temp_q_mask, temp_y1, temp_y2, temp_new_emb_mat
def get_feed_dict(data_set, training=True):
    """Build the feed dict that maps every graph placeholder to batch data.

    Args:
        data_set: DataSet batch, converted to arrays by ``get_feed``.
        training: value fed to the ``is_train`` placeholder. Defaults to True,
            which is what this function always fed before the parameter
            existed.

    Returns:
        dict from placeholder tensor to numpy array / Python value.
    """
    (temp_x, temp_cx, temp_x_mask, temp_q, temp_cq, temp_q_mask,
     temp_y1, temp_y2, temp_new_emb_mat) = get_feed(data_set, training)

    feed_dict = {
        x: temp_x,
        cx: temp_cx,
        x_mask: temp_x_mask,
        q: temp_q,
        cq: temp_cq,
        q_mask: temp_q_mask,
        y1: temp_y1,
        y2: temp_y2,
        is_train: training,
        new_emb_mat: temp_new_emb_mat
        }
    return feed_dict

def step(batch, get_summary=False):
    """Run one optimisation step on `batch`.

    Args:
        batch: pair whose second element is the DataSet to train on; the
            first element is ignored.
        get_summary: when True, the merged summary is evaluated in the same
            session call.

    Returns:
        Tuple ``(loss value, summary or None, value fetched for train_op)``.
    """
    data_set = batch[1]
    feed = get_feed_dict(data_set)

    if not get_summary:
        loss_value, train_result = sess.run([loss, train_op], feed_dict=feed)
        return loss_value, None, train_result

    loss_value, summary_value, train_result = sess.run(
        [loss, summary, train_op], feed_dict=feed)
    return loss_value, summary_value, train_result

In [5]:
# Saver for the graph variables defined so far; save_path is the checkpoint
# prefix (<save_dir>/<model_name>), to which the training loop adds the
# global step.
saver = tf.train.Saver()
save_path = os.path.join(config.save_dir, config.model_name)

# allow_soft_placement lets TensorFlow fall back to an available device when
# an op cannot run on the one it was assigned to.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
# Initialise every variable; a checkpoint restore in the next cell overwrites
# these values.
sess.run(tf.global_variables_initializer())

# Writes summaries (and the graph definition) to log_dir.
writer = tf.summary.FileWriter(config.log_dir, graph=tf.get_default_graph())
   

In [6]:
# Resume from the newest checkpoint in save_dir when there is one.
# latest_checkpoint() returns None for a fresh run (the setup cell wipes the
# output directory when config.load is off), and restoring from None fails,
# so the restore is skipped in that case.
checkpoint = tf.train.latest_checkpoint(config.save_dir)
if checkpoint is not None:
    saver.restore(sess, checkpoint)
elif config.load:
    # Loading was requested explicitly, so a missing checkpoint is an error.
    raise ValueError(
        "config.load is set but no checkpoint was found in {}".format(config.save_dir))

INFO:tensorflow:Restoring parameters from out/basic/00/save/basic-1000


In [7]:
batch_size = config.batch_size
num_steps = config.num_steps

batches = train_data.get_batches(batch_size, num_batches=num_steps, shuffle=True)


def _as_single_group(batch_iter, group_size):
    """Lazily wrap each (idxs, data_set) batch as a one-element tuple of pairs."""
    for idxs, data_set in batch_iter:
        idx_groups = grouper(idxs, group_size, shorten=True, num_groups=1)
        yield tuple(zip(idx_groups, data_set.divide(1)))


multi_batches = _as_single_group(batches, batch_size)

for batch in tqdm(multi_batches, total=num_steps):
    # The step counter is read before the update, hence the +1: logging and
    # saving refer to the step that is about to complete.
    temp_global_step = sess.run(global_step) + 1

    get_summary = (temp_global_step % config.log_period) == 0
    temp_loss, temp_summary, temp_train_op = step(batch[0], get_summary=get_summary)

    # Periodic logging to stdout and to the summary writer.
    if get_summary:
        print("Steps:{}".format(temp_global_step), ", Loss: {}".format(temp_loss))
        writer.add_summary(temp_summary, temp_global_step)

    # Periodic checkpoint.
    if (temp_global_step % config.save_period) == 0:
        saver.save(sess, save_path=save_path, global_step=temp_global_step)


  0%|          | 100/20000 [08:57<29:44:01,  5.38s/it]

Steps:1100 , Loss: 29.381088256835938


  1%|          | 200/20000 [17:46<29:19:49,  5.33s/it]

Steps:1200 , Loss: 28.02867889404297


  2%|▏         | 300/20000 [26:14<28:42:57,  5.25s/it]

Steps:1300 , Loss: 30.7631893157959


  2%|▏         | 400/20000 [34:37<28:16:16,  5.19s/it]

Steps:1400 , Loss: 29.724803924560547


  2%|▏         | 499/20000 [42:54<27:56:57,  5.16s/it]

Steps:1500 , Loss: 26.22266960144043


  3%|▎         | 600/20000 [51:23<27:41:24,  5.14s/it]

Steps:1600 , Loss: 23.8616886138916


  4%|▎         | 700/20000 [59:45<27:27:48,  5.12s/it]

Steps:1700 , Loss: 28.970989227294922


  4%|▍         | 800/20000 [1:08:08<27:15:30,  5.11s/it]

Steps:1800 , Loss: 27.471328735351562


  4%|▍         | 900/20000 [1:16:32<27:04:14,  5.10s/it]

Steps:1900 , Loss: 27.87445068359375


  5%|▍         | 999/20000 [1:24:50<26:53:43,  5.10s/it]

Steps:2000 , Loss: 25.67147445678711


  6%|▌         | 1100/20000 [1:33:19<26:43:35,  5.09s/it]

Steps:2100 , Loss: 24.601627349853516


  6%|▌         | 1200/20000 [1:41:42<26:33:31,  5.09s/it]

Steps:2200 , Loss: 26.28671646118164


  6%|▋         | 1300/20000 [1:50:06<26:23:55,  5.08s/it]

Steps:2300 , Loss: 24.48927116394043


  7%|▋         | 1400/20000 [1:58:29<26:14:09,  5.08s/it]

Steps:2400 , Loss: 30.191146850585938


  7%|▋         | 1499/20000 [2:06:47<26:04:54,  5.08s/it]

Steps:2500 , Loss: 28.20641326904297


  8%|▊         | 1600/20000 [2:15:16<25:55:42,  5.07s/it]

Steps:2600 , Loss: 28.211776733398438


  8%|▊         | 1700/20000 [2:23:39<25:46:22,  5.07s/it]

Steps:2700 , Loss: 30.269380569458008


  9%|▉         | 1800/20000 [2:32:03<25:37:24,  5.07s/it]

Steps:2800 , Loss: 26.475975036621094


 10%|▉         | 1900/20000 [2:40:25<25:28:18,  5.07s/it]

Steps:2900 , Loss: 26.913755416870117


 10%|▉         | 1999/20000 [2:48:43<25:19:22,  5.06s/it]

Steps:3000 , Loss: 28.225833892822266


 10%|█         | 2100/20000 [2:57:14<25:10:47,  5.06s/it]

Steps:3100 , Loss: 28.086538314819336


 11%|█         | 2200/20000 [3:05:41<25:02:21,  5.06s/it]

Steps:3200 , Loss: 27.36771011352539


 12%|█▏        | 2300/20000 [3:14:05<24:53:38,  5.06s/it]

Steps:3300 , Loss: 26.431201934814453


 12%|█▏        | 2400/20000 [3:23:20<24:51:13,  5.08s/it]

Steps:3400 , Loss: 26.66646957397461


 12%|█▏        | 2499/20000 [3:31:54<24:44:00,  5.09s/it]

Steps:3500 , Loss: 30.258975982666016


 13%|█▎        | 2600/20000 [3:40:23<24:34:57,  5.09s/it]

Steps:3600 , Loss: 29.20461082458496


 14%|█▎        | 2700/20000 [3:48:45<24:25:47,  5.08s/it]

Steps:3700 , Loss: 30.037321090698242


 14%|█▍        | 2800/20000 [3:57:08<24:16:43,  5.08s/it]

Steps:3800 , Loss: 28.26224708557129


 14%|█▍        | 2900/20000 [4:05:30<24:07:39,  5.08s/it]

Steps:3900 , Loss: 25.129308700561523


 15%|█▍        | 2999/20000 [4:13:47<23:58:44,  5.08s/it]

Steps:4000 , Loss: 26.663951873779297


 16%|█▌        | 3100/20000 [4:22:15<23:49:42,  5.08s/it]

Steps:4100 , Loss: 28.464481353759766


 16%|█▌        | 3200/20000 [4:30:37<23:40:44,  5.07s/it]

Steps:4200 , Loss: 27.281089782714844


 16%|█▋        | 3300/20000 [4:38:59<23:31:53,  5.07s/it]

Steps:4300 , Loss: 24.424930572509766


 17%|█▋        | 3400/20000 [4:47:21<23:23:00,  5.07s/it]

Steps:4400 , Loss: 28.789875030517578


 17%|█▋        | 3499/20000 [4:55:39<23:14:15,  5.07s/it]

Steps:4500 , Loss: 26.972442626953125


 18%|█▊        | 3600/20000 [5:04:06<23:05:24,  5.07s/it]

Steps:4600 , Loss: 30.913883209228516


 18%|█▊        | 3700/20000 [5:12:28<22:56:36,  5.07s/it]

Steps:4700 , Loss: 27.49459457397461


 19%|█▉        | 3800/20000 [5:20:51<22:47:50,  5.07s/it]

Steps:4800 , Loss: 31.392715454101562


 20%|█▉        | 3900/20000 [5:29:13<22:39:05,  5.06s/it]

Steps:4900 , Loss: 26.10135269165039


 20%|█▉        | 3999/20000 [5:37:30<22:30:26,  5.06s/it]

Steps:5000 , Loss: 24.63340187072754


 20%|██        | 4100/20000 [5:45:57<22:21:39,  5.06s/it]

Steps:5100 , Loss: 30.945491790771484


 21%|██        | 4200/20000 [5:54:19<22:12:57,  5.06s/it]

Steps:5200 , Loss: 26.83769416809082


 22%|██▏       | 4300/20000 [6:02:42<22:04:18,  5.06s/it]

Steps:5300 , Loss: 30.713207244873047


 22%|██▏       | 4400/20000 [6:11:04<21:55:37,  5.06s/it]

Steps:5400 , Loss: 25.370479583740234


 22%|██▏       | 4499/20000 [6:19:21<21:47:03,  5.06s/it]

Steps:5500 , Loss: 26.66337013244629


 23%|██▎       | 4600/20000 [6:27:48<21:38:20,  5.06s/it]

Steps:5600 , Loss: 27.16093635559082


 24%|██▎       | 4700/20000 [6:36:11<21:29:42,  5.06s/it]

Steps:5700 , Loss: 26.67422866821289


 24%|██▍       | 4800/20000 [6:44:33<21:21:07,  5.06s/it]

Steps:5800 , Loss: 26.47469711303711


 24%|██▍       | 4900/20000 [6:52:55<21:12:30,  5.06s/it]

Steps:5900 , Loss: 24.879159927368164


 25%|██▍       | 4999/20000 [7:01:12<21:03:58,  5.06s/it]

Steps:6000 , Loss: 26.560543060302734


 26%|██▌       | 5100/20000 [7:09:40<20:55:18,  5.05s/it]

Steps:6100 , Loss: 25.915300369262695


 26%|██▌       | 5200/20000 [7:18:02<20:46:42,  5.05s/it]

Steps:6200 , Loss: 26.58963394165039


 26%|██▋       | 5300/20000 [7:26:24<20:38:09,  5.05s/it]

Steps:6300 , Loss: 29.09634017944336


 27%|██▋       | 5400/20000 [7:34:46<20:29:35,  5.05s/it]

Steps:6400 , Loss: 28.53133773803711


 27%|██▋       | 5499/20000 [7:43:04<20:21:07,  5.05s/it]

Steps:6500 , Loss: 26.999595642089844


 28%|██▊       | 5600/20000 [7:51:31<20:12:30,  5.05s/it]

Steps:6600 , Loss: 28.256845474243164


 28%|██▊       | 5700/20000 [7:59:53<20:03:57,  5.05s/it]

Steps:6700 , Loss: 26.54570770263672


 29%|██▉       | 5800/20000 [8:08:16<19:55:26,  5.05s/it]

Steps:6800 , Loss: 26.744823455810547


 30%|██▉       | 5900/20000 [8:16:38<19:46:54,  5.05s/it]

Steps:6900 , Loss: 26.883739471435547


 30%|██▉       | 5999/20000 [8:24:55<19:38:26,  5.05s/it]

Steps:7000 , Loss: 27.767654418945312


 30%|███       | 6100/20000 [8:33:23<19:29:50,  5.05s/it]

Steps:7100 , Loss: 30.783267974853516


 31%|███       | 6200/20000 [8:41:45<19:21:20,  5.05s/it]

Steps:7200 , Loss: 27.211841583251953


 32%|███▏      | 6300/20000 [8:50:08<19:12:50,  5.05s/it]

Steps:7300 , Loss: 27.147369384765625


 32%|███▏      | 6400/20000 [8:58:30<19:04:20,  5.05s/it]

Steps:7400 , Loss: 25.785770416259766


 32%|███▏      | 6499/20000 [9:06:47<18:55:54,  5.05s/it]

Steps:7500 , Loss: 27.892412185668945


 33%|███▎      | 6600/20000 [9:15:15<18:47:20,  5.05s/it]

Steps:7600 , Loss: 29.186386108398438


 34%|███▎      | 6700/20000 [9:23:37<18:38:51,  5.05s/it]

Steps:7700 , Loss: 26.32439613342285


 34%|███▍      | 6800/20000 [9:32:00<18:30:21,  5.05s/it]

Steps:7800 , Loss: 28.694904327392578


 34%|███▍      | 6900/20000 [9:40:22<18:21:53,  5.05s/it]

Steps:7900 , Loss: 25.997726440429688


 35%|███▍      | 6999/20000 [9:48:40<18:13:29,  5.05s/it]

Steps:8000 , Loss: 26.44402313232422


 36%|███▌      | 7100/20000 [9:57:08<18:04:56,  5.05s/it]

Steps:8100 , Loss: 28.366100311279297


 36%|███▌      | 7200/20000 [10:05:30<17:56:27,  5.05s/it]

Steps:8200 , Loss: 29.363224029541016


 36%|███▋      | 7300/20000 [10:13:53<17:48:00,  5.05s/it]

Steps:8300 , Loss: 24.61585807800293


 37%|███▋      | 7400/20000 [10:22:16<17:39:32,  5.05s/it]

Steps:8400 , Loss: 29.264060974121094


 37%|███▋      | 7499/20000 [10:30:42<17:31:24,  5.05s/it]

Steps:8500 , Loss: 25.48598861694336


 38%|███▊      | 7600/20000 [10:39:22<17:23:12,  5.05s/it]

Steps:8600 , Loss: 24.6551456451416


 38%|███▊      | 7700/20000 [10:47:46<17:14:46,  5.05s/it]

Steps:8700 , Loss: 28.913488388061523


 39%|███▉      | 7800/20000 [10:56:09<17:06:18,  5.05s/it]

Steps:8800 , Loss: 28.337024688720703


 40%|███▉      | 7900/20000 [11:04:32<16:57:50,  5.05s/it]

Steps:8900 , Loss: 26.276824951171875


 40%|███▉      | 7999/20000 [11:12:49<16:49:26,  5.05s/it]

Steps:9000 , Loss: 30.912914276123047


 40%|████      | 8100/20000 [11:21:17<16:40:54,  5.05s/it]

Steps:9100 , Loss: 25.922332763671875


 41%|████      | 8200/20000 [11:29:39<16:32:26,  5.05s/it]

Steps:9200 , Loss: 27.92974853515625


 42%|████▏     | 8300/20000 [11:38:02<16:23:59,  5.05s/it]

Steps:9300 , Loss: 30.098838806152344


 42%|████▏     | 8400/20000 [11:46:25<16:15:32,  5.05s/it]

Steps:9400 , Loss: 29.28714370727539


 42%|████▏     | 8499/20000 [11:54:43<16:07:10,  5.05s/it]

Steps:9500 , Loss: 25.530681610107422


 43%|████▎     | 8600/20000 [12:03:11<15:58:38,  5.05s/it]

Steps:9600 , Loss: 28.689929962158203


 44%|████▎     | 8700/20000 [12:11:33<15:50:10,  5.05s/it]

Steps:9700 , Loss: 24.36498260498047


 44%|████▍     | 8800/20000 [12:19:56<15:41:44,  5.05s/it]

Steps:9800 , Loss: 25.486434936523438


 44%|████▍     | 8900/20000 [12:28:18<15:33:17,  5.04s/it]

Steps:9900 , Loss: 29.77520751953125


 45%|████▍     | 8999/20000 [12:36:35<15:24:54,  5.04s/it]

Steps:10000 , Loss: 24.734020233154297


 46%|████▌     | 9100/20000 [12:45:03<15:16:23,  5.04s/it]

Steps:10100 , Loss: 27.9647159576416


 46%|████▌     | 9200/20000 [12:53:26<15:07:56,  5.04s/it]

Steps:10200 , Loss: 29.51024055480957


 46%|████▋     | 9300/20000 [13:01:49<14:59:30,  5.04s/it]

Steps:10300 , Loss: 26.188030242919922


 47%|████▋     | 9400/20000 [13:10:11<14:51:04,  5.04s/it]

Steps:10400 , Loss: 27.295352935791016


 47%|████▋     | 9499/20000 [13:18:29<14:42:42,  5.04s/it]

Steps:10500 , Loss: 25.889707565307617


 48%|████▊     | 9600/20000 [13:26:57<14:34:12,  5.04s/it]

Steps:10600 , Loss: 25.055286407470703


 48%|████▊     | 9700/20000 [13:35:19<14:25:45,  5.04s/it]

Steps:10700 , Loss: 30.647932052612305


 49%|████▉     | 9800/20000 [13:43:43<14:17:20,  5.04s/it]

Steps:10800 , Loss: 27.29828453063965


 50%|████▉     | 9900/20000 [13:52:14<14:09:03,  5.04s/it]

Steps:10900 , Loss: 30.79519271850586


 50%|████▉     | 9999/20000 [14:00:35<14:00:45,  5.04s/it]

Steps:11000 , Loss: 26.983049392700195


 50%|█████     | 10100/20000 [14:09:03<13:52:15,  5.04s/it]

Steps:11100 , Loss: 29.446495056152344


 51%|█████     | 10200/20000 [14:17:27<13:43:49,  5.04s/it]

Steps:11200 , Loss: 29.78670310974121


 52%|█████▏    | 10300/20000 [14:25:51<13:35:25,  5.04s/it]

Steps:11300 , Loss: 25.974056243896484


 52%|█████▏    | 10400/20000 [14:34:14<13:26:59,  5.04s/it]

Steps:11400 , Loss: 26.872432708740234


 52%|█████▏    | 10499/20000 [14:42:33<13:18:39,  5.04s/it]

Steps:11500 , Loss: 25.446300506591797


 53%|█████▎    | 10600/20000 [14:51:01<13:10:09,  5.04s/it]

Steps:11600 , Loss: 26.760326385498047


 54%|█████▎    | 10700/20000 [15:01:42<13:03:43,  5.06s/it]

Steps:11700 , Loss: 29.026569366455078


 54%|█████▍    | 10800/20000 [15:11:58<12:56:52,  5.07s/it]

Steps:11800 , Loss: 27.13667106628418


 55%|█████▍    | 10900/20000 [15:20:54<12:48:49,  5.07s/it]

Steps:11900 , Loss: 25.540855407714844


 55%|█████▍    | 10999/20000 [15:29:13<12:40:25,  5.07s/it]

Steps:12000 , Loss: 26.708438873291016


 56%|█████▌    | 11100/20000 [15:37:41<12:31:50,  5.07s/it]

Steps:12100 , Loss: 30.383560180664062


 56%|█████▌    | 11200/20000 [15:46:03<12:23:19,  5.07s/it]

Steps:12200 , Loss: 25.238147735595703


 56%|█████▋    | 11300/20000 [15:54:26<12:14:49,  5.07s/it]

Steps:12300 , Loss: 29.135356903076172


 57%|█████▋    | 11400/20000 [16:02:49<12:06:20,  5.07s/it]

Steps:12400 , Loss: 30.987913131713867


 57%|█████▋    | 11499/20000 [16:11:06<11:57:55,  5.07s/it]

Steps:12500 , Loss: 27.36532974243164


 58%|█████▊    | 11600/20000 [16:19:33<11:49:20,  5.07s/it]

Steps:12600 , Loss: 28.302595138549805


 58%|█████▊    | 11700/20000 [16:27:56<11:40:50,  5.07s/it]

Steps:12700 , Loss: 28.625511169433594


 59%|█████▉    | 11800/20000 [16:36:19<11:32:21,  5.07s/it]

Steps:12800 , Loss: 27.623380661010742


 60%|█████▉    | 11900/20000 [16:44:41<11:23:51,  5.07s/it]

Steps:12900 , Loss: 30.87074089050293


 60%|█████▉    | 11999/20000 [16:52:59<11:15:27,  5.07s/it]

Steps:13000 , Loss: 26.725149154663086


 60%|██████    | 12100/20000 [17:01:27<11:06:53,  5.07s/it]

Steps:13100 , Loss: 28.875232696533203


 61%|██████    | 12200/20000 [17:09:49<10:58:24,  5.06s/it]

Steps:13200 , Loss: 25.372650146484375


 62%|██████▏   | 12300/20000 [17:18:12<10:49:55,  5.06s/it]

Steps:13300 , Loss: 24.18120002746582


 62%|██████▏   | 12400/20000 [17:26:35<10:41:27,  5.06s/it]

Steps:13400 , Loss: 26.069210052490234


 62%|██████▏   | 12499/20000 [17:34:55<10:33:05,  5.06s/it]

Steps:13500 , Loss: 31.025367736816406


 63%|██████▎   | 12600/20000 [17:43:24<10:24:32,  5.06s/it]

Steps:13600 , Loss: 27.317012786865234


 64%|██████▎   | 12700/20000 [17:51:46<10:16:03,  5.06s/it]

Steps:13700 , Loss: 28.638404846191406


 64%|██████▍   | 12800/20000 [18:00:09<10:07:35,  5.06s/it]

Steps:13800 , Loss: 28.345611572265625


 64%|██████▍   | 12900/20000 [18:08:32<9:59:06,  5.06s/it] 

Steps:13900 , Loss: 28.707050323486328


 65%|██████▍   | 12999/20000 [18:16:49<9:50:43,  5.06s/it]

Steps:14000 , Loss: 29.288219451904297


 66%|██████▌   | 13100/20000 [18:25:16<9:42:10,  5.06s/it]

Steps:14100 , Loss: 32.06867218017578


 66%|██████▌   | 13200/20000 [18:33:39<9:33:42,  5.06s/it]

Steps:14200 , Loss: 26.274377822875977


 66%|██████▋   | 13300/20000 [18:42:02<9:25:14,  5.06s/it]

Steps:14300 , Loss: 30.728744506835938


 67%|██████▋   | 13400/20000 [18:50:25<9:16:46,  5.06s/it]

Steps:14400 , Loss: 29.269546508789062


 67%|██████▋   | 13499/20000 [18:58:42<9:08:23,  5.06s/it]

Steps:14500 , Loss: 27.017539978027344


 68%|██████▊   | 13600/20000 [19:07:11<8:59:51,  5.06s/it]

Steps:14600 , Loss: 24.501150131225586


 68%|██████▊   | 13700/20000 [19:15:33<8:51:23,  5.06s/it]

Steps:14700 , Loss: 29.15127944946289


 69%|██████▉   | 13800/20000 [19:23:56<8:42:55,  5.06s/it]

Steps:14800 , Loss: 29.48619842529297


 70%|██████▉   | 13900/20000 [19:33:19<8:34:54,  5.06s/it]

Steps:14900 , Loss: 28.08778190612793


 70%|██████▉   | 13999/20000 [19:42:37<8:26:57,  5.07s/it]

Steps:15000 , Loss: 26.966320037841797


 70%|███████   | 14100/20000 [19:52:09<8:18:50,  5.07s/it]

Steps:15100 , Loss: 27.32146453857422


 71%|███████   | 14200/20000 [20:00:35<8:10:22,  5.07s/it]

Steps:15200 , Loss: 23.887374877929688


 72%|███████▏  | 14300/20000 [20:09:14<8:02:00,  5.07s/it]

Steps:15300 , Loss: 25.429689407348633


 72%|███████▏  | 14400/20000 [20:20:21<7:54:34,  5.08s/it]

Steps:15400 , Loss: 26.335651397705078


 72%|███████▏  | 14499/20000 [20:31:22<7:47:11,  5.10s/it]

Steps:15500 , Loss: 30.358596801757812


 73%|███████▎  | 14600/20000 [20:41:35<7:39:13,  5.10s/it]

Steps:15600 , Loss: 28.30583381652832


 74%|███████▎  | 14700/20000 [20:52:16<7:31:29,  5.11s/it]

Steps:15700 , Loss: 28.331266403198242


 74%|███████▍  | 14800/20000 [21:02:58<7:23:45,  5.12s/it]

Steps:15800 , Loss: 27.16587257385254


 74%|███████▍  | 14900/20000 [21:12:25<7:15:31,  5.12s/it]

Steps:15900 , Loss: 27.525693893432617


 75%|███████▍  | 14999/20000 [21:21:11<7:07:10,  5.13s/it]

Steps:16000 , Loss: 29.221715927124023


 76%|███████▌  | 15100/20000 [21:30:50<6:58:53,  5.13s/it]

Steps:16100 , Loss: 25.446773529052734


 76%|███████▌  | 15200/20000 [21:39:37<6:50:24,  5.13s/it]

Steps:16200 , Loss: 25.31222915649414


 76%|███████▋  | 15300/20000 [21:49:17<6:42:11,  5.13s/it]

Steps:16300 , Loss: 28.659645080566406


 77%|███████▋  | 15400/20000 [21:58:14<6:33:45,  5.14s/it]

Steps:16400 , Loss: 28.98410987854004


 77%|███████▋  | 15499/20000 [22:07:18<6:25:27,  5.14s/it]

Steps:16500 , Loss: 25.534576416015625


 78%|███████▊  | 15600/20000 [22:16:30<6:16:57,  5.14s/it]

Steps:16600 , Loss: 28.18372917175293


 78%|███████▊  | 15700/20000 [22:25:25<6:08:29,  5.14s/it]

Steps:16700 , Loss: 25.49984359741211


 79%|███████▉  | 15800/20000 [22:34:19<6:00:00,  5.14s/it]

Steps:16800 , Loss: 27.6923828125


 80%|███████▉  | 15900/20000 [22:43:24<5:51:34,  5.14s/it]

Steps:16900 , Loss: 25.417400360107422


 80%|███████▉  | 15999/20000 [22:52:05<5:43:07,  5.15s/it]

Steps:17000 , Loss: 26.74734115600586


 80%|████████  | 16100/20000 [23:01:34<5:34:39,  5.15s/it]

Steps:17100 , Loss: 27.923755645751953


 81%|████████  | 16200/20000 [23:10:52<5:26:15,  5.15s/it]

Steps:17200 , Loss: 26.03350257873535


 82%|████████▏ | 16300/20000 [23:19:20<5:17:38,  5.15s/it]

Steps:17300 , Loss: 26.18216896057129


 82%|████████▏ | 16400/20000 [23:27:43<5:09:00,  5.15s/it]

Steps:17400 , Loss: 25.849960327148438


 82%|████████▏ | 16499/20000 [23:36:01<5:00:28,  5.15s/it]

Steps:17500 , Loss: 26.541139602661133


 83%|████████▎ | 16600/20000 [23:45:04<4:51:52,  5.15s/it]

Steps:17600 , Loss: 30.36270523071289


 84%|████████▎ | 16700/20000 [23:55:36<4:43:40,  5.16s/it]

Steps:17700 , Loss: 26.633792877197266


 84%|████████▍ | 16800/20000 [24:06:46<4:35:34,  5.17s/it]

Steps:17800 , Loss: 28.446231842041016


 84%|████████▍ | 16900/20000 [24:15:20<4:26:57,  5.17s/it]

Steps:17900 , Loss: 28.341623306274414


 85%|████████▍ | 16999/20000 [24:23:39<4:18:23,  5.17s/it]

Steps:18000 , Loss: 27.035743713378906


 86%|████████▌ | 17100/20000 [24:33:03<4:09:48,  5.17s/it]

Steps:18100 , Loss: 28.56396484375


 86%|████████▌ | 17200/20000 [24:43:41<4:01:31,  5.18s/it]

Steps:18200 , Loss: 29.394088745117188


 86%|████████▋ | 17300/20000 [24:55:34<3:53:24,  5.19s/it]

Steps:18300 , Loss: 27.37038230895996


 87%|████████▋ | 17400/20000 [25:07:09<3:45:12,  5.20s/it]

Steps:18400 , Loss: 27.197267532348633


 87%|████████▋ | 17499/20000 [25:18:26<3:37:01,  5.21s/it]

Steps:18500 , Loss: 32.465126037597656


 88%|████████▊ | 17600/20000 [25:30:21<3:28:41,  5.22s/it]

Steps:18600 , Loss: 28.861602783203125


 88%|████████▊ | 17700/20000 [25:41:44<3:20:20,  5.23s/it]

Steps:18700 , Loss: 26.275474548339844


 89%|████████▉ | 17800/20000 [25:51:19<3:11:44,  5.23s/it]

Steps:18800 , Loss: 31.819808959960938


 90%|████████▉ | 17900/20000 [26:01:52<3:03:14,  5.24s/it]

Steps:18900 , Loss: 31.535076141357422


 90%|████████▉ | 17999/20000 [26:12:13<2:54:47,  5.24s/it]

Steps:19000 , Loss: 25.797698974609375


 90%|█████████ | 18100/20000 [26:21:48<2:46:02,  5.24s/it]

Steps:19100 , Loss: 27.16624641418457


 91%|█████████ | 18200/20000 [26:33:57<2:37:38,  5.25s/it]

Steps:19200 , Loss: 29.00971794128418


 92%|█████████▏| 18300/20000 [26:45:40<2:29:09,  5.26s/it]

Steps:19300 , Loss: 24.607994079589844


 92%|█████████▏| 18400/20000 [26:55:02<2:20:26,  5.27s/it]

Steps:19400 , Loss: 23.156953811645508


 92%|█████████▏| 18499/20000 [27:04:10<2:11:47,  5.27s/it]

Steps:19500 , Loss: 30.03864288330078


 93%|█████████▎| 18600/20000 [27:13:30<2:02:57,  5.27s/it]

Steps:19600 , Loss: 27.2390193939209


 94%|█████████▎| 18700/20000 [27:22:54<1:54:12,  5.27s/it]

Steps:19700 , Loss: 29.095287322998047


 94%|█████████▍| 18800/20000 [27:31:24<1:45:24,  5.27s/it]

Steps:19800 , Loss: 27.469844818115234


 94%|█████████▍| 18900/20000 [27:39:51<1:36:36,  5.27s/it]

Steps:19900 , Loss: 32.13965606689453


 95%|█████████▍| 18999/20000 [27:48:11<1:27:53,  5.27s/it]

Steps:20000 , Loss: 29.25994110107422


 96%|█████████▌| 19100/20000 [27:56:46<1:19:00,  5.27s/it]

Steps:20100 , Loss: 30.815303802490234


 96%|█████████▌| 19200/20000 [28:05:12<1:10:13,  5.27s/it]

Steps:20200 , Loss: 28.383535385131836


 96%|█████████▋| 19300/20000 [28:13:36<1:01:25,  5.27s/it]

Steps:20300 , Loss: 29.238231658935547


 97%|█████████▋| 19400/20000 [28:22:00<52:38,  5.26s/it]  

Steps:20400 , Loss: 27.82547950744629


 97%|█████████▋| 19499/20000 [28:30:19<43:56,  5.26s/it]

Steps:20500 , Loss: 28.80118179321289


 98%|█████████▊| 19600/20000 [28:38:48<35:04,  5.26s/it]

Steps:20600 , Loss: 29.14789581298828


 98%|█████████▊| 19700/20000 [28:47:12<26:18,  5.26s/it]

Steps:20700 , Loss: 24.55880355834961


 99%|█████████▉| 19800/20000 [28:55:36<17:31,  5.26s/it]

Steps:20800 , Loss: 26.63278579711914


100%|█████████▉| 19900/20000 [29:04:00<08:45,  5.26s/it]

Steps:20900 , Loss: 29.201570510864258


100%|█████████▉| 19999/20000 [29:12:19<00:05,  5.26s/it]

Steps:21000 , Loss: 25.49114227294922


100%|██████████| 20000/20000 [29:12:24<00:00,  5.26s/it]


In [8]:
# def get_evaluation(batch):
#         idxs, data_set = batch
#         feed_dict = get_feed_dict(data_set)
#         temp_global_step, temp_yp, temp_vals = sess.run([global_step, yp, list(tensor_dict.values())], feed_dict=feed_dict)
#         temp_yp = temp_yp[:data_set.num_examples]
#         temp_tensor_dict = dict(zip(tensor_dict.keys(), temp_vals))
#         e = Evaluation(data_set.data_type, int(temp_global_step), idxs, temp_yp.tolist(), tensor_dict=temp_tensor_dict)
#         return e

# idxs, data_set = bttt[0]
# feed_dict = get_feed_dict(data_set)
# temp_global_step, temp_yp1, temp_vals = sess.run([global_step, yp1, list(tensor_dict.values())], feed_dict=feed_dict)
# temp_yp1 = temp_yp1[:data_set.num_examples]

# if global_step % config.eval_period == 0:
#     num_steps = math.ceil(dev_data.num_examples / (config.batch_size))
#     if 0 < config.val_num_batches < num_steps:
#         num_steps = config.val_num_batches
    
#     train_batches = train_data.get_batches(batch_size, num_batches=num_steps, shuffle=True)

#     train_multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=1),
#                          data_set.divide(1))) for idxs, data_set in train_batches)    
    
#     e_train = evaluator.get_evaluation_from_batches(
#         sess, tqdm(train_multi_batches, total=num_steps))
    
#     graph_handler.add_summaries(e_train.summaries, global_step)
    
#     dev_batches = dev_data.get_batches(batch_size, num_batches=num_steps, shuffle=True)

#     dev_multi_batches = (tuple(zip(grouper(idxs, batch_size, shorten=True, num_groups=1),
#                          data_set.divide(1))) for idxs, data_set in dev_batches)

#     e_dev = evaluator.get_evaluation_from_batches(
#         sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
    
#     graph_handler.add_summaries(e_dev.summaries, global_step)

#     if config.dump_eval:
#         graph_handler.dump_eval(e_dev)
#     if config.dump_answer:
#         graph_handler.dump_answer(e_dev)