# Basic Encoder

## Imports

In [1]:
%matplotlib inline
import math
import numpy as np
import os
import random
import tensorflow as tf
from matplotlib import pylab
from collections import Counter
import csv
import pandas as pd

# Seq2Seq Items
import tensorflow.contrib.seq2seq as seq2seq
from tensorflow.python.ops.rnn_cell import LSTMCell,LSTMStateTuple
from tensorflow.python.ops.rnn_cell import MultiRNNCell
from tensorflow.contrib.seq2seq.python.ops import attention_wrapper
from tensorflow.python.layers.core import Dense

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
def splitdata(inp,outp,delimiter=' '):
    df = pd.read_csv(inp,header = None, delimiter=delimiter)
    df[0].to_csv(outp,index=None,header=None)
    return

In [3]:
def addToken(file_name, token, loc='start'):
    string_to_add = token
    if loc=='start':
        with open(file_name, 'r') as f:
            file_lines = [' '.join([string_to_add, x]) for x in f.readlines()]
    elif loc == 'end':
        with open(file_name, 'r') as f:
            file_lines = [' '.join([x.strip(), string_to_add, '\n']) for x in f.readlines()]
    else:
        print("Please Enter proper location (start/end)")
    
    with open(file_name, 'w') as f:
        f.writelines(file_lines)
    
    return

In [4]:
# addToken('./data/WeatherGov/train/summaries.txt', token='<s>', loc='start')
# addToken('./data/WeatherGov/train/summaries.txt', token='<unk>', loc='end')

## Hyperparameters

In [5]:
# target_vocab_size = 392
# source_vocab_size = 192
vocab_size= 392
num_units = 512
input_size = 256
batch_size = 32
source_sequence_length=122
target_sequence_length=88
decoder_type = 'basic' # could be basic or attention
sentences_to_read = 25000
tgt_sos_id = '<s>'
tgt_eos_id = '</s>'

## Loading vocabularies

In [6]:
# splitdata('./data/table_vocabcount.txt','./data/table_vocab.txt')
# splitdata('./data/summary_vocabcount.txt','./data/summary_vocab.txt')

In [7]:
# inp = './data/summary_vec_bk.txt'
# df = pd.read_csv(inp,header = None, delimiter=' ')
# x = df[df.columns[1:-1]].values
# np.save('summary_vec', x)
# word = np.load('./summary_vec.npy')
# print word

In [8]:
src_dictionary = dict()
with open('./data/table_vocab.txt', 'r') as f:
    for line in f:
        #we are discarding last char as it is new line char
        src_dictionary[line[:-1]] = len(src_dictionary)

src_reverse_dictionary = dict(zip(src_dictionary.values(),src_dictionary.keys()))

print('Source')
print('\t',list(src_dictionary.items())[:10])
print('\t',list(src_reverse_dictionary.items())[:10])
print('\t','Vocabulary size: ', len(src_dictionary))

tgt_dictionary = dict()
with open('./data/summary_vocab.txt', 'r') as f:
    for line in f:
        #we are discarding last char as it is new line char
        tgt_dictionary[line[:-1]] = len(tgt_dictionary)

tgt_reverse_dictionary = dict(zip(tgt_dictionary.values(),tgt_dictionary.keys()))

print('Target')
print('\t',list(tgt_dictionary.items())[:10])
print('\t',list(tgt_reverse_dictionary.items())[:10])
print('\t','Vocabulary size: ', len(tgt_dictionary))


Source
	 [('</s>', 0), ('time', 1), ('min', 2), ('mean', 3), ('max', 4), ('skyCover', 5), ('mode-bucket-0-100-4', 6), ('0', 7), ('6-21', 8), ('17-30', 9)]
	 [(0, '</s>'), (1, 'time'), (2, 'min'), (3, 'mean'), (4, 'max'), (5, 'skyCover'), (6, 'mode-bucket-0-100-4'), (7, '0'), (8, '6-21'), (9, '17-30')]
	 Vocabulary size:  192
Target
	 [('</s>', 0), ('.', 1), (',', 2), ('mph', 3), ('with', 4), ('a', 5), ('of', 6), ('<s>', 7), ('wind', 8), ('<unk>', 9)]
	 [(0, '</s>'), (1, '.'), (2, ','), (3, 'mph'), (4, 'with'), (5, 'a'), (6, 'of'), (7, '<s>'), (8, 'wind'), (9, '<unk>')]
	 Vocabulary size:  392


## Loading Tables and Summaries

In [9]:
# Loading train.combined as source_sent and summaries.txt as target_sent
source_sent = []
target_sent = []

test_source_sent = []
test_target_sent = []


with open('/home/nitesh/Documents/PA_4/data/WeatherGov/train/train.combined', 'r') as f:
    for l_i, line in enumerate(f):
#         # discarding first 20 translations as there was some
#         # english to english translations found in the first few. which are wrong
#         if l_i<50:
#             continue
        source_sent.append(line)
        if len(source_sent)>=sentences_to_read:
            break
        
            
with open('/home/nitesh/Documents/PA_4/data/WeatherGov/train/summaries.txt', 'r') as f:
    for l_i, line in enumerate(f):
#         if l_i<50:
#             continue
        
        target_sent.append(line)
        if len(target_sent)>=sentences_to_read:
            break
        
            
assert len(source_sent)==len(target_sent),'Source: %d, Target: %d'%(len(source_sent),len(target_sent))

print('Sample translations (%d)'%len(source_sent))
for i in range(0,sentences_to_read,10000):
    print('(',i,') Table: ', source_sent[i])
    print('(',i,') Summary: ', target_sent[i])


Sample translations (25000)
( 0 ) Table:  temperature time 6-21 min 26 mean 43 max 53 windChill time 6-21 min 0 mean 22 max 46 windSpeed time 6-21 min 6 mean 8 max 10 mode-bucket-0-20-2 0-10 windDir time 6-21 mode SE gust time 6-21 min 0 mean 0 max 0 skyCover time 6-21 mode-bucket-0-100-4 50-75 skyCover time 6-9 mode-bucket-0-100-4 0-25 skyCover time 6-13 mode-bucket-0-100-4 25-50 skyCover time 9-21 mode-bucket-0-100-4 50-75 skyCover time 13-21 mode-bucket-0-100-4 50-75 precipPotential time 6-21 min 0 mean 2 max 12

( 0 ) Summary:  Mostly sunny , with a high near 53 . Southeast wind between 7 and 9 mph . 

( 10000 ) Table:  temperature time 6-21 min 29 mean 31 max 35 windChill time 6-21 min 16 mean 21 max 25 windSpeed time 6-21 min 8 mean 15 max 21 mode-bucket-0-20-2 10-20 windDir time 6-21 mode WNW gust time 6-21 min 0 mean 12 max 29 skyCover time 6-21 mode-bucket-0-100-4 50-75 skyCover time 6-9 mode-bucket-0-100-4 50-75 skyCover time 6-13 mode-bucket-0-100-4 50-75 skyCover time 9-21 

## Statistics 

In [10]:
def split_to_tokens(sent,is_source):
    #sent = sent.replace('-',' ')
#     sent = sent.replace(',','')
#     sent = sent.replace('.','')
    sent = sent.replace('\n','') 
    
    sent_toks = sent.split(' ')
    for t_i, tok in enumerate(sent_toks):
        if is_source:
            if tok not in src_dictionary.keys():
                sent_toks[t_i] = '<unk>'
                #print tok
        else:
            if tok not in tgt_dictionary.keys():
                sent_toks[t_i] = '<unk>'
                #print(tok)

    return sent_toks

In [11]:
# Let us first look at some statistics of the sentences
source_len = []
source_mean, source_std = 0,0
for sent in source_sent:
    source_len.append(len(split_to_tokens(sent.strip(),True)))

print('(Source) Sentence mean length: ', np.mean(source_len))
print('(Source) Sentence stddev length: ', np.std(source_len))
print('(Source) Sentence max length: ', np.max(source_len))
#print source_len

target_len = []
target_mean, target_std = 0,0
for sent in target_sent:
    target_len.append(len(split_to_tokens(sent.strip(),False)))

print('(Target) Sentence mean length: ', np.mean(target_len))
print('(Target) Sentence stddev length: ', np.std(target_len))
print('(Target) Sentence max length: ', np.max(target_len))

(Source) Sentence mean length:  89.21
(Source) Sentence stddev length:  14.010963564294926
(Source) Sentence max length:  122
(Target) Sentence mean length:  30.59104
(Target) Sentence stddev length:  13.863538931975485
(Target) Sentence max length:  88


## Batch-processing

In [12]:
train_inputs = []
train_outputs = []
train_inp_lengths = []
train_out_lengths = []

max_tgt_sent_lengths = 0

src_max_sent_length = source_sequence_length+1
tgt_max_sent_length = target_sequence_length+1
for s_i, (src_sent, tgt_sent) in enumerate(zip(source_sent,target_sent)):
    
    src_sent_tokens = split_to_tokens(src_sent.strip(),True)
    tgt_sent_tokens = split_to_tokens(tgt_sent.strip(),False)
        
    num_src_sent = []
    for tok in src_sent_tokens:
        num_src_sent.append(src_dictionary[tok])

    num_src_set = num_src_sent[::-1] # we reverse the source sentence. This improves performance
    num_src_sent.insert(0,src_dictionary['<s>'])
    train_inp_lengths.append(min(len(num_src_sent)+1,src_max_sent_length))
    
    # append until the sentence reaches max length
    if len(num_src_sent)<src_max_sent_length:
        num_src_sent.extend([src_dictionary['</s>'] for _ in range(src_max_sent_length - len(num_src_sent))])
    # if more than max length, truncate the sentence
    elif len(num_src_sent)>src_max_sent_length:
        num_src_sent = num_src_sent[:src_max_sent_length]
    assert len(num_src_sent)==src_max_sent_length,len(num_src_sent)

    train_inputs.append(num_src_sent)

    #num_tgt_sent = [tgt_dictionary['</s>']]
    num_tgt_sent = [tgt_dictionary['<s>']]
    for tok in tgt_sent_tokens:
        num_tgt_sent.append(tgt_dictionary[tok])
    
    #num_tgt_sent.insert(0,tgt_dictionary['<s>'])
    train_out_lengths.append(min(len(num_tgt_sent)+1,tgt_max_sent_length))
    
    if len(num_tgt_sent)<tgt_max_sent_length:
        num_tgt_sent.extend([tgt_dictionary['</s>'] for _ in range(tgt_max_sent_length - len(num_tgt_sent))])
    elif len(num_tgt_sent)>tgt_max_sent_length:
        num_tgt_sent = num_tgt_sent[:tgt_max_sent_length]
    
    train_outputs.append(num_tgt_sent)
    assert len(train_outputs[s_i])==tgt_max_sent_length, 'Sent length needs to be 60, but is %d'%len(binned_outputs[s_i])    

assert len(train_inputs)  == len(source_sent),\
        'Size of total bin elements: %d, Total sentences: %d'\
                %(len(train_inputs),len(source_sent))

print('Max sent lengths: ', max_tgt_sent_lengths)


train_inputs = np.array(train_inputs, dtype=np.int32)
train_outputs = np.array(train_outputs, dtype=np.int32)
train_inp_lengths = np.array(train_inp_lengths, dtype=np.int32)
train_out_lengths = np.array(train_out_lengths, dtype=np.int32)
print('Samples from bin')
print('\t',[src_reverse_dictionary[w]  for w in train_inputs[0,:].tolist()])
print('\t',[tgt_reverse_dictionary[w]  for w in train_outputs[0,:].tolist()])
print('\t',[src_reverse_dictionary[w]  for w in train_inputs[10,:].tolist()])
print('\t',[tgt_reverse_dictionary[w]  for w in train_outputs[10,:].tolist()])
print()
print('\tSentences ',train_inputs.shape[0])
print('train inp lengths',train_inp_lengths)
print('train out lengths',train_out_lengths)

Max sent lengths:  0
Samples from bin
	 ['<s>', 'temperature', 'time', '6-21', 'min', '26', 'mean', '43', 'max', '53', 'windChill', 'time', '6-21', 'min', '0', 'mean', '22', 'max', '46', 'windSpeed', 'time', '6-21', 'min', '6', 'mean', '8', 'max', '10', 'mode-bucket-0-20-2', '0-10', 'windDir', 'time', '6-21', 'mode', 'SE', 'gust', 'time', '6-21', 'min', '0', 'mean', '0', 'max', '0', 'skyCover', 'time', '6-21', 'mode-bucket-0-100-4', '50-75', 'skyCover', 'time', '6-9', 'mode-bucket-0-100-4', '0-25', 'skyCover', 'time', '6-13', 'mode-bucket-0-100-4', '25-50', 'skyCover', 'time', '9-21', 'mode-bucket-0-100-4', '50-75', 'skyCover', 'time', '13-21', 'mode-bucket-0-100-4', '50-75', 'precipPotential', 'time', '6-21', 'min', '0', 'mean', '2', 'max', '12', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '</s>', '<

## Creating Batches

In [13]:
input_size = 256

class DataGeneratorMT(object):
    
    def __init__(self,batch_size,num_unroll,is_source):
        self._batch_size = batch_size
        self._num_unroll = num_unroll
        self._cursor = [0 for offset in range(self._batch_size)]
        
        
        self._src_word_embeddings = np.load('./data/table_vec.npy')
        
        self._tgt_word_embeddings = np.load('./data/summary_vec.npy')
        
        self._sent_ids = None
        
        self._is_source = is_source
        
                
    def next_batch(self, sent_ids, first_set):
        
        if self._is_source:
            max_sent_length = src_max_sent_length
        else:
            max_sent_length = tgt_max_sent_length
        batch_labels_ind = []
        batch_data = np.zeros((self._batch_size),dtype=np.float64)
        batch_labels = np.zeros((self._batch_size),dtype=np.float64)
        
        for b in range(self._batch_size):
            
            sent_id = sent_ids[b]
            
            if self._is_source:
                sent_text = train_inputs[sent_id]
                             
                batch_data[b] = sent_text[self._cursor[b]]
                batch_labels[b]=sent_text[self._cursor[b]+1]

            else:
                sent_text = train_outputs[sent_id]
                
                # We cannot avoid having two different embedding vectors for <s> token
                # in soruce and target languages
                # Therefore, if the symbol appears, we always take the source embedding vector
                if sent_text[self._cursor[b]]!=src_dictionary['<s>']:
                    batch_data[b] = sent_text[self._cursor[b]]
                else:
                    batch_data[b] = sent_text[self._cursor[b]]
                batch_labels[b] = sent_text[self._cursor[b]+1]

            self._cursor[b] = (self._cursor[b]+1)%(max_sent_length-1)
                                    
        return batch_data,batch_labels
        
    def unroll_batches(self,sent_ids):
        
        if sent_ids is not None:
            
            self._sent_ids = sent_ids
            
            #if self._is_source:
                # we dont star at the very beginning, becaues the very beginning is a bunch of </s> symbols.
                # so we start from the middel s.t we get a minimum number of </s> symbols in our training data
                # this is only needed for source language
                #self._cursor = ((start_indices_for_bins[bin_id][self._sent_ids]//self._num_unroll)*self._num_unroll).tolist()
            #else:
            self._cursor = [0 for _ in range(self._batch_size)]
                
        unroll_data,unroll_labels = [],[]
        inp_lengths = None
        for ui in range(self._num_unroll):
            # The first batch in any batch of captions is different
            if self._is_source:
                data, labels = self.next_batch(self._sent_ids, False)
            else:
                data, labels = self.next_batch(self._sent_ids, False)
                    
            unroll_data.append(data)
            unroll_labels.append(labels)
            if self._is_source:
                inp_lengths = train_inp_lengths[sent_ids]
            else:
                inp_lengths = train_out_lengths[sent_ids]
        return unroll_data, unroll_labels, self._sent_ids, inp_lengths
    
    def reset_indices(self):
        self._cursor = [0 for offset in range(self._batch_size)]
        

## Embeddings

In [14]:
tf.reset_default_graph()

enc_train_inputs = []
dec_train_inputs = []

# Need to use pre-trained word embeddings
encoder_emb_layer = tf.convert_to_tensor(np.load('./data/table_vec.npy'))
decoder_emb_layer = tf.convert_to_tensor(np.load('./data/summary_vec.npy'))

# Defining unrolled training inputs
for ui in range(source_sequence_length):
    enc_train_inputs.append(tf.placeholder(tf.int32, shape=[batch_size],name='enc_train_inputs_%d'%ui))

dec_train_labels=[]
dec_label_masks = []
for ui in range(target_sequence_length):
    dec_train_inputs.append(tf.placeholder(tf.int32, shape=[batch_size],name='dec_train_inputs_%d'%ui))
    dec_train_labels.append(tf.placeholder(tf.int32, shape=[batch_size],name='dec-train_outputs_%d'%ui))
    dec_label_masks.append(tf.placeholder(tf.float64, shape=[batch_size],name='dec-label_masks_%d'%ui))
    
encoder_emb_inp = [tf.nn.embedding_lookup(encoder_emb_layer, src) for src in enc_train_inputs]
encoder_emb_inp = tf.stack(encoder_emb_inp)

decoder_emb_inp = [tf.nn.embedding_lookup(decoder_emb_layer, src) for src in dec_train_inputs]
decoder_emb_inp = tf.stack(decoder_emb_inp)

enc_train_inp_lengths = tf.placeholder(tf.int32, shape=[batch_size],name='train_input_lengths')
dec_train_inp_lengths = tf.placeholder(tf.int32, shape=[batch_size],name='train_output_lengths')

In [15]:
dec_train_inp_lengths

<tf.Tensor 'train_output_lengths:0' shape=(32,) dtype=int32>

## Encoder

In [16]:
encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units)

initial_state = encoder_cell.zero_state(batch_size, dtype=tf.float64)

encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
    encoder_cell, encoder_emb_inp, initial_state=initial_state,
    sequence_length=enc_train_inp_lengths, 
    time_major=True, swap_memory=True)

In [17]:
encoder_state

LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_3:0' shape=(32, 512) dtype=float64>, h=<tf.Tensor 'rnn/while/Exit_4:0' shape=(32, 512) dtype=float64>)

## Bidirectional Encoder

In [18]:
# forward_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units)
# backward_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units)

# ((encoder_fw_outputs,
#   encoder_bw_outputs),
#  (encoder_fw_final_state,
#   encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
#     forward_cell, backward_cell, encoder_emb_inp, dtype = tf.float64,
#     sequence_length=enc_train_inp_lengths, time_major=True,swap_memory=True)

# encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

In [19]:
enc_train_inp_lengths.shape.as_list()

[32]

In [20]:
encoder_outputs

<tf.Tensor 'rnn/TensorArrayStack/TensorArrayGatherV3:0' shape=(122, 32, 512) dtype=float64>

In [21]:
print( encoder_outputs)

Tensor("rnn/TensorArrayStack/TensorArrayGatherV3:0", shape=(122, 32, 512), dtype=float64)


In [22]:
# encoder_final_state_c = tf.concat(
#     (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)

# encoder_final_state_h = tf.concat(
#     (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

# #TF Tuple used by LSTM Cells for state_size, zero_state, and output state.
# encoder_final_state = LSTMStateTuple(
#     c=encoder_final_state_c,
#     h=encoder_final_state_h
# )

In [23]:
# encoder_state=encoder_final_state
# print encoder_state

In [24]:
# encoder_state[0].get_shape().as_list()[1]

In [25]:
decoder_emb_inp

<tf.Tensor 'stack_1:0' shape=(88, 32, 256) dtype=float64>

## Decoder

In [26]:
# Build RNN cell
#num_units = encoder_state[0].get_shape().as_list()[1]
decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units)

projection_layer = Dense(units=vocab_size, use_bias=True)

# Helper
helper = tf.contrib.seq2seq.TrainingHelper(
    decoder_emb_inp, [tgt_max_sent_length-1 for _ in range(batch_size)], time_major=True)

In [27]:
# helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
#     decoder_emb_layer,
#     tf.fill([batch_size], tgt_dictionary[tgt_sos_id]), tgt_dictionary[tgt_eos_id])
# Decoder
if decoder_type == 'basic':
    decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell, helper, encoder_state,
        output_layer=projection_layer)
    
elif decoder_type == 'attention':
    decoder = tf.contrib.seq2seq.BahdanauAttention(
        decoder_cell, helper, encoder_state,
        output_layer=projection_layer)

In [28]:
   
# Dynamic decoding
outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
    decoder, output_time_major=True,
    swap_memory=True)



## Loss and Predictions

In [29]:
logits = outputs.rnn_output

crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=dec_train_labels, logits=logits)
loss = (tf.reduce_sum(crossent*tf.stack(dec_label_masks)) / (batch_size*target_sequence_length))
#loss = tf.reduce_mean(crossent)

train_prediction = outputs.sample_id

## Optimizer

In [30]:
print('Defining Optimizer')
# Adam Optimizer. And gradient clipping.
global_step = tf.Variable(0, trainable=False)
inc_gstep = tf.assign(global_step,global_step + 1)
learning_rate = tf.train.exponential_decay(
    0.01, global_step, decay_steps=10, decay_rate=0.9, staircase=True)

with tf.variable_scope('Adam'):
    adam_optimizer = tf.train.AdamOptimizer(learning_rate)

adam_gradients, v = zip(*adam_optimizer.compute_gradients(loss))
adam_gradients, _ = tf.clip_by_global_norm(adam_gradients, 25.0)
adam_optimize = adam_optimizer.apply_gradients(zip(adam_gradients, v))

with tf.variable_scope('SGD'):
    sgd_optimizer = tf.train.GradientDescentOptimizer(learning_rate)

sgd_gradients, v = zip(*sgd_optimizer.compute_gradients(loss))
sgd_gradients, _ = tf.clip_by_global_norm(sgd_gradients, 25.0)
sgd_optimize = sgd_optimizer.apply_gradients(zip(sgd_gradients, v))

sess = tf.InteractiveSession()

Defining Optimizer


In [31]:
train_inputs.shape[0]

25000

In [32]:
source_sequence_length

122

## Running the NMT

In [33]:
enc_train_inp_lengths

<tf.Tensor 'train_input_lengths:0' shape=(32,) dtype=int32>

In [34]:
if not os.path.exists('logs'):
    os.mkdir('logs')
log_dir = 'logs'

bleu_scores_over_time = []
loss_over_time = []
tf.global_variables_initializer().run()

src_word_embeddings = np.load('./data/table_vec.npy')
tgt_word_embeddings = np.load('./data/summary_vec.npy')

# Defining data generators
enc_data_generator = DataGeneratorMT(batch_size=batch_size,num_unroll=source_sequence_length,is_source=True)
dec_data_generator = DataGeneratorMT(batch_size=batch_size,num_unroll=target_sequence_length,is_source=False)

num_steps = 2
avg_loss = 0

bleu_labels, bleu_preds = [],[]

print('Started Training')

for step in range(num_steps):

    # input_sizes for each bin: [40]
    # output_sizes for each bin: [60]
    #print ('.'),
    if (step+1)%100==0:
        print('')
        
    sent_ids = np.random.randint(low=0,high=train_inputs.shape[0],size=(batch_size))
    # ====================== ENCODER ================================================
    
    eu_data, eu_labels, _, eu_lengths = enc_data_generator.unroll_batches(sent_ids=sent_ids)
    
    feed_dict = {}
    feed_dict[enc_train_inp_lengths] = eu_lengths
    for ui,(dat,lbl) in enumerate(zip(eu_data,eu_labels)):            
        feed_dict[enc_train_inputs[ui]] = dat                
    
    # ====================== DECODER ===========================
    # First step we change the ids in a batch
    du_data, du_labels, _, du_lengths = dec_data_generator.unroll_batches(sent_ids=sent_ids)
    
    feed_dict[dec_train_inp_lengths] = du_lengths
    for ui,(dat,lbl) in enumerate(zip(du_data,du_labels)):            
        feed_dict[dec_train_inputs[ui]] = dat
        feed_dict[dec_train_labels[ui]] = lbl
        feed_dict[dec_label_masks[ui]] = (np.array([ui for _ in range(batch_size)])<du_lengths).astype(np.int32)
    
    # ======================= OPTIMIZATION ==========================
    if step < 10000:
        _,l,tr_pred = sess.run([adam_optimize,loss,train_prediction], feed_dict=feed_dict)
    else:
        _,l,tr_pred = sess.run([sgd_optimize,loss,train_prediction], feed_dict=feed_dict)
    tr_pred = tr_pred.flatten()
        
            
    if (step+1)%250==0:  
        
        print('Step ',step+1)

        print_str = 'Actual: '
        for w in np.concatenate(du_labels,axis=0)[::batch_size].tolist():
            print_str += tgt_reverse_dictionary[w] + ' '                    
            if tgt_reverse_dictionary[w] == '</s>':
                break
                      
        print(print_str)
        print()
        
        print_str = 'Predicted: '
        for w in tr_pred[::batch_size].tolist():
            print_str += tgt_reverse_dictionary[w] + ' '
            if tgt_reverse_dictionary[w] == '</s>':
                break
        print(print_str)
       
        print('\n')  
        
        rand_idx = np.random.randint(low=1,high=batch_size)
        print_str = 'Actual: '
        for w in np.concatenate(du_labels,axis=0)[rand_idx::batch_size].tolist():
            print_str += tgt_reverse_dictionary[w] + ' '
            if tgt_reverse_dictionary[w] == '</s>':
                break
        print(print_str)

            
        print()
        print_str = 'Predicted: '
        for w in tr_pred[rand_idx::batch_size].tolist():
            print_str += tgt_reverse_dictionary[w] + ' '
            if tgt_reverse_dictionary[w] == '</s>':
                break
        print(print_str)
        print()        
        
    avg_loss += l
    
    #sess.run(reset_train_state) # resetting hidden state for each batch
    
    if (step+1)%500==0:
        print('============= Step ', str(step+1), ' =============')
        print('\t Loss: ',avg_loss/500.0)
        
        loss_over_time.append(avg_loss/500.0)
             
        avg_loss = 0.0
        sess.run(inc_gstep)
            
        

Started Training


In [35]:
len(feed_dict)

388

In [None]:
feed_dict.values()[1]