In [1]:
import math
import numpy as np
import tensorflow as tf

from tensorflow.python.ops.rnn_cell import GRUCell
from tensorflow.python.ops.rnn_cell import LSTMCell
from tensorflow.python.ops.rnn_cell import MultiRNNCell
from tensorflow.python.ops.rnn_cell import DropoutWrapper, ResidualWrapper

from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.layers.core import Dense
from tensorflow.python.util import nest

from tensorflow.contrib.seq2seq.python.ops import attention_wrapper
from tensorflow.contrib.seq2seq.python.ops import beam_search_decoder

from preprocess import *
from loading_util import *

  from ._conv import register_converters as _register_converters


In [2]:
#Resetter
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
#embedding parameters
embedding_size = 50
vocab_size = 400003

#data parameters
eMax_allowed_length = 20
dMax_allowed_length = 15

In [4]:
#Fetching data
#default directory: 'data/data_10.csv'
X,Y= read_csv()

In [5]:
#Fetching glove vectors
#default directory: "./glove.6B.50d.txt"
embedding_size = 50
wi,iw,wv = read_glove_vecs()

In [6]:
#Adding extra tokens to glove dictionary
go_index,eos_index,unk_index = add_extra_to_dict(wi,iw,wv,embedding_size)
emb = map_dict_to_list(iw,wv)

In [7]:
#preprocessing data
#Mapping each word in a sentence to its glove index
eInput,eLengths = fit_encoder_text(data= X[1:],word_to_index = wi,max_allowed_seq_length = eMax_allowed_length)
dInput,dOutput,dLengths = fit_decoder_text(data= Y[1:],word_to_index = wi,max_allowed_seq_length = dMax_allowed_length)

In [8]:
class Seq2SeqModel():
    def __init__(self,mode,enc_depth,hidden_units,mydtype):
        assert mode.lower() in ['train','decode']
        
        self.mode = mode.lower()
        self.depth = enc_depth
        self.hidden_units=hidden_units
        self.dtype = tf.float16 if mydtype else tf.float32
        
        self.encoder_vocab_size = 50
        self.decoder_vocab_size = 50

        '''
        self.config = config
        
        
        self.cell_type = config['cell_type']
        self.hidden_units = config['hidden_units']
        self.depth = config['depth']
        self.attention_type = config['attention_type']
        self.embedding_size = config['embedding_size']
        
        self.num_encoder_symbols = config['num_encoder_symbols']
        self.num_decoder_symbols = config['num_decoder_symbols']
        
        self.use_residual = config['use_residual']
        self.attn_input_feeding = config['attn_input_feeding']
        self.use_dropout = config['use_dropout']
        self.keep_prob = 1.0 - config['dropout_rate']
        
        self.optimizer = config['optimizer']
        self.learning_rate = config['learning_rate']
        self.max_gradient_norm = config['max_gradient_norm']
        self.global_step = tf.Variable(0, trainable = Flase, name = 'global_step')
        self.global_epoch_step = tf.Variable(0,trainable=False, name = "global_epoch_step")
        self.global_epoch_step_op= tf.assign(self.global_epoch_step,self.global_epoch_step+1)
        
        self.dtype = tf.float16 if config['use_float16'] else tf.float32
        self.keep_prob_placeholder = tf.placeholder(self.dtype, shape=[], name = 'keep_prob')
        
        self.use_beamsearch_decode = False
        if self.mode == 'decode':
            self.beam_width = config['beam_width']
            self.use_beamsearch_decode = True if self.beam_width > 1 else False
            self.max_decode_step = config['max_decode_step']
        
        self.build_model()
        '''
    
    def build_model(self):
            print('building model..')

            #building encoder and decoder networks
            self.init_placeholders()
            '''
            self.build_encoder()
            self.build_decoder()
            self.summary_op = tf.summary.merge_all()
            '''    
    def init_placeholders(self):
            #encoder inputs: [batch_size, max_time_steps]
            self.encoder_inputs = tf.placeholder(dtype = tf.int32, shape = (None,None), name = 'encoder_inputs')
            #encoder_inputs_length: [batch_size]
            self.encoder_inputs_length = tf.placeholder(dtype=tf.int32, shape=(None,) , name = 'encoder_inputs_length')
            
            #get dynamic batch_size
            self.batch_size = tf.shape(self.encoder_inputs)[0]
            
            if(self.mode=='train'):
                
                #decoder_inputs: [batch_size,max_time_steps]
                self.decoder_inputs = tf.placeholder(dtype=tf.int32,shape=(None,None), name ='decoder_inputs')
                #decoder_inputs_length: [batch_size]
                self.decoder_inputs_length = tf.placeholder(dtype=tf.int32, shape=(None,), name='decoder_inputs_length')
                
                self.decoder_targets = tf.placeholder(dtype=tf.int32,shape=(None,None), name ='decoder_targets')
                
                '''
                #No need, already preprocessed
                decoder_start_token=tf.ones(shape=[self.batch_size,1], dtype=tf.int32)*data_utils.start_token
                
                decoder_end_token=tf.ones(shape=[self.batch_size,1], dtype=tf.int32)*data_utils.end_token
                '''
                
    def build_single_cell(self):
        cell_type = LSTMCell
        cell = cell_type(self.hidden_units)
        return cell

    def build_encoder_cell (self):
        return MultiRNNCell([self.build_single_cell() for i in range(self.depth)])
    
    def build_decoder_cell(self):
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length
    
    def build_encoder(self):
        print('Building Encoder..')
        with tf.variable_scope('encoder'):
            self.encoder_cell = self.build_encoder_cell()
            
            #Instantiating pretrained embeddings
            embedding_variable = tf.Variable(tf.constant(0.0, shape = [self.encoder_vocab_size, embedding_size]),trainable = False, name = 'embedding')
                           
            self.encoder_embedding_placeholder = tf.placeholder(tf.float32, shape=[self.encoder_vocab_size,embedding_size], name = 'embedding_placeholder' )
            self.encoder_embeddings = embedding_variable.assign(self.encoder_embedding_placeholder)
            self.encoder_inputs_embedded=tf.nn.embedding_lookup(self.encoder_embeddings,self.encoder_inputs)
            
    
            #instantiating dense layer
            input_layer = Dense(self.hidden_units, dtype = self.dtype, name = 'input_projection')
            #passing the embedding through dense layer
            self.encoder_input_embedded = input_layer(self.encoder_inputs_embedded)
            
            #Encode input sequences into context vectors
            #encoder_outputs: [batch_size, max_time_step, cell_output_size]
            #encoder_state: [batch_size,cell_output_size]
            self.encoder_outputs, self.encoder_last_state = tf.nn.dynamic_rnn(cell = self.encoder_cell,
                                                                               inputs=self.encoder_inputs_embedded,
                                                                               sequence_length=self.encoder_inputs_length,
                                                                               dtype=self.dtype,
                                                                               time_major=False)
            
            '''
            init = tf.global_variables_initializer()
            with tf.Session() as sess:
                sess.run(init)
                enc_outputs,enc_laststate=sess.run([self.encoder_outputs,self.encoder_last_state], 
                                                   feed_dict={self.encoder_embedding_placeholder:emb ,
                                                              self.encoder_inputs:eInput, 
                                                              self.encoder_inputs_length: eLengths })
                print('encoder Outputs:',enc_outputs.shape)
                print(enc_outputs)
                print()
                print('Encoder last state:',len(enc_laststate))
                print(enc_laststate)
            '''
        

        def build_decoder(self):
            print('Building decoder and attention...')
            with tf.variable_scope('decoder'):
                
                #Recheck this code
                self.decoder_cell,self.decoder_initial_state = self.build_decoder_cell()
                
                #Instantiating pretrained embeddings
                embedding_variable = tf.Variable(tf.constant(0.0, shape = [self.decoder_vocab_size, embedding_size]),trainable = False, name = 'embedding')

                self.decoder_embedding_placeholder = tf.placeholder(tf.float32, shape=[self.decoder_vocab_size,embedding_size], name = 'embedding_placeholder' )
                self.decoder_embeddings = embedding_variable.assign(self.decoder_embedding_placeholder)
                self.decoder_inputs_embedded=tf.nn.embedding_lookup(self.decoder_embeddings,self.encoder_inputs)
                
                #instantiating dense layer --> DOUBT
                input_layer = Dense(self.hidden_units, dtype = self.dtype, name = 'input_projection')
                
                #Output projection layer to convert cell outputs to logits --> DOUBT
                output_layer = Dense(vocab_size,"output_projection")
                
                if self.mode == 'train':
                    #decoder_inputs_embedded: [batch_size,max_time_step,embedding_size]
                    self.decoder_inputs_embededded = tf.nn.embedding_lookup(self.decoder_embeddings,
                                                                           self.decoder_inputs_train)
                    
                    #Embedded inputs going through projection layer
                    self.decoder_inputs_embedded=input_layer(self.decoder_inputs_embedded)
                    
                    #Helper to feed inputs for training: read inputs from dense ground truth vectors
                    training_helper = seq2seq.TrainingHelper(inputs = self.decoder_inputs_embedded,
                                                            sequence_length=self.decoder_inputs_length,
                                                            time_major=False,
                                                            name='training_helper')
                    training_decoder = seq2seq.BasicDecoder(cell=self.decoder_cell,
                                                           helper = training_helper,
                                                           initial_state = self.decoder_initial_state,
                                                           output_layer = outputl_layer)
                    
                    #Maximum decoder time_steps in current batch
                    max_decoder_length = tf.reduce_max(self.decoder_inputs_length)
                    
                    # decoder_outputs_train: BasicDecoderOutput
                    #                        namedtuple(rnn_outputs, sample_id)
                    # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                    #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                    # decoder_outputs_train.sample_id: [batch_size], tf.int32
                    (self.decoder_outputs_train, self.decoder_last_state_train, 
                    self.decoder_outputs_length_train) = (seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length))
                    
                    
                    # More efficient to do the projection on the batch-time-concatenated tensor
                    # logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
                    # self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
                    self.decoder_logits_train = tf.identity(self.decoder_outputs_train.rnn_output) 
                    # Use argmax to extract decoder symbols to emit
                    self.decoder_pred_train = tf.argmax(self.decoder_logits_train, axis=-1,
                                                        name='decoder_pred_train')
                    
                    # masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
                    masks = tf.sequence_mask(lengths=self.decoder_inputs_length, 
                                         maxlen=max_decoder_length, dtype=self.dtype, name='masks')

                    # Computes per word average cross-entropy over a batch
                    # Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
                    self.loss = seq2seq.sequence_loss(logits=self.decoder_logits_train, 
                                                  targets=self.decoder_targets,
                                                  weights=masks,
                                                  average_across_timesteps=True,
                                                  average_across_batch=True,)
                    # Training summary for the current batch_loss
                    
                    # Training summary for the current batch_loss
                    tf.summary.scalar('loss', self.loss)

                    # Contruct graphs for minimizing loss
                    self.init_optimizer()
                 
                
                #NOT TRAINING
                elif self.mode == 'decode':
                    
                    
                
                
            
            

In [9]:
#Testing Seq2Seq
config ={}

obj = Seq2SeqModel('train',1,150,False)
obj.build_model()
obj.build_encoder()

building model..
Building Encoder..
encoder Outputs: (6, 20, 150)
[[[-0.05762042  0.02685566  0.00973333 ... -0.10139786 -0.08643378
   -0.00411691]
  [-0.11273073 -0.04082831  0.01372339 ... -0.18441643 -0.14985572
    0.06799476]
  [-0.15240778 -0.03633224  0.07021248 ... -0.19439423 -0.18385549
    0.14006957]
  ...
  [-0.1984036   0.13669704  0.22605549 ... -0.17484288 -0.03526106
    0.09680352]
  [-0.0511231   0.03034962  0.18782715 ... -0.19914497  0.08092974
   -0.01022786]
  [-0.03546507  0.00767517  0.13554008 ... -0.16243319  0.08822359
    0.01649233]]

 [[-0.07305688  0.05720627  0.10539098 ... -0.05799546 -0.10593469
   -0.00916362]
  [-0.1250463   0.09961725  0.18959376 ... -0.10241318 -0.02890493
    0.02321122]
  [-0.01632207  0.03634805  0.0875487  ... -0.07028568 -0.00133933
    0.00139211]
  ...
  [-0.19145322  0.1008764  -0.11696041 ... -0.22059815 -0.01195039
    0.12622695]
  [-0.19516967  0.0559051  -0.07420645 ... -0.2459103  -0.00667882
    0.11362134]
  [-0.1