In [1]:
import math
import numpy as np
import tensorflow as tf

from tensorflow.python.ops.rnn_cell import GRUCell
from tensorflow.python.ops.rnn_cell import LSTMCell
from tensorflow.python.ops.rnn_cell import MultiRNNCell
from tensorflow.python.ops.rnn_cell import DropoutWrapper, ResidualWrapper

from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.layers.core import Dense
from tensorflow.python.util import nest

from tensorflow.contrib.seq2seq.python.ops import attention_wrapper
from tensorflow.contrib.seq2seq.python.ops import beam_search_decoder

from preprocess import *
from loading_util import *

  from ._conv import register_converters as _register_converters


In [2]:
# Graph / seed resetter
def reset_graph(seed=42):
    """Start over with an empty default graph and reproducible randomness.

    Clears TensorFlow's default graph, then applies ``seed`` both as the
    TensorFlow graph-level seed and as the seed of NumPy's global random
    generator.

    Args:
        seed: Integer seed handed to TensorFlow and NumPy (default 42).
    """
    tf.reset_default_graph()
    # The TensorFlow seed is set after the reset, i.e. on the fresh graph.
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
# --- Embedding parameters ---
# Width of each word vector; the GloVe file loaded below is the 50-d one.
embedding_size = 50
# Rows of the embedding table: the 400000 GloVe entries plus the 3 extra
# tokens (_GO, EOS, UNK) appended in a later cell.
vocab_size = 400000 + 3

# --- Data parameters ---
# Passed as max_allowed_seq_length when fitting the encoder text.
eMax_allowed_length = 100
# Passed as max_allowed_seq_length when fitting the decoder text.
dMax_allowed_length = 15

# --- Network parameters ---
# Number of units handed to each LSTMCell.
hidden_units = 150
# Number of cells stacked inside the MultiRNNCell.
depth = 1

In [4]:
# Load the dataset.
# Called without arguments, so read_csv() uses its default location
# ('data/data_10.csv').
X, Y = read_csv()

In [5]:
# Fetch the GloVe vectors.
# Called without arguments, so read_glove_vecs() uses its default location
# ("./glove.6B.50d.txt").
# embedding_size is deliberately NOT re-assigned here: it is already defined
# in the parameters cell, and keeping one definition avoids the two drifting
# apart.
# wi is later used as the word -> index mapping; iw and wv are presumably the
# index -> word and word -> vector mappings -- TODO confirm against loading_util.
wi, iw, wv = read_glove_vecs()

# A notebook cell only echoes its LAST expression, so a bare `len(wi)` followed
# by `len(iw)` silently drops the first value. Show both sizes as one tuple.
len(wi), len(iw)

400000

In [6]:
# Extend the GloVe dictionaries with the extra tokens and keep their indices.
# IMPORTANT: run this cell exactly once -- do not re-run it.
go_index, eos_index, unk_index = add_extra_to_dict(
    wi, iw, wv, embedding_size
)

# Turn the dictionaries into `emb`, the value later fed to the embedding
# placeholder.
emb = map_dict_to_list(iw, wv)

_GO
EOS
UNK
400000
400001


In [7]:
# Preprocess the data: map each word of every sentence to its GloVe index.
# The first element of X and of Y is skipped (presumably a header row --
# TODO confirm against read_csv).
eInput, eLengths = fit_encoder_text(
    data=X[1:],
    word_to_index=wi,
    max_allowed_seq_length=eMax_allowed_length,
)
dInput, dOutput, dLengths = fit_decoder_text(
    data=Y[1:],
    word_to_index=wi,
    max_allowed_seq_length=dMax_allowed_length,
)

# Convert every preprocessing result to a NumPy array.
eInput, eLengths = np.array(eInput), np.array(eLengths)
dInput, dOutput, dLengths = np.array(dInput), np.array(dOutput), np.array(dLengths)

In [8]:
# Encoder token ids, shaped [batch_size, max_time_steps]; both dimensions are
# left unspecified.
encoder_inputs = tf.placeholder(
    dtype=tf.int32,
    shape=(None, None),
    name='encoder_inputs',
)

# Length of each encoder sequence, shaped [batch_size].
encoder_inputs_length = tf.placeholder(
    dtype=tf.int32,
    shape=(None,),
    name='encoder_inputs_length',
)

# Batch size is read from the fed tensor at run time rather than fixed up front.
batch_size = tf.shape(encoder_inputs)[0]

In [9]:
# Sanity check: evaluate the dynamic batch size for the preprocessed encoder
# input and print it.
with tf.Session() as sess:
    masize = sess.run(
        batch_size,
        feed_dict={encoder_inputs: eInput},
    )
    print(masize)

6


In [11]:
# Embedding table: a non-trainable, zero-initialised variable of shape
# [vocab_size, embedding_size].
embedding_variable = tf.Variable(
    tf.constant(0.0, shape=[vocab_size, embedding_size]),
    trainable=False,
    name='embedding',
)
# Placeholder through which the actual embedding values are supplied.
embedding_placeholder = tf.placeholder(
    tf.float32,
    shape=[vocab_size, embedding_size],
    name='embedding_placeholder',
)
# Op that copies the fed values into the variable.
encoder_embeddings = embedding_variable.assign(embedding_placeholder)

# NOTE(review): the lookup reads from the assign op's output, so evaluating
# encoder_inputs_embedded requires embedding_placeholder to be fed each time.
encoder_inputs_embedded = tf.nn.embedding_lookup(
    encoder_embeddings, encoder_inputs
)

In [15]:
# Sanity check for the embedding lookup: feed the embedding table and the
# encoder input, then print the result's shape and the vector of the first
# token of the first sequence.
with tf.Session() as sess:
    embed = sess.run(
        encoder_inputs_embedded,
        feed_dict={
            embedding_placeholder: emb,
            encoder_inputs: eInput,
        },
    )
    print(embed.shape)
    print(embed[0, 0])

(6, 100, 50)
[-0.14168    0.41108   -0.31227    0.16633    0.26124    0.45708
 -1.2001     0.014923  -0.22779   -0.16937    0.34633   -0.12419
 -0.65711    0.29226    0.62407   -0.57916   -0.33947   -0.22046
 -1.4832     0.28958    0.081396  -0.21696    0.0056613 -0.054199
  0.098504  -1.5874    -0.22867   -0.62957   -0.39542   -0.080841
  3.5949    -0.16872   -0.39024    0.026912   0.52646   -0.022844
  0.63289    0.62702   -0.22171   -0.45045   -0.14998   -0.27723
 -0.46658   -0.44268   -0.43691    0.38455    0.1369    -0.25424
  0.017821  -0.1489   ]


In [16]:
# Build one independent LSTMCell per layer. Passing the SAME cell object for
# every layer (as `[cell for i in range(depth)]` did) makes all layers share a
# single instance; that only works while depth == 1. TensorFlow 1.x is expected
# to reject such reuse across layers (older releases would tie the layers'
# weights instead) -- see the MultiRNNCell documentation.
layer_cells = [LSTMCell(hidden_units) for _ in range(depth)]

# `cell` stays available under its original name and is the first layer's cell,
# which for depth == 1 is exactly the cell wrapped below.
cell = layer_cells[0]

# Stack the per-layer cells into a single multi-layer RNN cell.
cells = MultiRNNCell(layer_cells)