In [1]:
import numpy as np
from __future__ import division

# Path of the pre-trained GloVe text file (presumably the 50-dimensional
# 6B-token release, judging by the file name).
filename = 'glove.6B.50d.txt'

def loadGloVe(filename):
    """Read a GloVe text file into parallel word / vector lists.

    Each non-blank line is expected to hold a word followed by its vector
    components, separated by single spaces.

    Args:
        filename: path of the GloVe text file.

    Returns:
        (vocab, embd): `vocab` is the list of words; `embd` is the list of the
        remaining fields of each line, still as strings (conversion to floats
        is left to the caller).
    """
    vocab = []
    embd = []
    # `with` guarantees the handle is closed even if parsing raises, and
    # iterating the file object avoids holding every line in memory at once.
    with open(filename, 'r') as glove_file:
        for line in glove_file:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line would otherwise add an empty
                # word with an empty vector and make the rows ragged.
                continue
            row = stripped.split(' ')
            vocab.append(row[0])
            embd.append(row[1:])
    print('Loaded GloVe!')
    return vocab, embd
# Pre-trained GloVe embedding: parse the file, then turn the string fields into
# a float32 matrix with one row per vocabulary entry.
vocab, embd = loadGloVe(filename)
embedding = np.asarray(embd).astype(np.float32)

# Number of components in each word vector (columns of the embedding matrix).
word_vec_dim = embedding.shape[1]

Loaded GloVe!


In [2]:
def np_nearest_neighbour(x):
    """Return the row of `embedding` most similar to `x` by cosine similarity.

    Args:
        x: query vector; assumed to be 1-D with one entry per column of the
            module-level `embedding` matrix (not validated here).

    Returns:
        The row of `embedding` with the highest cosine similarity to `x`.
        Rows (or a query) of zero length have no defined similarity and are
        never preferred over a row with a defined one; if no row has a defined
        similarity the first row is returned.
    """
    dot_products = np.sum(np.multiply(embedding, x), 1)
    x_norm = np.sqrt(np.sum(np.square(x), 0))
    row_norms = np.sqrt(np.sum(np.square(embedding), 1))
    norm_products = np.multiply(x_norm, row_norms)

    # A zero-length vector makes the denominator 0 and the quotient NaN, and
    # np.argmax would then select that NaN entry as the "maximum". Silence the
    # division warnings and rank such entries last instead.
    with np.errstate(divide='ignore', invalid='ignore'):
        cosine_similarities = np.divide(dot_products, norm_products)
    cosine_similarities = np.where(norm_products > 0, cosine_similarities, -np.inf)

    return embedding[np.argmax(cosine_similarities)]


def word2vec(word):
    """Convert a word into its vector representation.

    Args:
        word: the token to look up in the module-level `vocab` list.

    Returns:
        The matching row of `embedding`; words that are not in `vocab` map to
        the row of the 'unk' token.
    """
    try:
        # A single list scan: list.index raises ValueError for unknown words,
        # so no separate membership test is needed.
        return embedding[vocab.index(word)]
    except ValueError:
        return embedding[vocab.index('unk')]

def vec2word(vec):
    """Convert a vector representation into the word it represents.

    Args:
        vec: array-like word vector.

    Returns:
        The word of the first row of `embedding` that equals `vec` exactly.
        If no row matches, the word of the nearest row (by cosine similarity,
        via `np_nearest_neighbour`) is returned instead.
    """
    query = np.asarray(vec)

    # Only a vector shaped like a single embedding row can be an exact match;
    # checking the shape first also keeps broadcasting from producing
    # accidental matches for differently shaped inputs.
    if query.shape == embedding.shape[1:]:
        # One vectorised comparison over the whole matrix instead of a
        # Python-level loop over every row.
        exact_rows = np.flatnonzero(np.all(embedding == query, axis=1))
        if exact_rows.size:
            return vocab[int(exact_rows[0])]

    # No exact match: resolve the closest embedding row, which is then found
    # exactly on the recursive call.
    return vec2word(np_nearest_neighbour(query))

In [3]:
import pickle

# Load the pre-processed dataset from pickle files in the working directory.
# Later cells iterate both objects per example and pass their elements to
# vec2word, so each entry is presumably a sequence of word vectors.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open ('vec_summaries', 'rb') as fp:
    vec_summaries = pickle.load(fp)

with open ('vec_texts', 'rb') as fp:
    vec_texts = pickle.load(fp)
    

In [4]:
# Load the reduced vocabulary and its embeddings from pickle files.
# vocab_limit is used below via .index(word) and integer indexing, i.e. as a
# list of words; embd_limit presumably holds the matching vectors (TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open ('vocab_limit', 'rb') as fp:
    vocab_limit = pickle.load(fp)

with open ('embd_limit', 'rb') as fp:
    embd_limit = pickle.load(fp)
    

In [5]:
#DIAGNOSIS
# Reports how much of the dataset exceeds the candidate length limits.
#
# print(...) is called with one pre-built string so the cell runs identically
# whether or not `from __future__ import print_function` is already active in
# the kernel, and float(count) keeps the ratio a true division even without
# the `division` future import.

count = 0

LEN = 7

for summary in vec_summaries:
    # -1: the last element of a summary is not counted (presumably the
    # trailing 'eos' marker, which the training cell also strips).
    if len(summary)-1>LEN:
        count = count + 1
print("Percentage of dataset with summary length beyond "+str(LEN)+": "+str((float(count)/len(vec_summaries))*100)+"% ")

count = 0

LEN = 80

for text in vec_texts:
    if len(text)>LEN:
        count = count + 1
print("Percentage of dataset with text length more than "+str(LEN)+": "+str((float(count)/len(vec_texts))*100)+"% ")

Percentage of dataset with summary length beyond 7: 16.146% 
Percentage of dataset with text length more than 80: 40.412% 


In [6]:
# Length limits applied to the dataset.
MAX_SUMMARY_LEN = 7
MAX_TEXT_LEN = 80

# Drop every example whose summary (not counting its last element) or whose
# text is longer than the limits above; summaries and texts stay index-aligned.
vec_summaries_reduced = []
vec_texts_reduced = []

for idx, summary in enumerate(vec_summaries):
    if len(summary)-1 > MAX_SUMMARY_LEN or len(vec_texts[idx]) > MAX_TEXT_LEN:
        continue
    vec_summaries_reduced.append(summary)
    vec_texts_reduced.append(vec_texts[idx])

In [7]:
# Split the filtered dataset, in its existing order, into 70% training,
# 15% validation and the remainder for testing.
_n_examples = len(vec_summaries_reduced)

train_len = int((.7)*_n_examples)
val_len = int((.15)*_n_examples)
_val_end = train_len+val_len

train_texts = vec_texts_reduced[:train_len]
train_summaries = vec_summaries_reduced[:train_len]

val_texts = vec_texts_reduced[train_len:_val_end]
val_summaries = vec_summaries_reduced[train_len:_val_end]

test_texts = vec_texts_reduced[_val_end:_n_examples]
test_summaries = vec_summaries_reduced[_val_end:_n_examples]

In [8]:
# Number of training examples. Written as a call so the cell still runs once
# `from __future__ import print_function` is active in the kernel.
print(train_len)

18858


In [9]:
def transform_out(output_text):
    """Translate a sequence of word vectors into indices of `vocab_limit`.

    Each vector is resolved to its word with `vec2word` and then looked up in
    `vocab_limit`. No padding is applied: the result has exactly one entry per
    input vector.

    Returns:
        1-D int32 NumPy array of vocabulary indices.
    """
    indices = [vocab_limit.index(vec2word(vec)) for vec in output_text]
    return np.array(indices, dtype=np.int32)

In [10]:
# Additional hyperparameters and derived sizes.

hidden_size = 250  # width of the hidden state in each encoder direction
learning_rate = 0.003  # passed to the Adam optimizer
vocab_len = len(vocab_limit)  # number of output classes (size of the logits)
training_iters = 1  # number of passes over the training set in the training loop

In [11]:
import tensorflow as tf

# Placeholders: the graph processes one example per session run.
# tf_text: input word vectors, one row of word_vec_dim floats per token.
tf_text = tf.placeholder(tf.float32, [None,word_vec_dim])
# tf_seq_len: number of rows fed in tf_text.
tf_seq_len = tf.placeholder(tf.int32)
# tf_summary: target vocabulary indices, used as labels by the loss.
tf_summary = tf.placeholder(tf.int32,[None])
# tf_output_len: number of output steps the decoder should produce.
tf_output_len = tf.placeholder(tf.int32)

In [12]:
def score(hs,ht,seq_len):
    """Dot-product attention scores between every row of `hs` and `ht`.

    `hs` is multiplied by the transpose of `ht` and the product is flattened
    to a vector of `seq_len` entries, one score per row of `hs`.
    """
    ht_transposed = tf.transpose(ht)
    dot_products = tf.matmul(hs, ht_transposed)
    return tf.reshape(dot_products, [seq_len])


def align(hs,ht,seq_len):
    """Attention weights of `ht` over the rows of `hs`.

    The scores from `score` are normalised with a softmax and returned as a
    [seq_len, 1] column so they can be multiplied row-wise with `hs`.
    """
    attention_weights = tf.nn.softmax(score(hs, ht, seq_len))
    return tf.reshape(attention_weights, [seq_len, 1])


In [13]:
def forward_encoder(x,seq_len,inp_dim):
    """Build the left-to-right recurrent encoder with attention over its own past states.

    For every time step a candidate state is computed from the current input
    and the previous hidden state; attention weights (`align`) over all
    previously stored hidden states give a context vector; the context and the
    candidate are merged through `Wc`, and the step's final hidden state is
    computed from that merged state.

    Args:
        x: input sequence tensor; row `i` is reshaped to [1, inp_dim].
        seq_len: number of time steps to encode.
        inp_dim: size of each input vector.

    Returns:
        Tensor with one [hidden_size] row per time step, in input order.

    NOTE(review): `Whh` is created with tf.get_variable under the fixed name
    "whhf", so building this graph a second time in the same default graph is
    expected to fail unless variable reuse is configured.
    """

    #PARAMETERS

    # Sized from `inp_dim` so the projection always matches the [1, inp_dim]
    # input rows built below.
    Wxh = tf.Variable(tf.truncated_normal(shape=[inp_dim,hidden_size],stddev=0.01))
    Whh = tf.get_variable(name="whhf",shape=[hidden_size,hidden_size],dtype=tf.float32,initializer=tf.orthogonal_initializer())
    B = tf.Variable(tf.zeros([1,hidden_size]),dtype=tf.float32)

    # Merges [context, candidate] (2*hidden_size) back down to hidden_size.
    Wc = tf.Variable(tf.truncated_normal(shape=[2*hidden_size,hidden_size],stddev=0.01))

    #CONSTANTS AND ARRAYS
    hidden = tf.zeros([1,hidden_size],dtype=tf.float32)
    # hidden_list: states available to attention; it grows as the loop runs and
    # is read repeatedly, hence dynamic_size and clear_after_read=False.
    hidden_list = tf.TensorArray(size=1,dynamic_size=True,dtype=tf.float32,clear_after_read=False)
    # forward_list: the encoder output, one slot per time step.
    forward_list = tf.TensorArray(size=seq_len,dtype=tf.float32)
    context_vector = tf.zeros([1,hidden_size],dtype=tf.float32)

    # First time step, handled outside the loop: there are no previous states
    # to attend over, so the context vector is all zeros.
    i = 0
    inp = tf.reshape(x[i],[1,inp_dim])
    inp_comp = tf.matmul(inp,Wxh)

    candidate_hidden = tf.nn.elu(inp_comp + tf.matmul(hidden,Whh) + B)

    attended_hidden = tf.tanh(tf.matmul(tf.concat([context_vector,candidate_hidden],1),Wc))

    hidden = tf.nn.elu(inp_comp + tf.matmul(attended_hidden,Whh) + B)

    hidden_list = hidden_list.write(i,tf.reshape(hidden,[hidden_size]))
    forward_list = forward_list.write(i,tf.reshape(hidden,[hidden_size]))

    i = 1

    def cond(i,hidden,hidden_list,forward_list):
        return i<seq_len

    def body(i,hidden,hidden_list,forward_list):

        # All states stored so far: i rows at step i.
        hidden_list_stack = hidden_list.stack()

        inp = tf.reshape(x[i],[1,inp_dim])
        inp_comp = tf.matmul(inp,Wxh)

        candidate_hidden = tf.nn.elu(inp_comp + tf.matmul(hidden,Whh) + B)

        G = align(hidden_list_stack,candidate_hidden,i)

        # Attention-weighted sum of the previous hidden states.
        weighted_prev_hiddens = tf.multiply(hidden_list_stack,G)
        context_vector = tf.reduce_sum(weighted_prev_hiddens,0)
        context_vector = tf.reshape(context_vector,[1,hidden_size])

        attended_hidden = tf.tanh(tf.matmul(tf.concat([context_vector,candidate_hidden],1),Wc))

        hidden = tf.nn.elu(inp_comp + tf.matmul(attended_hidden,Whh) + B)

        # The last step's state is never attended over, so it is not stored.
        hidden_list = tf.cond(i<seq_len-1,
                              lambda: hidden_list.write(i,tf.reshape(hidden,[hidden_size])),
                              lambda: hidden_list)
        forward_list = forward_list.write(i,tf.reshape(hidden,[hidden_size]))

        return i+1,hidden,hidden_list,forward_list

    _,_,hidden_list,forward_list = tf.while_loop(cond,body,[i,hidden,hidden_list,forward_list])

    # Builds the close op for the scratch array; mark_used() presumably only
    # silences TensorFlow's unused-result warning (the op itself is never run).
    hidden_list.close().mark_used()

    return forward_list.stack()
                                

In [14]:
def backward_encoder(x,seq_len,inp_dim):
    """Build the right-to-left recurrent encoder with attention over its own past states.

    Mirrors `forward_encoder`, but walks the input from the last row to the
    first. States are stored twice: in processing order (for attention) and at
    their original input position (for the returned tensor), so the result is
    row-aligned with the forward encoder's output.

    Args:
        x: input sequence tensor; row `i` is reshaped to [1, inp_dim].
        seq_len: number of time steps to encode.
        inp_dim: size of each input vector.

    Returns:
        Tensor with one [hidden_size] row per time step, in input order.

    NOTE(review): `Whh` is created with tf.get_variable under the fixed name
    "whhb", so building this graph a second time in the same default graph is
    expected to fail unless variable reuse is configured.
    """

    #PARAMETERS

    # Sized from `inp_dim` so the projection always matches the [1, inp_dim]
    # input rows built below.
    Wxh = tf.Variable(tf.truncated_normal(shape=[inp_dim,hidden_size],stddev=0.01))
    # Orthogonally initialised recurrent weights, as in the forward encoder and
    # the decoder. (Previously this variable was immediately overwritten by an
    # identity-initialised one, leaving "whhb" unused in the graph.)
    Whh = tf.get_variable(name="whhb",shape=[hidden_size,hidden_size],dtype=tf.float32,initializer = tf.orthogonal_initializer())
    B = tf.Variable(tf.zeros([1,hidden_size]),dtype=tf.float32)

    # Merges [context, candidate] (2*hidden_size) back down to hidden_size.
    Wc = tf.Variable(tf.truncated_normal(shape=[2*hidden_size,hidden_size],stddev=0.01))

    #CONSTANTS AND ARRAYS
    hidden = tf.zeros([1,hidden_size],dtype=tf.float32)
    # hidden_list: states in processing order, read repeatedly by attention.
    hidden_list = tf.TensorArray(size=1,dynamic_size=True,dtype=tf.float32,clear_after_read=False)
    # hidden_list_ordered: states stored at their original input position.
    hidden_list_ordered = tf.TensorArray(size=seq_len,dtype=tf.float32)
    context_vector = tf.zeros([1,hidden_size],dtype=tf.float32)

    # First processed step (the last input row), handled outside the loop:
    # there are no previous states to attend over, so the context is zeros.
    # i indexes the input position, j counts processed steps.
    i = seq_len-1
    j = 0
    inp = tf.reshape(x[i],[1,inp_dim])
    inp_comp = tf.matmul(inp,Wxh)

    candidate_hidden = tf.nn.elu(inp_comp + tf.matmul(hidden,Whh) + B)

    attended_hidden = tf.tanh(tf.matmul(tf.concat([context_vector,candidate_hidden],1),Wc))

    hidden = tf.nn.elu(inp_comp + tf.matmul(attended_hidden,Whh) + B)

    hidden_list = hidden_list.write(j,tf.reshape(hidden,[hidden_size]))
    hidden_list_ordered = hidden_list_ordered.write(i,tf.reshape(hidden,[hidden_size]))

    i = seq_len-2
    j = 1

    def cond(i,j,hidden,hidden_list,hidden_list_ordered):
        return i>-1

    def body(i,j,hidden,hidden_list,hidden_list_ordered):

        # All states stored so far: j rows at processed step j.
        hidden_list_stack = hidden_list.stack()

        inp = tf.reshape(x[i],[1,inp_dim])
        inp_comp = tf.matmul(inp,Wxh)

        candidate_hidden = tf.nn.elu(inp_comp + tf.matmul(hidden,Whh) + B)

        G = align(hidden_list_stack,candidate_hidden,j)

        # Attention-weighted sum of the previously computed hidden states.
        weighted_prev_hiddens = tf.multiply(hidden_list_stack,G)
        context_vector = tf.reduce_sum(weighted_prev_hiddens,0)
        context_vector = tf.reshape(context_vector,[1,hidden_size])

        attended_hidden = tf.tanh(tf.matmul(tf.concat([context_vector,candidate_hidden],1),Wc))

        hidden = tf.nn.elu(inp_comp + tf.matmul(attended_hidden,Whh) + B)

        # The last processed state is never attended over, so it is not stored.
        hidden_list = tf.cond(j<seq_len-1,
                              lambda: hidden_list.write(j,tf.reshape(hidden,[hidden_size])),
                              lambda: hidden_list)
        hidden_list_ordered = hidden_list_ordered.write(i,tf.reshape(hidden,[hidden_size]))

        return i-1,j+1,hidden,hidden_list,hidden_list_ordered

    _,_,_,hidden_list,hidden_list_ordered = tf.while_loop(cond,body,[i,j,hidden,hidden_list,hidden_list_ordered])

    # Builds the close op for the scratch array; mark_used() presumably only
    # silences TensorFlow's unused-result warning (the op itself is never run).
    hidden_list.close().mark_used()

    return hidden_list_ordered.stack()
                                

In [15]:
def decoder(hidden_size,encoded_hidden,tf_seq_len,tf_output_len):
    """Build the attention decoder that emits one row of vocabulary logits per output step.

    Each step attends over the encoder states (`encoded_hidden`) using the
    current decoder state, produces logits through `Ws`, feeds the softmax of
    those logits back in through `Wyh` (the model's own prediction is the next
    input), and then attends over the decoder's previously stored states to
    compute the next hidden state.

    Args:
        hidden_size: width of the decoder state. It shadows the module-level
            `hidden_size` and must match the row width of `encoded_hidden`,
            whose first row is reshaped to [1, hidden_size] as the initial state.
        encoded_hidden: encoder states, one row per input position.
        tf_seq_len: number of rows in `encoded_hidden`.
        tf_output_len: number of output steps to generate.

    Returns:
        Tensor with one [vocab_len] row of unnormalised logits per output step.

    NOTE(review): `Whh` is created with tf.get_variable under the fixed name
    "whho", so building this graph a second time in the same default graph is
    expected to fail unless variable reuse is configured.
    """

    #PARAMETERS

    Wyh = tf.Variable(tf.truncated_normal(shape=[vocab_len,hidden_size],stddev=0.01))
    Whh = tf.get_variable(name="whho",shape=[hidden_size,hidden_size],dtype=tf.float32,initializer = tf.orthogonal_initializer())
    # Trainable bias, matching both encoders (it used to be a constant zero
    # tensor, which made the "+ B" terms below a no-op).
    B = tf.Variable(tf.zeros([1,hidden_size]),dtype=tf.float32)
    # Wc merges [decoder context, candidate]; Wcl merges [encoder context, state].
    Wc = tf.Variable(tf.truncated_normal(shape=[2*hidden_size,hidden_size],stddev=0.01))
    Wcl = tf.Variable(tf.truncated_normal(shape=[2*hidden_size,hidden_size],stddev=0.01))
    # Projects the attended state to vocabulary logits.
    Ws = tf.Variable(tf.truncated_normal(shape=[hidden_size,vocab_len],stddev=0.01))

    #other non-trainable values
    # The decoder starts from the first encoder state.
    hidden = encoded_hidden[0]
    hidden = tf.reshape(hidden,[1,hidden_size])

    # hidden_list_d: decoder states available to decoder-side attention.
    hidden_list_d = tf.TensorArray(dtype=tf.float32,size=1,dynamic_size=True,clear_after_read=False)
    # output: logits, one slot per output step.
    output = tf.TensorArray(size=tf_output_len,dtype=tf.float32)

    decoder_context_vector = tf.zeros([1,hidden_size],dtype=tf.float32)

    # First output step, handled outside the loop: there are no previous
    # decoder states yet, so the decoder-side context is all zeros.
    i = 0

    G = align(encoded_hidden,hidden,tf_seq_len)

    weighted_encoded_hiddens = tf.multiply(encoded_hidden,G)
    encoder_context_vector = tf.reduce_sum(weighted_encoded_hiddens,0)
    encoder_context_vector = tf.reshape(encoder_context_vector,[1,hidden_size])

    layer_attended_hidden = tf.tanh(tf.matmul(tf.concat([encoder_context_vector,hidden],1),Wcl))

    y = tf.matmul(layer_attended_hidden,Ws)
    # Raw logits are stored; the softmax below is only used as feedback.
    output = output.write(i,tf.reshape(y,[vocab_len]))
    y = tf.nn.softmax(y)

    candidate_hidden = tf.nn.elu(tf.matmul(y,Wyh) + tf.matmul(layer_attended_hidden,Whh) + B)

    attended_hidden = tf.tanh(tf.matmul(tf.concat([decoder_context_vector,candidate_hidden],1),Wc))

    hidden = tf.nn.elu(tf.matmul(y,Wyh) + tf.matmul(attended_hidden,Whh) + B)

    # Only store the state if a later step exists to attend over it.
    hidden_list_d = tf.cond((i+1)<tf_output_len,
                            lambda:hidden_list_d.write(i,tf.reshape(hidden,[hidden_size])),
                            lambda:hidden_list_d)

    i = 1

    def cond(i,hidden,hidden_list_d,output):
        return i < tf_output_len
    def body(i,hidden,hidden_list_d,output):

        # Attention over the encoder states with the current decoder state.
        G = align(encoded_hidden,hidden,tf_seq_len)
        weighted_encoded_hiddens = tf.multiply(encoded_hidden,G)
        encoder_context_vector = tf.reduce_sum(weighted_encoded_hiddens,0)
        encoder_context_vector = tf.reshape(encoder_context_vector,[1,hidden_size])
        layer_attended_hidden = tf.tanh(tf.matmul(tf.concat([encoder_context_vector,hidden],1),Wcl))

        y = tf.matmul(layer_attended_hidden,Ws)
        output = output.write(i,tf.reshape(y,[vocab_len]))
        y = tf.nn.softmax(y)

        candidate_hidden = tf.nn.elu(tf.matmul(y,Wyh) + tf.matmul(layer_attended_hidden,Whh) + B)

        # Attention over the decoder's own previous states: i rows at step i.
        hidden_list_stack = hidden_list_d.stack()

        G_dec = align(hidden_list_stack,candidate_hidden,i)

        weighted_prev_hiddens = tf.multiply(hidden_list_stack,G_dec)
        decoder_context_vector = tf.reduce_sum(weighted_prev_hiddens,0)
        decoder_context_vector = tf.reshape(decoder_context_vector,[1,hidden_size])

        attended_hidden = tf.tanh(tf.matmul(tf.concat([decoder_context_vector,candidate_hidden],1),Wc))

        hidden = tf.nn.elu(tf.matmul(y,Wyh) + tf.matmul(attended_hidden,Whh) + B)

        # The last step's state is never attended over, so it is not stored.
        hidden_list_d = tf.cond(i<tf_output_len-1,
                              lambda:hidden_list_d.write(i,tf.reshape(hidden,[hidden_size])),
                              lambda:hidden_list_d)

        return i+1,hidden,hidden_list_d,output

    _,_,hidden_list_d,output = tf.while_loop(cond,body,[i,hidden,hidden_list_d,output])

    # Builds the close op for the scratch array; mark_used() presumably only
    # silences TensorFlow's unused-result warning (the op itself is never run).
    hidden_list_d.close().mark_used()

    return output.stack()

In [16]:
def model(tf_text,tf_seq_len,tf_output_len):
    """Assemble the full network: bidirectional encoder followed by the decoder.

    The forward and backward encoders read the same input; their per-step
    states are concatenated along the feature axis, so the decoder works with
    states twice as wide as a single encoder's.

    Returns:
        The decoder output: one row of vocabulary logits per output step.
    """
    encoder_args = (tf_text, tf_seq_len, word_vec_dim)

    forward_hidden = forward_encoder(*encoder_args)
    backward_hidden = backward_encoder(*encoder_args)

    encoded_hidden = tf.concat([forward_hidden, backward_hidden], 1)

    return decoder(2*hidden_size, encoded_hidden, tf_seq_len, tf_output_len)

In [None]:
# Build the network once; `output` holds one row of vocabulary logits per
# output step.
output = model(tf_text,tf_seq_len,tf_output_len)

#OPTIMIZER

# Mean cross-entropy between each step's logits and the target word index.
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=tf_summary))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

#PREDICTION

# Index of the highest-scoring word at every output step. A single row-wise
# argmax replaces the per-row while_loop / TensorArray construction and yields
# the same int32 vector.
prediction = tf.cast(tf.argmax(output,axis=1),tf.int32)

In [None]:
import string
from __future__ import print_function

def _format_tokens(words, skip=None):
    """Join tokens for display: space-separated, except that the first token
    and punctuation tokens get no leading space. Tokens equal to `skip` are
    omitted."""
    pieces = []
    for position, word in enumerate(words):
        if word == skip:
            continue
        if word in string.punctuation or position == 0:
            pieces.append(str(word))
        else:
            pieces.append(" "+str(word))
    return "".join(pieces)


init = tf.global_variables_initializer()


with tf.Session() as sess: # Start Tensorflow Session

    # NOTE(review): the saver and the loss/accuracy accumulators below are
    # initialised but never used or updated in this cell.
    saver = tf.train.Saver()
    sess.run(init) #initialize all variables
    step = 0
    loss_list=[]
    acc_list=[]
    val_loss_list=[]
    val_acc_list=[]
    best_val_acc=0
    display_step = 1  # print a report every `display_step` training examples

    while step < training_iters:

        total_loss=0
        total_acc=0
        total_val_loss = 0
        total_val_acc = 0

        # One optimisation step per training example (no batching).
        for i in range(0,train_len):

            # Targets: the summary without its last element, as vocabulary indices.
            train_out = transform_out(train_summaries[i][0:len(train_summaries[i])-1])

            if i%display_step==0:
                print("\nIteration: "+str(i))
                print("Training input sequence length: "+str(len(train_texts[i])))
                print("Training target outputs sequence length: "+str(len(train_out)))

                print("\nTEXT:")
                # vec2word scans the whole vocabulary, so each vector is
                # resolved exactly once.
                print(_format_tokens([vec2word(vec) for vec in train_texts[i]]),end='')

                print("\n")


            # Run optimization operation (backpropagation)
            _,loss,pred = sess.run([optimizer,cost,prediction],feed_dict={tf_text: train_texts[i],
                                                    tf_seq_len: len(train_texts[i]),
                                                    tf_summary: train_out,
                                                    tf_output_len: len(train_out)})


            if i%display_step==0:
                print("\nPREDICTED SUMMARY:\n")
                print(_format_tokens([vocab_limit[int(index)] for index in pred]),end='')
                print("\n")

                print("ACTUAL SUMMARY:\n")
                print(_format_tokens([vec2word(vec) for vec in train_summaries[i]],skip='eos'),end='')

                print("\n")

                print("loss="+str(loss))

        step=step+1
    


Iteration: 0
Training input sequence length: 51
Training target outputs sequence length: 4

TEXT:
i have bought several of the vitality canned dog food products and have found them all to be of good quality. the product looks more like a stew than a processed meat and it smells better. my labrador is finicky and she appreciates this product better than most.


PREDICTED SUMMARY:

yellow-brown cons cons cons

ACTUAL SUMMARY:

good quality dog food

loss=10.3813

Iteration: 1
Training input sequence length: 37
Training target outputs sequence length: 3

TEXT:
product arrived labeled as jumbo salted peanuts ... the peanuts were actually small sized unsalted. not sure if this was an error or if the vendor intended to represent the product as `` jumbo ''.


PREDICTED SUMMARY:

good food food

ACTUAL SUMMARY:

not as advertised

loss=10.3795

Iteration: 2
Training input sequence length: 46
Training target outputs sequence length: 2

TEXT:
if you are looking for the secret ingredient in robi

it 's oatmeal

loss=9.35884

Iteration: 23
Training input sequence length: 79
Training target outputs sequence length: 4

TEXT:
i ordered this for my wife as it was unk by our daughter. she has this almost every morning and likes all flavors. she 's happy, i 'm happy!!!< br/>< a unk '' http: unk ''> mccann 's instant irish oatmeal, variety pack of regular, apples& cinnamon, and maple& brown sugar, 10-count boxes( pack of 6)< unk>


PREDICTED SUMMARY:

twizzlers taffy taffy taffy

ACTUAL SUMMARY:

wife 's favorite breakfast

loss=10.3175

Iteration: 24
Training input sequence length: 38
Training target outputs sequence length: 1

TEXT:
i have mccann 's oatmeal every morning and by ordering it from amazon i am able to save almost$ 3.00 per unk< br/> it is a great product. tastes great and very healthy


PREDICTED SUMMARY:

twizzlers

ACTUAL SUMMARY:

unk

loss=8.10648

Iteration: 25
Training input sequence length: 41
Training target outputs sequence length: 3

TEXT:
mccann 's oatmeal is 