In [1]:
import numpy as np
from keras.models import Sequential
from keras.utils import plot_model
import matplotlib.pyplot as plt
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
#from keras.layers import Dense, Flatten,Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras.callbacks import EarlyStopping
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, LSTM, Embedding,Flatten,Dropout, Dense, Concatenate, TimeDistributed, Bidirectional
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords 
from sklearn.model_selection import train_test_split
import pandas as pd

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import tensorflow as tf
import os
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K


class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced W_a, U_a, and V_a
     """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]
        
        fake_state_c = K.sum(encoder_out_seq, axis=1)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, enc_seq_len, latent_dim

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )
        print(c_outputs, e_outputs)

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]



In [28]:
df=pd.read_csv("Reviews.csv",nrows=10)
print(data.columns)
df.drop_duplicates(subset=['Text'],inplace=True)  #dropping duplicates\n",
df.dropna(axis=0,inplace=True)
    
print(max([len(x) for x in data['Text']]))
print(max([len(x) for x in data['Summary']]))
for detail,summary in zip(df['Text'][:3],df['Summary'][:3]):
    print("Review:",detail,"\n\tSummary:",summary,"\n")
    
    
data=[]

for sent in df['Text']:
    # Add sos and eos tokens using string.join
    sent_new = " ".join(['sos', sent, 'eos'])
    data.append(sent_new)

summary=[]
for sent in df['Summary']:
    # Add sos and eos tokens using string.join
    sent_new = " ".join(['sos', sent, 'eos'])
    summary.append(sent_new)
    

Index(['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator',
       'HelpfulnessDenominator', 'Score', 'Time', 'Summary', 'Text'],
      dtype='object')
509
45
Review: I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than  most. 
	Summary: Good Quality Dog Food 

Review: Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo". 
	Summary: Not as Advertised 

Review: This is a confection that has been around a few centuries.  It is a light, pillowy citrus gelatin with nuts - in this case Filberts. And it is cut into tiny squares and then liberally coated with powdered sugar.  And it is a tiny mouthful of heaven.  Not too chewy, and very flavor

In [21]:
    max_len_text=80
    max_len_summary=45
    latent_dim = 50 


In [29]:
X_train,X_test,Y_train,Y_test=train_test_split(data,summary,test_size=0.1,random_state=0,shuffle=True)

In [30]:
stop_words = set(stopwords.words('english')) 
x_tokenizer = Tokenizer()
x_tokenizer.fit_on_texts(list(X_train))

#convert text sequences into integer sequences
x_train    =   x_tokenizer.texts_to_sequences(X_train) 
x_test   =   x_tokenizer.texts_to_sequences(X_test)

#print(x_tokenizer.sequences_to_texts(x_train))

#padding zero upto maximum length
x_train    =   pad_sequences(x_train,  maxlen=max_len_text, padding='post') 
x_test   =   pad_sequences(x_test, maxlen=max_len_text, padding='post')

x_voc_size   =  len(x_tokenizer.word_index) +1

#print(x_train)
#print(x_tokenizer.sequences_to_texts(x_train))



In [31]:
y_tokenizer = Tokenizer()
y_tokenizer.fit_on_texts(list(Y_train))

#convert summary sequences into integer sequences
y_train    =   y_tokenizer.texts_to_sequences(Y_train) 
y_test   =   y_tokenizer.texts_to_sequences(Y_test) 

#padding zero upto maximum length
y_train    =   pad_sequences(y_train, maxlen=max_len_summary, padding='post')
y_test   =   pad_sequences(y_test, maxlen=max_len_summary, padding='post')

y_voc_size  =   len(y_tokenizer.word_index) +1


In [32]:
# [[[[[Encoder]]]]]
#batch_size=100
encoder_inputs = Input(shape=(max_len_text,)) 
#encoder_inputs = Input(batch_shape=(batch_size, max_len_text, x_voc_size))

enc_emb = Embedding(x_voc_size, latent_dim,trainable=True)(encoder_inputs) 
#print(enc_emb.shape)

#LSTM 1 
encoder_lstm1 = LSTM(latent_dim,return_sequences=True,return_state=True) 
encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb) 

#LSTM 2 
encoder_lstm2 = LSTM(latent_dim,return_sequences=True,return_state=True) 
encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1) 

#LSTM 3 
encoder_lstm3=LSTM(latent_dim, return_state=True, return_sequences=True) 
encoder_outputs, state_h, state_c= encoder_lstm3(encoder_output2) 

# Set up the [[[[[[[[decoder]]]]]]]]. 
decoder_inputs = Input(shape=(None,)) 
dec_emb_layer = Embedding(y_voc_size, latent_dim,trainable=True) 
dec_emb = dec_emb_layer(decoder_inputs) 

#LSTM using encoder_states as initial state
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True) 
decoder_outputs,decoder_fwd_state, decoder_back_state = decoder_lstm(dec_emb,initial_state=[state_h, state_c]) 

#print(decoder_outputs)
attn_layer = AttentionLayer(name='attention_layer') 
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs]) 

# Concat attention output and decoder LSTM output 
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])

#Dense layer
decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax')) 
decoder_outputs = decoder_dense(decoder_concat_input) 

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 
model.summary()


Tensor("attention_layer_3/transpose_5:0", shape=(?, ?, 50), dtype=float32) Tensor("attention_layer_3/transpose_3:0", shape=(?, ?, 80), dtype=float32)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 80)           0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 80, 50)       9700        input_8[0][0]                    
__________________________________________________________________________________________________
lstm_8 (LSTM)                   [(None, 80, 50), (No 20200       embedding_4[0][0]                
__________________________________________________________________________________________________
input_9 (InputLayer)            (None, None)         0    

In [33]:
#https://github.com/thushv89/attention_keras/blob/master/src/examples/nmt_bidirectional/model.py
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])

epochs = 5
batch_size = len(x_train)


history=model.fit([x_train,y_train[:,:-1]], y_train.reshape(y_train.shape[0],y_train.shape[1], 1)[:,1:] ,epochs=50,batch_size=128)
#history=model.fit([x_train,y_train[:,:-1]],epochs=50,batch_size=128)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [34]:
reverse_target_word_index=y_tokenizer.index_word
reverse_source_word_index=x_tokenizer.index_word
reverse_source_index_word=x_tokenizer.word_index
target_word_index=y_tokenizer.word_index

print(target_word_index)

encoder_model = Model(inputs=encoder_inputs,outputs=[encoder_outputs, state_h, state_c])

# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_hidden_state_input = Input(shape=(max_len_text,latent_dim))

# Get the embeddings of the decoder sequence
dec_emb2= dec_emb_layer(decoder_inputs) 
# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

#attention inference
attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])
decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_outputs2 = decoder_dense(decoder_inf_concat) 

decoder_model = Model(
    [decoder_inputs] + [decoder_hidden_state_input,decoder_state_input_h, decoder_state_input_c],
    [decoder_outputs2] + [state_h2, state_c2])





{'sos': 1, 'eos': 2, 'taffy': 3, 'as': 4, 'great': 5, 'dog': 6, 'food': 7, 'good': 8, 'yay': 9, 'barley': 10, 'healthy': 11, 'not': 12, 'advertised': 13, 'just': 14, 'the': 15, 'expensive': 16, 'brands': 17, 'wonderful': 18, 'tasty': 19, 'cough': 20, 'medicine': 21, 'quality': 22, 'nice': 23}
Tensor("attention_layer_4/transpose_5:0", shape=(?, ?, 50), dtype=float32) Tensor("attention_layer_4/transpose_3:0", shape=(?, ?, 80), dtype=float32)


In [35]:
def decode_sequence(input_seq,n_steps,cardinality):
    # Encode the input as state vectors.
    e_out, e_h, e_c = encoder_model.predict(input_seq)
    
    #target_seq = np.array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
    # collect predictions
    #output = list()
    #for t in range(n_steps):
        # predict next char
        #yhat, h, c = infdec.predict([target_seq] + state)
    #    yhat, h, c = decoder_model.predict([target_seq] + [e_out, e_h, e_c])
        # store prediction
    #    output.append(yhat[0,0,:])
        # update state
    #    state = [h, c]
        # update target sequence
    #    target_seq = yhat
    #return array(output)

    # Generate empty target sequence of length 1.
    #target_seq = np.zeros((1,1))
    
    # Populate the first word of target sequence with the start word.
    target_seq[0, 0] = target_word_index['sos']
    stop_condition = False
    decoded_sentence = ''
    count=0
    while not stop_condition:
      
        count+=1
        if count>100:
            break
        output_tokens, h, c = decoder_model.predict([target_seq] + [e_out, e_h, e_c])
        output_tokens=output_tokens[0, -1, :].flatten()
        print(output_tokens)
       # Sample a token
        sampled_token_index = np.argmax(output_tokens)+1
        print(sampled_token_index)
        print(reverse_target_word_index)
        sampled_token = reverse_target_word_index[sampled_token_index]
        
        decoded_sentence += ' '+sampled_token
        
        print(decoded_sentence)

        # Exit condition: either hit max length or find stop word.
        if (len(decoded_sentence.split()) >= (max_len_summary-1)):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index

        # Update internal states
        e_h, e_c = h, c

    return decoded_sentence

In [36]:
test_text="sos flavors eos"
bow=test_text.split(" ")

test_seq=[reverse_source_index_word[word] for word in bow]
test_seq =pad_sequences([test_seq], maxlen=max_len_text, padding='post')

test_seq=test_seq.reshape(1,max_len_text)   
#print(test_seq)
#seq, n_steps, cardinality
n_steps=6
decode_sequence(test_seq,n_steps,x_voc_size)

#e_out, e_h, e_c = encoder_model.predict(test_seq)
    
#target_seq = np.array([0.0 for _ in range(x_voc_size)]).reshape(1, 1, x_voc_size)
#yhat, h, c = decoder_model.predict([target_seq] + [e_out, e_h, e_c])

UnboundLocalError: local variable 'target_seq' referenced before assignment