In [1]:
#https://medium.com/geekculture/neural-machine-translation-using-seq2seq-model-with-attention-9faea357d70b
import numpy as np
from tensorflow.keras.models import Sequential
from keras.utils import plot_model
import matplotlib.pyplot as plt
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, LSTM, Embedding,Flatten,Dropout, Dense, Concatenate, TimeDistributed, Bidirectional
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords 
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
import tensorflow as tf
import os
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K


class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced W_a, U_a, and V_a
     """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]
        
        fake_state_c = K.sum(encoder_out_seq, axis=1)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, enc_seq_len, latent_dim

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )
        print(c_outputs, e_outputs)

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]



In [3]:
df=pd.read_csv("Reviews.csv",nrows=50)
print(df.columns)
df.drop_duplicates(subset=['Text'],inplace=True)  #dropping duplicates\n",
df.dropna(axis=0,inplace=True)
 
detail_sentences=[]
summary_sentences=[]

#print(max([len(x) for x in df['Text']]))
#print(max([len(x) for x in df['Summary']]))
for detail,summary in zip(df['Text'][:3],df['Summary'][:3]):
    print("Review:",detail,"\n\tSummary:",summary,"\n")
    

for sent in df['Text']:
    # Add sos and eos tokens using string.join
    sent_new = " ".join(['sos', sent, 'eos'])
    detail_sentences.append(sent_new)

for sent in df['Summary']:
    # Add sos and eos tokens using string.join
    sent_new = " ".join(['sos', sent, 'eos'])
    summary_sentences.append(sent_new)
    

Index(['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator',
       'HelpfulnessDenominator', 'Score', 'Time', 'Summary', 'Text'],
      dtype='object')
Review: I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than  most. 
	Summary: Good Quality Dog Food 

Review: Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo". 
	Summary: Not as Advertised 

Review: This is a confection that has been around a few centuries.  It is a light, pillowy citrus gelatin with nuts - in this case Filberts. And it is cut into tiny squares and then liberally coated with powdered sugar.  And it is a tiny mouthful of heaven.  Not too chewy, and very flavorful.  I

In [4]:
max_len_text= max([len(sentence) for sentence in detail_sentences])
max_len_summary=max([len(sentence) for sentence in summary_sentences])
latent_dim = 50 
print(max_len_text,max_len_summary)

1231 68


In [5]:
X_train,X_test,y_train,y_test=train_test_split(detail_sentences,summary_sentences,test_size=0.2,random_state=0,shuffle=True)

In [6]:
stop_words = set(stopwords.words('english')) 
x_tokenizer = Tokenizer()
x_tokenizer.fit_on_texts(list(X_train))

#convert text sequences into integer sequences
x_train    =   x_tokenizer.texts_to_sequences(X_train) 
x_test   =   x_tokenizer.texts_to_sequences(X_test)

#print(x_tokenizer.sequences_to_texts(x_train))

#padding zero upto maximum length
x_train    =   pad_sequences(x_train,  maxlen=max_len_text, padding='post') 
x_test   =   pad_sequences(x_test, maxlen=max_len_text, padding='post')

#reverse
x_train=x_train[::-1]
x_test=x_test[::-1]

x_voc_size   =  len(x_tokenizer.word_index) +1

#print(x_train)
#print(x_tokenizer.sequences_to_texts(x_train))



In [7]:
y_tokenizer = Tokenizer()
y_tokenizer.fit_on_texts(list(y_train))

#convert summary sequences into integer sequences
y_train    =   y_tokenizer.texts_to_sequences(y_train) 
y_test   =   y_tokenizer.texts_to_sequences(y_test) 

#padding zero upto maximum length
y_train    =   pad_sequences(y_train, maxlen=max_len_summary, padding='post')
y_test   =   pad_sequences(y_test, maxlen=max_len_summary, padding='post')

#y_train=y_train[::-1]
#y_test=y_test[::-1]

y_voc_size  =   len(y_tokenizer.word_index) +1


In [8]:
def tokenize(x):
    x_tk = Tokenizer(char_level = False)
    x_tk.fit_on_texts(x)
    return x_tk.texts_to_sequences(x), x_tk

def pad(x, length=None):
    if length is None:
        length = max([len(sentence) for sentence in x])
    return pad_sequences(x, maxlen = length, padding = 'post')

def preprocess_embedding(x, y):
    preprocess_x, x_tk = tokenize(x)
    preprocess_y, y_tk = tokenize(y)
    preprocess_x = pad(preprocess_x)
    preprocess_y = pad(preprocess_y)
    # Keras's sparse_categorical_crossentropy function requires the labels to be in 3 dimensions
    preprocess_y = preprocess_y.reshape(*preprocess_y.shape, 1)
    return preprocess_x, preprocess_y, x_tk, y_tk

In [9]:
# [[[[[Encoder]]]]]
latent_dim=256

encoder_inputs = Input(shape=(max_len_text,)) 
enc_emb = Embedding(x_voc_size, 512)(encoder_inputs) 
#LSTM 1 
encoder_lstm1 = Bidirectional(LSTM(latent_dim,return_sequences=True,return_state=True,name='lstm1')) 
encoder_output1,forw_state_h1,forw_state_c1, back_state_h1, back_state_c1 = encoder_lstm1(enc_emb) 
#LSTM 2 
encoder_lstm2 = Bidirectional(LSTM(latent_dim,return_sequences=True,return_state=True,name='lstm2')) 
encoder_output2, forw_state_h2,forw_state_c2,back_state_h2, back_state_c2 = encoder_lstm2(encoder_output1) 
#LSTM 3 
encoder_lstm3=Bidirectional(LSTM(latent_dim, return_state=True, return_sequences=True,name='lstm3')) 
encoder_outputs1,forw_state_h,forw_state_c, back_state_h, back_state_c= encoder_lstm3(encoder_output2) 
# Set up the [[[[[[[[decoder]]]]]]]]. 

#encoder_states = [state_h, state_c]

final_enc_h = Concatenate()([forw_state_h,back_state_h])
final_enc_c = Concatenate()([forw_state_c,back_state_c])

#get context vector
encoder_states = [final_enc_h,final_enc_c]








Instructions for updating:
Colocations handled automatically by placer.


In [10]:
# decoder input and embedding

decoder_inputs = Input(shape=(None,)) 
dec_emb_layer = Embedding(y_voc_size, 512) 
dec_emb = dec_emb_layer(decoder_inputs) 
#LSTM using encoder_states as initial state
decoder_lstm = LSTM(512, return_sequences=True, return_state=True) 
decoder_outputs,_,_ = decoder_lstm(dec_emb,initial_state=encoder_states) 

#[[attention layer]]
attention_layer = AttentionLayer(name='attention_layer') 
attention_out, attention_weights = attention_layer([encoder_outputs1, decoder_outputs]) 
# Concat attention output and decoder LSTM output 
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])
#Dense layer
decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax')) 
decoder_outputs2 = decoder_dense(decoder_concat_input) 

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs2) 

model.compile(loss = sparse_categorical_crossentropy, 
                 optimizer = 'rmsprop', 
                 metrics = ['acc'])
    
model.summary()


Tensor("attention_layer/transpose_5:0", shape=(?, ?, 512), dtype=float32) Tensor("attention_layer/transpose_3:0", shape=(?, ?, 1231), dtype=float32)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1231)         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1231, 512)    366080      input_1[0][0]                    
__________________________________________________________________________________________________
bidirectional (Bidirectional)   [(None, 1231, 512),  1574912     embedding[0][0]                  
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) [(None, 1231, 512),  157491

In [None]:
#https://github.com/thushv89/attention_keras/blob/master/src/examples/nmt_bidirectional/model.py
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])

checkpoint = ModelCheckpoint("give Your path to save check points", monitor='val_accuracy')
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)
callbacks_list = [checkpoint, early_stopping]

encoder_input_data = x_train
decoder_input_data = y_train[:,:-1]
#decoder_target_data =  y_train[:,1:]
#number of rows, sequence length, one step ahead not including sos
decoder_target_data=y_train.reshape(y_train.shape[0],y_train.shape[1], 1)[:,1:]
#history=model.fit([x_train,y_train[:,:-1]], y_train.reshape(y_train.shape[0],y_train.shape[1], 1)[:,1:] ,epochs=50,batch_size=128)

encoder_input_test = x_test
decoder_input_test = y_test[:,:-1]
decoder_target_test=  y_test[:,1:]

history=model.fit([encoder_input_data ,decoder_input_data], decoder_target_data ,epochs=3,batch_size=128,callbacks=callbacks_list)

#istory = model.fit([encoder_input_data, decoder_input_data],decoder_target_data, 
#                   epochs=epochs, 
#                   batch_size=128,
#                   validation_data = ([encoder_input_test, decoder_input_test],decoder_target_test),
#                   callbacks= callbacks_list)
model.save_weights("lstm_model.h5") # can give whole path to save model

Instructions for updating:
Use tf.cast instead.
Epoch 1/3


In [None]:
#https://medium.com/geekculture/neural-machine-translation-using-seq2seq-model-with-attention-9faea357d70b
#https://data-flair.training/blogs/machine-learning-text-summarization/
model.load_weights("lstm_model.h5")

#[[[[[[[INFERENCE MODEL]]]]]]]
# encoder Inference model
#encoder_model = Model(encoder_inputs, outputs = [encoder_outputs1, final_enc_h, final_enc_c])
#encoder_model = Model(encoder_inputs,encoder_states)

en_outputs,state_h_enc,state_c_enc=model.layers[6].ouput

en_states=[state_h_enc,state_c_enc]

encoder_model = Model(model.input[0],[en_outputs]+en_states)

# Decoder Inference
decoder_state_h = Input(shape=(512,)) # This numbers has to be same as units of lstm's on which model is trained
decoder_state_c = Input(shape=(512,))

# we need hidden state for attention layer
decoder_hidden_state_input = Input(shape=(max_len_text,512)) 
# get decoder states
#decoder_states_inputs = [decoder_state_h, decoder_state_c]

decoder_inputs=model.input[1]
decoder_emb_layer=model.layers[5]
decoder_lstm=model.layers[7]
decoder_embedding=decoder_emb_layer(decoder_inputs)

decoder_outputs2, state_h2, state_c2=dec(decoder_embedding,initial_state=[decoder_state_h,decoder_state_c])

# embedding layer 
#dec_emb2 = dec_emb_layer(decoder_inputs)
#decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs)

attention=model.layers[8]
attn_out2=attend([decoder_outputs2,decoder_hidden_state_input])
merge2=Concatenate(axis=-1)([decoder_outputs2,att_out2])
# Attention inference
#attention_result_inf, attention_weights_inf = attention_layer([decoder_hidden_state_input, decoder_outputs2])
#decoder_concat_input_inf = Concatenate(axis=-1, name='concat_layer')([decoder_outputs2, attention_result_inf])

decoder_dense=model.layers[10]
decoder_output2=decoder_dense(merge2)
#decoder_states2= [state_h2, state_c2]
#decoder_outputs2 = decoder_dense(decoder_concat_input_inf)

# get decoder model
decoder_model= Model(
                    [decoder_inputs] + [decoder_hidden_state_input, decoder_state_h, decoder_state_c],
                     [decoder_outputs2]+ decoder_states2)

#decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs2] + decoder_states2)

print(decoder_model.summary())

In [None]:
target_word_index=y_tokenizer.index_word
source_word_index=x_tokenizer.index_word
source_index_word=x_tokenizer.word_index
target_word_index=y_tokenizer.word_index

In [None]:
sentence=["""sos Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo" eos"""]

input_sentence    =   x_tokenizer.texts_to_sequences(sentence) 
input_sentence_seq    =   pad_sequences(input_sentence,  maxlen=max_len_text, padding='post') 


#print(max_len_text)
#for i in input_sentence_seq:
#    print(i)


In [None]:
def get_predicted_sentence(input_seq):
    # Encode the input as state vectors.
    enc_output, enc_h, enc_c = encoder_model.predict(input_seq)
  
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))
    
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0] = source_index_word['sos']
    
    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    
    count=0
    while not stop_condition:
        count+=1
        if count>1000:
            print('count exceeded')
            stop_condition=True
        output_tokens, h, c = decoder_model.predict([target_seq] + [enc_output, enc_h, enc_c ])
        #print(output_tokens)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        if sampled_token_index in target_word_index:
            sampled_char = target_word_index[sampled_token_index]
            decoded_sentence += ' '+sampled_char
            print(decoded_sentence)
        
            if (sampled_char == 'eos' or len(decoded_sentence.split()) >= 512):
                stop_condition = True
        
        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index
        # Update states
        enc_h, enc_c = h, c
    
    return decoded_sentence

In [None]:
#print(input_sentence_seq)
#print(x_test[0])
#sentence=get_predicted_sentence(input_sentence_seq.reshape(1,max_len_text))[:-4]
sentence=get_predicted_sentence(input_sentence_seq)
print("done",sentence)

