# In [5]:  (notebook cell prompt, kept as a comment so the file parses as Python)

import traceback
import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.stem import LancasterStemmer

import string
import re
import joblib
import numpy as np
from bs4 import BeautifulSoup

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input,Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras import models



import warnings
# Suppress every warning category for the rest of the process.
warnings.filterwarnings('ignore')

# Add the local ./nltk_data/ directory to NLTK's resource search path; this
# has to happen before the stop-word corpus is read below.
nltk.data.path.append('./nltk_data/')

# Dimensions used by the inference code in this file:
#   latent_dim - size of the decoder hidden/cell state inputs
#   max_in_len - padded length of the tokenised input text
#   max_tr_len - length limit applied while decoding the summary
latent_dim = 500
max_in_len = 74
max_tr_len = 17


# Shared text-normalisation objects used by clean_text().
stemm = LancasterStemmer()
stop_words = set(stopwords.words('english'))
# Lookup table mapping an English contraction to its expanded form.
# Keys are case-sensitive; both "I'..." and "i'..." variants are listed.
contractions = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "cannot",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'll": "he will",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how is",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "this's": "this is",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "here's": "here is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have",
}





def clean_text(texts):
    """Normalise a raw review into a list of stemmed tokens.

    Steps, in order: strip HTML markup, lower-case, expand contractions,
    tokenise, keep only alphabetic tokens of at least three characters that
    are not stop words, and stem what is left.

    Contractions are expanded on the text *before* tokenising. Every key in
    ``contractions`` contains an apostrophe, so looking tokens up after the
    ``isalpha()`` filter (as was done previously) could never match.

    Args:
        texts: The raw review text; may contain HTML.

    Returns:
        A list of stemmed word strings.
    """
    # Remove the HTML tags and lower-case in one go.
    plain = BeautifulSoup(texts, "lxml").text.lower()

    # Replace each run of letters/apostrophes that is a known contraction;
    # everything else (including all whitespace) is left untouched. Since the
    # text is already lower-cased, only the lower-case keys can match.
    expanded = re.sub(
        r"[a-z']+",
        lambda match: contractions.get(match.group(), match.group()),
        plain,
    )

    # Tokenise, filter and stem.
    return [
        stemm.stem(token)
        for token in word_tokenize(expanded)
        if token.isalpha() and len(token) >= 3 and token not in stop_words
    ]


def helper(clean_text):
    """Summarise one pre-cleaned review with greedy seq2seq decoding.

    Loads the trained model and both tokenizers from ``./model/``, rebuilds
    separate encoder and decoder inference models from the trained layers,
    and then decodes one word at a time, always taking the most probable
    next word.

    Args:
        clean_text: The cleaned review as a single space-separated string.
            (The name shadows the module-level ``clean_text`` function inside
            this body; it is kept so existing callers continue to work.)

    Returns:
        The decoded words, each followed by one space. The terminating
        ``"eos"`` token is included when the model emitted it. At most
        ``max_tr_len`` words are produced.

    Raises:
        KeyError: If the summary tokenizer has no ``'sos'`` entry.
    """
    # NOTE(review): the model and tokenizers are reloaded and the inference
    # graphs rebuilt on every call; cache them if this is called repeatedly.
    # seq2seq model
    model = models.load_model("./model/text_summarizer.h5")
    # NOTE(review): joblib.load unpickles arbitrary objects - only load
    # tokenizer files from a trusted source.
    # text tokenizer
    in_tokenizer = joblib.load('./model/text_tokenizer.pkl')
    # summary tokenizer
    tr_tokenizer = joblib.load('./model/summary_tokenizer.pkl')

    # Encode the text as one padded row of word indices. pad_sequences with
    # maxlen already yields a (1, max_in_len) array, so no reshape is needed
    # (the previous reshape call discarded its result anyway).
    inp_x = in_tokenizer.texts_to_sequences([clean_text])
    inp_x = pad_sequences(inp_x, maxlen=max_in_len, padding='post')

    # encoder part
    # Layer positions below assume the layout of the saved model
    # (5: decoder embedding, 6: encoder LSTM, 7: decoder LSTM, 8: attention,
    # 10: output dense) - TODO confirm against the training script.
    en_outputs, state_h_enc, state_c_enc = model.layers[6].output
    en_states = [state_h_enc, state_c_enc]
    en_model = Model(model.input[0], [en_outputs] + en_states)

    # decoder inference
    # Inputs that carry the previous hidden/cell state and the encoder
    # outputs into the single-step decoder.
    dec_state_input_h = Input(shape=(latent_dim,))
    dec_state_input_c = Input(shape=(latent_dim,))
    dec_hidden_state_input = Input(shape=(max_in_len, latent_dim))

    # Get the embeddings and input layer from the model
    dec_inputs = model.input[1]
    dec_emb_layer = model.layers[5]
    dec_lstm = model.layers[7]
    dec_embedding = dec_emb_layer(dec_inputs)

    # Run the decoder LSTM starting from the supplied states.
    dec_outputs2, state_h2, state_c2 = dec_lstm(
        dec_embedding, initial_state=[dec_state_input_h, dec_state_input_c]
    )

    attention = model.layers[8]
    attn_out2 = attention([dec_outputs2, dec_hidden_state_input])

    merge2 = Concatenate(axis=-1)([dec_outputs2, attn_out2])

    # Dense layer
    dec_dense = model.layers[10]
    dec_outputs3 = dec_dense(merge2)

    # Finally define the Model Class
    dec_model = Model(
        inputs=[dec_inputs] + [dec_hidden_state_input, dec_state_input_h, dec_state_input_c],
        outputs=[dec_outputs3] + [state_h2, state_c2],
    )

    # index -> word and word -> index lookups for the summary vocabulary
    reverse_target_word_index = tr_tokenizer.index_word
    target_word_index = tr_tokenizer.word_index

    # get the encoder output and states by passing the input sequence
    en_out, en_h, en_c = en_model.predict(inp_x)

    # target sequence with initial word as 'sos'
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_word_index['sos']

    # Collect predicted words; the limit is on the number of WORDS. (The
    # earlier check compared the character count of the growing string with
    # max_tr_len and so stopped after roughly 17 characters.)
    decoded_words = []
    while len(decoded_words) < max_tr_len:
        # get predicted output, hidden and cell state.
        output_words, dec_h, dec_c = dec_model.predict([target_seq] + [en_out, en_h, en_c])

        # Most probable next word; cast so the dict lookup uses a plain int.
        word_index = int(np.argmax(output_words[0, -1, :]))
        text_word = reverse_target_word_index.get(word_index)
        if text_word is None:
            # An index with no vocabulary entry (e.g. padding index 0):
            # treat it as the end of the summary instead of raising KeyError.
            break

        decoded_words.append(text_word)
        if text_word == "eos":
            break

        # Feed the predicted word and the new states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = word_index
        en_h, en_c = dec_h, dec_c

    # Same output format as before: every word followed by a single space.
    return "".join(word + " " for word in decoded_words)


def predict(text):
    """Return a generated summary for a raw review, or '' on any failure.

    The review is cleaned with ``clean_text``, joined into one string and
    passed to ``helper``. The end-of-sequence marker ``eos`` is removed from
    the result as a whole word only, so words that merely contain the
    letters "eos" are left intact, and surrounding whitespace is normalised.

    Args:
        text: The raw review text.

    Returns:
        The summary string; an empty string if cleaning or decoding raised.
    """
    try:
        input_text = ' '.join(clean_text(text))
        summary = helper(input_text)
    except Exception:
        # Best-effort API: report the failure and fall back to an empty
        # summary. print_exc() already writes the traceback (and returns
        # None, so it must not be wrapped in print()).
        traceback.print_exc()
        return ''

    return ' '.join(word for word in summary.split() if word != 'eos')



# Quick manual check: summarise one sample review and show both texts.
inp_review = "hi this is my first review , gota love this feeling i love it"
print('Review :-\n', inp_review)

summary = predict(inp_review)
print('Summary :- ', summary)

# Output of the cell above:
# Review :-
#  hi this is my first review , gota love this feeling i love it
# Summary :-  great product  
