In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd
import re
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense,Input,GRU
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
import pickle
from tensorflow.keras.layers import Input, Softmax, RNN, Dense, Embedding, LSTM,TimeDistributed,Concatenate


# Encoder and Decoder with Attention Mechanism

In [2]:
class Encoder(tf.keras.Model):
    '''
    Encoder model -- embeds an input token sequence and runs it through an LSTM,
    returning the per-timestep outputs and the final hidden and cell states.
    '''

    def __init__(self,inp_vocab_size,embedding_size,lstm_size,input_length):
        '''
        inp_vocab_size -- number of rows of the embedding table (Embedding input_dim)
        embedding_size -- width of each embedding vector (Embedding output_dim)
        lstm_size      -- number of LSTM units
        input_length   -- forwarded to the Embedding layer as input_length
        '''
        super().__init__()
        self.inp_vocab_size = inp_vocab_size
        self.embedding_size = embedding_size
        self.input_length = input_length
        self.lstm_size = lstm_size

        # Results of the most recent forward pass; plain placeholders until call() runs.
        self.lstm_output = 0
        self.lstm_state_h = 0
        self.lstm_state_c = 0

        # mask_zero=True makes the embedding emit a mask for token id 0.
        # The layer names are left exactly as they were (even though this one says
        # "decoder") in case previously saved weights refer to them.
        self.embedding = Embedding(input_dim=self.inp_vocab_size, output_dim=self.embedding_size,
                                   input_length=self.input_length,
                                   mask_zero=True, name="embedding_layer_decoder")
        self.lstm = LSTM(self.lstm_size, return_state=True, return_sequences=True, name="Encoder_LSTM")

    def call(self,input_sequence,states):
        '''
        Encode a batch of token-id sequences.

        input_sequence -- token ids fed to the embedding layer
        states         -- initial (hidden, cell) states as produced by initialize_states().
                          NOTE(review): they are accepted for interface compatibility but
                          are not forwarded to the LSTM, which starts from its default
                          (zero) state -- the same values initialize_states() returns.

        returns -- all encoder outputs, last time step's hidden state and cell state
        '''
        input_embedd = self.embedding(input_sequence)
        self.lstm_output, self.lstm_state_h, self.lstm_state_c = self.lstm(input_embedd)
        return self.lstm_output, self.lstm_state_h, self.lstm_state_c

    def initialize_states(self,batch_size):
        '''
        Given a batch size, return the initial hidden state and initial cell state.
        If batch size is 32 both are zero tensors of size [32, lstm_units].

        Fresh zeros are created on every call; previously this returned whatever the
        last forward pass had left on the instance (or the integers 0, 0 before any
        forward pass), regardless of batch_size.
        '''
        state_shape = (batch_size, self.lstm_size)
        return tf.zeros(state_shape), tf.zeros(state_shape)
      


In [3]:
import pdb
class Attention(tf.keras.layers.Layer):
  '''
    Computes an attention context vector over the encoder outputs using one of
    three scoring functions: 'dot', 'general' or 'concat'.
  '''
  SCORING_FUNCTIONS = ('dot', 'general', 'concat')

  def __init__(self,scoring_function, att_units):
    '''
      scoring_function -- one of 'dot', 'general', 'concat'
      att_units        -- width of the Dense projections used by 'general' and 'concat'.
                          For 'general' the projected encoder output is multiplied with
                          the decoder state, so att_units must equal the decoder state size.

      Raises ValueError for an unknown scoring_function (previously this only
      surfaced later, inside call(), as an unbound local variable).
    '''
    super(Attention, self).__init__()
    if scoring_function not in self.SCORING_FUNCTIONS:
      raise ValueError("scoring_function must be one of %s, got %r"
                       % (self.SCORING_FUNCTIONS, scoring_function))
    self.scoring_function = scoring_function

    # 'dot' needs no trainable variables.
    if self.scoring_function == 'general':
      self.W = tf.keras.layers.Dense(att_units)
    elif self.scoring_function == 'concat':
      # Creation order W1, W2, V is kept as-is so the layer's weight ordering does not change.
      self.W1 = tf.keras.layers.Dense(att_units)
      self.W2 = tf.keras.layers.Dense(att_units)
      self.V = tf.keras.layers.Dense(1)

  def call(self,decoder_hidden_state,encoder_output):
    '''
      Attention mechanism takes two inputs: the current step's decoder_hidden_state
      and all the encoder outputs.

      A score is computed between the decoder state and every encoder time step,
      the scores are normalised with a softmax over the time axis (axis=1), and the
      encoder outputs are summed with those weights to form the context vector.

      Returns (context_vector, attention_weights) where attention_weights are the
      softmax-normalised scores.
    '''
    # Insert a time axis so the decoder state lines up with encoder_output.
    decoder_hidden_state = tf.expand_dims(decoder_hidden_state, 1)

    if self.scoring_function == 'dot':
      score = tf.matmul(encoder_output, decoder_hidden_state, transpose_b=True)
    elif self.scoring_function == 'general':
      score = tf.matmul(self.W(encoder_output), decoder_hidden_state, transpose_b=True)
    elif self.scoring_function == 'concat':
      score = self.V(tf.nn.tanh(self.W1(decoder_hidden_state) + self.W2(encoder_output)))
    else:
      raise ValueError("Unknown scoring_function: %r" % (self.scoring_function,))

    # Return the normalised weights, not the raw scores.
    attention_weights = tf.nn.softmax(score, axis=1)
    context_vector = tf.reduce_sum(attention_weights * encoder_output, axis=1)
    return context_vector, attention_weights
    

In [4]:
class One_Step_Decoder(tf.keras.Model):
    '''
    Decodes a single time step: attends over the encoder outputs, feeds the
    context vector together with the embedded input token through one LSTM step,
    and projects the LSTM output onto the target vocabulary.
    '''

    def __init__(self,tar_vocab_size, embedding_dim, input_length, dec_units ,score_fun ,att_units):
        '''
        tar_vocab_size -- target vocabulary size (embedding rows and Dense output width)
        embedding_dim  -- width of the target embeddings
        input_length   -- stored on the instance
        dec_units      -- number of decoder LSTM units
        score_fun      -- scoring function name handed to Attention
        att_units      -- NOTE(review): not used; Attention is built with dec_units
        '''
        super(One_Step_Decoder, self).__init__()
        self.vocab_size = tar_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.dec_units = dec_units

        # Sub-layers are created in the same order as before: attention, embedding, LSTM, Dense.
        self.attention = Attention(score_fun, dec_units)
        # The embedding is initialised from the notebook-level decoder_embedding_matrix.
        self.embedding = tf.keras.layers.Embedding(
            input_dim=self.vocab_size,
            output_dim=self.embedding_dim,
            name="embedding_layer_encoder",
            weights=[decoder_embedding_matrix],
        )
        self.lstm = LSTM(self.dec_units, return_state=True, return_sequences=True, name="Encoder_LSTM")
        # No activation: the projection produces unnormalised scores per vocabulary entry.
        self.DenseLayer = tf.keras.layers.Dense(self.vocab_size)

    def call(self,input_to_decoder, encoder_output, state_h,state_c):
        '''
        Run one decoding step.

        Returns (Output, new hidden state, new cell state, attention weights,
        context vector with an added time axis).
        '''
        token_embedding = self.embedding(input_to_decoder)
        attended, attention_weights = self.attention(state_h, encoder_output)

        # Concatenate context and token embedding, then restore the time axis for the LSTM.
        step_features = tf.concat([attended, tf.squeeze(token_embedding, 1)], 1)
        step_features = tf.expand_dims(step_features, 1)

        self.lstm_output, self.lstm_state_h, self.lstm_state_c = self.lstm(
            step_features, initial_state=[state_h, state_c])

        # Project to the vocabulary and collapse the single time step.
        Output = tf.reduce_sum(self.DenseLayer(self.lstm_output), axis=1)

        context_vector = tf.expand_dims(attended, 1)
        return Output, self.lstm_state_h, self.lstm_state_c, attention_weights, context_vector

In [5]:
class Decoder(tf.keras.Model):
    '''
    Runs One_Step_Decoder once per target position and stacks the per-step outputs.
    '''

    def __init__(self,out_vocab_size, embedding_dim, input_length, dec_units ,score_fun ,att_units):
        '''
        All arguments are stored on the instance and forwarded unchanged to One_Step_Decoder.
        input_length also fixes how many decoding steps call() performs.
        '''
        super(Decoder, self).__init__()
        self.out_vocab_size = out_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.dec_units = dec_units
        self.score_fun = score_fun
        self.att_units = att_units

        self.onestepdecoder = One_Step_Decoder(
            self.out_vocab_size,
            self.embedding_dim,
            self.input_length,
            self.dec_units,
            self.score_fun,
            self.att_units,
        )

    def call(self, input_to_decoder,encoder_output,decoder_hidden_state,decoder_cell_state ):
        '''
        Decode input_length steps; step i is fed column i of input_to_decoder and the
        states returned by step i-1 (the supplied states for the first step).

        Returns the step outputs transposed so that the batch axis comes first.
        '''
        step_outputs = tf.TensorArray(tf.float32, size=self.input_length, name="outputArray")
        hidden, cell = decoder_hidden_state, decoder_cell_state

        for step in range(self.input_length):
            # Keep a length-1 time axis on the token ids.
            step_tokens = tf.expand_dims(input_to_decoder[:, step], 1)
            step_result = self.onestepdecoder(step_tokens, encoder_output, hidden, cell)
            # Attention weights and context vector (positions 3 and 4) are not needed here.
            step_output, hidden, cell = step_result[0], step_result[1], step_result[2]
            step_outputs = step_outputs.write(step, step_output)

        # stack() puts the step axis first; swap it with the batch axis.
        return tf.transpose(step_outputs.stack(), [1, 0, 2])

In [6]:
class encoder_decoder(tf.keras.Model):
    '''
    Full sequence-to-sequence model: an Encoder followed by an attention Decoder.
    '''

    def __init__(self, encoder_inputs_length,decoder_inputs_length, output_vocab_size,score_fun,attn_units):
        '''
        encoder_inputs_length -- input_length handed to the Encoder
        decoder_inputs_length -- input_length handed to the Decoder (number of decoding steps)
        output_vocab_size     -- target vocabulary size used by the Decoder; previously this
                                 argument was ignored in favour of the global vocab_size_eng+1
        score_fun             -- attention scoring function name
        attn_units            -- attention units forwarded to the Decoder

        The encoder vocabulary size still comes from the notebook-level vocab_size_sms.
        '''
        super().__init__() # https://stackoverflow.com/a/27134600/4084039
        self.score_fun=score_fun
        self.attn_units=attn_units
        # encoder is assigned before decoder so the model's layer order stays the same.
        self.encoder = Encoder(inp_vocab_size=vocab_size_sms+1, embedding_size=300,lstm_size=300,
                               input_length=encoder_inputs_length)
        self.decoder = Decoder(out_vocab_size=output_vocab_size, embedding_dim=300,
                               input_length=decoder_inputs_length,dec_units=300,
                               score_fun=self.score_fun,att_units=self.attn_units)

    def call(self, data):
        '''
        data -- pair (encoder token ids, decoder input token ids).
        Returns the decoder outputs for every decoder position.
        '''
        encoder_tokens, decoder_tokens = data[0], data[1]

        # Use the actual batch size instead of a hard-coded 50, so a smaller final
        # batch still gets matching initial states.
        batch_size = tf.shape(encoder_tokens)[0]
        initial_state = self.encoder.initialize_states(batch_size)

        encoder_output, encoder_h, encoder_c = self.encoder(encoder_tokens, initial_state)
        decoder_output = self.decoder(decoder_tokens, encoder_output, encoder_h, encoder_c)
        return decoder_output

In [7]:
def lossfunction(y_true, y_pred):
  """Sparse categorical cross-entropy on logits, with target id 0 weighted as zero.

  y_true -- integer target ids; positions equal to 0 get sample weight 0
  y_pred -- unnormalised scores (from_logits=True)
  """
  # 1 where the target is a real token, 0 where it equals 0.
  token_weights = tf.cast(tf.math.not_equal(y_true, 0), dtype=tf.int64)

  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  return loss_object(y_true, y_pred, sample_weight=token_weights)

In [8]:
from tensorflow.keras import backend as K
def accuracy(y_true, y_pred):
  """Token-level accuracy that ignores positions whose target id is 0.

  y_true -- target ids
  y_pred -- per-token scores over the vocabulary (argmax is taken on the last axis)

  Returns the fraction of non-zero target positions predicted correctly, or 0
  when the batch contains no such position (previously 0/0 produced NaN).
  """
  # Compare in float32 explicitly so K.equal never sees mismatched dtypes.
  y_true = K.cast(y_true, dtype='float32')
  pred_value = K.cast(K.argmax(y_pred, axis=-1), dtype='float32')
  is_correct = K.cast(K.equal(y_true, pred_value), dtype='float32')

  mask = K.cast(K.greater(y_true, 0), dtype='float32')
  n_correct = K.sum(mask * is_correct)
  n_total = K.sum(mask)

  return tf.math.divide_no_nan(n_correct, n_total)

In [9]:
# Loading all the required information (embedding matrices and tokenizers)

vocab_size_sms = 11613
vocab_size_eng = 2800

DATA_DIR = "/content/drive/MyDrive/CaseStudy2"


def _load_pickle(file_name):
  """Unpickle DATA_DIR/file_name, closing the file even if loading fails."""
  # SECURITY: pickle.load can execute arbitrary code -- only load files you trust.
  with open(DATA_DIR + "/" + file_name, 'rb') as infile:
    return pickle.load(infile)


decoder_embedding_matrix = _load_pickle("decoder_embedding_matrix.pickle")
encoder_embedding_matrix = _load_pickle("encoder_embedding_matrix.pickle")

sms_inp_test = _load_pickle("sms_inp_test.pickle")
eng_inp_test = _load_pickle("eng_inp_test.pickle")
eng_out_test = _load_pickle("eng_out_test.pickle")

sms_tokenizer = _load_pickle("sms_tokenizer.pickle")
eng_tokenizer = _load_pickle("eng_tokenizer.pickle")

model  = encoder_decoder(encoder_inputs_length=51,decoder_inputs_length=42,output_vocab_size=vocab_size_eng+1,
                         score_fun='concat',attn_units=50)

optimizer = tf.keras.optimizers.Adam()

model.compile(optimizer=optimizer,loss=lossfunction,metrics=[accuracy])

# NOTE(review): a single epoch on the *test* split; presumably this only serves to
# build the model's variables before load_model() overwrites them with the saved
# weights -- confirm, and avoid reporting metrics from this run.
model.fit([sms_inp_test, eng_inp_test], eng_out_test,
          batch_size=50,
          epochs=1)



<keras.callbacks.History at 0x7fcba77f2490>

In [10]:
# https://machinelearningmastery.com/beam-search-decoder-natural-language-processing/

from math import log
from numpy import array
from numpy import argmax
 
# beam search
def beam_search_decoder(data, k):
  """Beam search over a sequence of per-step probability rows.

  data -- iterable of rows; row[j] is the probability of token j at that step
  k    -- beam width (number of partial sequences kept after every step)

  Returns a list of at most k [token_indices, score] pairs ordered best first,
  where score is the summed negative log-probability (lower is better).

  A probability that is not strictly positive costs +inf, so such a token can
  never outrank a possible one. (It used to reset the candidate's score to 0,
  which discarded the accumulated score and made the impossible token the best
  candidate.)
  """
  sequences = [[list(), 0.0]]
  for row in data:
    # The cost of a token does not depend on the beam, so compute it once per row.
    costs = []
    for prob in row:
      prob = float(prob)
      costs.append(-log(prob) if prob > 0.0 else float("inf"))

    all_candidates = [
        [seq + [j], score + cost]
        for seq, score in sequences
        for j, cost in enumerate(costs)
    ]
    # order all candidates by score and select k best
    sequences = sorted(all_candidates, key=lambda candidate: candidate[1])[:k]
  return sequences

In [11]:
import re
def decontractions(phrase):
  """Expand English contractions in `phrase` and blank out "haha"/"hehe".

  Each (pattern, replacement) pair below is applied with re.sub, in order:
  the specific forms ("won't", "can't") first, then the generic suffixes, for
  both the straight (') and the typographic (’) apostrophe.
  https://stackoverflow.com/questions/19790188/expanding-english-language-contractions-in-python/47091490#47091490"""
  substitutions = (
      # specific
      (r"won\'t", "will not"),
      (r"can\'t", "can not"),
      (r"won\’t", "will not"),
      (r"can\’t", "can not"),
      # general, straight apostrophe
      (r"n\'t", " not"),
      (r"\'re", " are"),
      (r"\'s", " is"),
      (r"\'d", " would"),
      (r"\'ll", " will"),
      (r"\'t", " not"),
      (r"\'ve", " have"),
      (r"\'m", " am"),
      # general, typographic apostrophe
      (r"n\’t", " not"),
      (r"\’re", " are"),
      (r"\’s", " is"),
      (r"\’d", " would"),
      (r"\’ll", " will"),
      (r"\’t", " not"),
      (r"\’ve", " have"),
      (r"\’m", " am"),
      # laughter is replaced by a single space
      (r"haha", " "),
      (r"hehe", " "),
  )

  for pattern, replacement in substitutions:
    phrase = re.sub(pattern, replacement, phrase)
  return phrase

def preprocess(text):
  """Normalise raw text: lowercase, expand contractions, drop punctuation and digits.

  Steps, in order:
    1. lowercase
    2. decontractions() (must run before apostrophes are stripped)
    3. remove the listed punctuation/symbol characters
    4. remove every remaining character that is not an ASCII letter, digit or space
    5. remove digits

  Whitespace is not collapsed or trimmed here.
  """
  text = text.lower()
  text = decontractions(text)
  # Raw strings: the previous non-raw literal contained the invalid escape "\?".
  text = re.sub(r'[$)\?"’.°!;\'€%:,(/]', '', text)
  text = re.sub(r'[^A-Za-z0-9 ]+', '', text)
  text = re.sub(r'[0-9]', '', text)
  return text

In [12]:
def load_model(weights_path="/content/drive/MyDrive/CaseStudy2/attention_model.h5"):
  """
  Load saved weights into the notebook-level `model` and return that same object.

  weights_path -- weights file passed to model.load_weights; defaults to the
                  location that used to be hard-coded here.

  NOTE: this definition shadows the `load_model` imported from
  tensorflow.keras.models at the top of the notebook.
  """
  model.load_weights(weights_path)

  return model

In [13]:
class ModelPrediction():
  """Inference wrapper: converts one SMS-style sentence with the trained model."""

  def __init__(self):
    # load_model() returns the notebook-level model with the saved weights loaded.
    self.model_predict = load_model()

  def predict(self,input_sentence):
    """Decode `input_sentence` one token at a time and return the output text.

    The sentence is preprocessed, mapped to ids with sms_tokenizer and padded to
    max_length_inp. Decoding starts from '<start>' and stops after '<end>' is
    emitted or after max_length_targ steps. Every emitted word, including
    '<end>', is followed by a single space in the returned string.

    Words missing from the tokenizer's vocabulary map to its OOV token when it
    has one and are skipped otherwise (previously they raised KeyError).
    """
    max_length_targ = 42
    # NOTE(review): the model in this notebook is built with encoder_inputs_length=51
    # while inputs here are padded to 50 -- confirm which is intended.
    max_length_inp  = 50

    sentence = preprocess(input_sentence).strip()

    word_index = sms_tokenizer.word_index
    oov_token = getattr(sms_tokenizer, 'oov_token', None)
    oov_index = word_index.get(oov_token) if oov_token is not None else None
    inputs = []
    for word in sentence.split():
      token_id = word_index.get(word, oov_index)
      if token_id is not None:
        inputs.append(token_id)
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],maxlen=max_length_inp,padding='post')
    inputs = tf.convert_to_tensor(inputs)

    # Address the sub-models by attribute rather than by position in .layers.
    encoder = self.model_predict.encoder
    one_step_decoder = self.model_predict.decoder.onestepdecoder

    initial_state = encoder.initialize_states(batch_size=1)
    encoder_outputs, state_h, state_c = encoder(inputs, initial_state)
    dec_input = tf.expand_dims([eng_tokenizer.word_index['<start>']], 0)

    result = ''
    for _step in range(max_length_targ):
      logits, state_h, state_c, _att_weights, _context = one_step_decoder(
          dec_input, encoder_outputs, state_h, state_c, training=False)

      # beam_search_decoder works on probabilities, so normalise the logits first.
      # Clipping keeps every probability strictly positive, so log() is always defined.
      probs = tf.nn.softmax(logits, axis=-1).numpy()
      probs = np.clip(probs, np.finfo(probs.dtype).tiny, 1.0)

      # Beam Search Decoder (with k=1 this selects the single most likely token).
      best_sequence = beam_search_decoder(probs, k=1)[0][0]
      predicted_id = int(best_sequence[0])

      # The same id is used for the emitted word, the stop test and the feedback.
      word = eng_tokenizer.index_word.get(predicted_id)
      if word is None:
        # No vocabulary entry for this id (e.g. a padding id): stop decoding.
        break

      result += word + ' '
      if word == '<end>':
        return result

      # the predicted ID is fed back into the model
      dec_input = tf.expand_dims([predicted_id], 0)

    return result

In [14]:
# Build the inference wrapper; its constructor calls load_model() to load the saved weights.
model1 = ModelPrediction()

In [15]:
# Sample prediction; the returned string is shown as the cell output.
model1.predict("i juz said  also looking for someone to go")

'i just said you also looking for someone to go <end> '

In [16]:
# Sample prediction; preprocess() blanks out the leading "haha" before tokenisation.
model1.predict("haha ok one m going to make up late too")

'ok i am going to get up late too <end> '

In [17]:
# Sample prediction; the returned string is shown as the cell output.
model1.predict("okso did u get the idea i wuz green")

'ok so did you get the idea i was green <end> '