In [None]:
#!pip install tensorflow-gpu==2.3

In [1]:
from google.colab import files
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense,Input,GRU,Embedding,Flatten
from tensorflow.keras.models import Model
import keras.backend as K
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib as plt
from joblib import dump, load

In [3]:
class Encoder(tf.keras.Model):
    '''
    Encoder model -- embeds an input token sequence and runs it through an LSTM.

    Parameters
    ----------
    inp_vocab_size : size of the input vocabulary (``input_dim`` of the embedding).
    embedding_size : dimensionality of each token embedding.
    lstm_size      : number of units in the encoder LSTM.
    input_length   : length of the input sequences handed to the embedding layer.
    '''

    def __init__(self,inp_vocab_size,embedding_size,lstm_size,input_length):
        # Keras expects the base-class constructor to run before any attribute
        # is assigned on a subclassed model, so it comes first.
        super().__init__()
        self.inp_vocab_size=inp_vocab_size
        self.embedding_size=embedding_size
        self.lstm_size=lstm_size
        self.input_length=input_length

        # Embedding layer; mask_zero=True makes token id 0 act as padding.
        self.Embedding_Layer=tf.keras.layers.Embedding(input_dim=self.inp_vocab_size,output_dim=self.embedding_size,input_length=self.input_length,mask_zero=True,name="Encoder_Embedding")
        # Encoder LSTM; returns the per-timestep outputs as well as the final states.
        self.LSTM_Layer=tf.keras.layers.LSTM(self.lstm_size,return_state=True,return_sequences=True,name="Encoder_LSTM")

    def call(self,input_sequence,states=None):
        '''
        Embed ``input_sequence`` and pass the embeddings through the encoder LSTM.

        NOTE: ``states`` is accepted so existing callers keep working, but it is
        NOT forwarded to the LSTM -- the LSTM is called without an initial state.
        The values produced by ``initialize_states`` are all zeros anyway.

        returns -- all encoder outputs, the last hidden state and the last cell state
        '''
        input_embedd=self.Embedding_Layer(input_sequence)
        # The results are also kept on the instance, as in the original implementation.
        self.lstm_output,self.lstm_state_h,self.lstm_state_c=self.LSTM_Layer(input_embedd)

        return self.lstm_output,self.lstm_state_h,self.lstm_state_c

    def initialize_states(self,batch_size):
        '''
        Given a batch size, return the initial hidden state and the initial cell state.
        Both are NumPy arrays of zeros with shape [batch_size, lstm_size],
        e.g. [32, lstm_size] for a batch size of 32.
        '''
        initial_hidden_state=np.zeros((batch_size,self.lstm_size))
        initial_cell_state=np.zeros((batch_size,self.lstm_size))
        return initial_hidden_state,initial_cell_state

In [5]:
class Attention(tf.keras.layers.Layer):
  '''
    Attention layer that scores every encoder output against the current
    decoder hidden state and builds a context vector from the scores.

    Supported scoring functions:
      * 'dot'     -- score = encoder_output . decoder_hidden_state
      * 'general' -- score = W(encoder_output) . decoder_hidden_state
      * 'concat'  -- score = V(tanh(W1(encoder_output) + W2(decoder_hidden_state)))

    Parameters
    ----------
    scoring_function : one of 'dot', 'general', 'concat'.
    att_units        : number of units of the Dense layers used by 'general' and 'concat'.

    Raises
    ------
    ValueError : if ``scoring_function`` is not one of the supported names.
  '''
  SUPPORTED_SCORING_FUNCTIONS=('dot','general','concat')

  def __init__(self,scoring_function, att_units):
    # The base-class constructor must run before attributes/sub-layers are assigned.
    super().__init__()
    # Fail early: an unknown name would otherwise make call() return None silently.
    if scoring_function not in self.SUPPORTED_SCORING_FUNCTIONS:
      raise ValueError(
          "Unsupported scoring_function %r; expected one of %s"
          % (scoring_function, ", ".join(self.SUPPORTED_SCORING_FUNCTIONS)))
    self.scoring_function=scoring_function
    self.att_units=att_units

    # 'dot' needs no trainable variables.
    if scoring_function == 'general':
      # NOTE(review): the matmul in call() presumably requires att_units to equal
      # the size of the decoder hidden state -- confirm against callers.
      self.W=tf.keras.layers.Dense(att_units)
    elif scoring_function == 'concat':
      self.W1 = tf.keras.layers.Dense(att_units)
      self.W2 = tf.keras.layers.Dense(att_units)
      self.V = tf.keras.layers.Dense(1)

  def call(self,decoder_hidden_state,encoder_output):
    '''
      Takes the current-step decoder_hidden_state and all the encoder outputs.
      * Computes a score between decoder_hidden_state and every encoder output
        using the configured scoring function.
      * Applies a softmax over axis 1 of the scores to obtain the attention weights.
      * Multiplies the weights with encoder_output and sums over axis 1 to obtain
        the context vector.
      Returns (context_vector, attention_weights).
    '''
    if self.scoring_function == 'dot':
      score=tf.matmul(encoder_output,tf.expand_dims(decoder_hidden_state,axis=-1))
    elif self.scoring_function == 'general':
      score=tf.matmul(self.W(encoder_output),tf.expand_dims(decoder_hidden_state,axis=-1))
    elif self.scoring_function == 'concat':
      # The hidden state gets an extra axis 1 so it broadcasts against encoder_output.
      score = self.V(tf.nn.tanh(self.W1(encoder_output) + self.W2(tf.expand_dims(decoder_hidden_state,axis=1))))
    else:
      # Only reachable if the attribute was changed after construction.
      raise ValueError("Unsupported scoring_function %r" % (self.scoring_function,))

    # Normalisation and weighted sum are the same for every scoring function.
    attention_weight=tf.nn.softmax(score,axis=1)
    context_vector=tf.reduce_sum(attention_weight*encoder_output,axis=1)
    return context_vector,attention_weight

In [6]:
class OneStepDecoder(tf.keras.Model):
  '''
    Decoder for a single time step: embedding -> attention -> LSTM -> dense projection.

    Parameters
    ----------
    tar_vocab_size : size of the target vocabulary (embedding input_dim and dense output size).
    embedding_dim  : dimensionality of the target token embeddings.
    input_length   : sequence length handed to the embedding layer.
    dec_units      : number of units in the decoder LSTM.
    score_fun      : scoring function name passed to ``Attention``.
    att_units      : number of attention units passed to ``Attention``.
  '''
  def __init__(self,tar_vocab_size, embedding_dim, input_length, dec_units ,score_fun ,att_units):
      # The base-class constructor must run before attributes/sub-layers are assigned.
      super().__init__()
      self.tar_vocab_size=tar_vocab_size
      self.embedding_dim=embedding_dim
      self.input_length=input_length
      self.dec_units=dec_units
      self.score_fun=score_fun
      self.att_units=att_units

      # NOTE(review): the embedding is frozen (trainable=False) although no pretrained
      # weights are loaded in this class -- confirm this is intended. It is left as is
      # because changing it may alter the weight layout of already saved checkpoints.
      self.DEmbedding=tf.keras.layers.Embedding(input_dim=self.tar_vocab_size,output_dim=self.embedding_dim,input_length=self.input_length,mask_zero=True,name="Decoder_Embedding", trainable=False)
      # NOTE(review): this is the decoder LSTM even though it is named "Encoder_LSTM";
      # the name is kept unchanged in case saved weights depend on it.
      self.DLSTM_Layer=tf.keras.layers.LSTM(self.dec_units,return_state=True,return_sequences=True,name="Encoder_LSTM")
      # Projects the LSTM output to one logit per target-vocabulary entry.
      self.full_connected=tf.keras.layers.Dense(self.tar_vocab_size)
      self.attention=Attention(self.score_fun,self.att_units)

  def call(self,input_to_decoder, encoder_output, state_h,state_c):
    '''
        One step decoder mechanism, step by step:
      A. Pass input_to_decoder to the embedding layer -> (batch_size,1,embedding_dim)
      B. Using encoder_output and the decoder hidden state, compute the context vector.
      C. Concatenate the context vector with the step A output.
      D. Pass the step C output to the LSTM to get the decoder output and the
         new hidden and cell states.
      E. Pass the decoder output to the dense layer (vocab size).
      F. Return the step E output, the states from step D, and the attention
         weights and context vector from step B.
    '''
    #step-A
    embedded=self.DEmbedding(input_to_decoder)
    #step-B
    context_vector,attention_weights=self.attention(state_h,encoder_output)
    #step-C: give the context vector an axis 1 so it can be joined with the embedding
    lstm_input=tf.concat([tf.expand_dims(context_vector,1),embedded],axis=-1)
    #step-D
    decoder_output,hidden_states,cell_states=self.DLSTM_Layer(lstm_input,initial_state=[state_h,state_c])
    # Collapse the leading axes so the result is 2-D with the LSTM feature size last.
    decoder_output=tf.reshape(decoder_output,(-1,decoder_output.shape[2]))
    #step-E
    output=self.full_connected(decoder_output)
    #step-F
    return output,hidden_states,cell_states,attention_weights,context_vector

In [7]:
class Decoder(tf.keras.Model):
    '''
    Full decoder: runs ``OneStepDecoder`` once per position of the decoder input
    and collects the per-step outputs.

    Parameters
    ----------
    out_vocab_size : size of the output vocabulary.
    embedding_dim  : dimensionality of the target token embeddings.
    input_length   : decoder input sequence length.
    dec_units      : number of units in the decoder LSTM.
    score_fun      : attention scoring function name.
    att_units      : number of attention units.
    '''
    def __init__(self,out_vocab_size, embedding_dim, input_length, dec_units ,score_fun ,att_units):
      # The base-class constructor must run before attributes/sub-layers are assigned.
      super().__init__()
      self.out_vocab_size=out_vocab_size
      self.embedding_dim=embedding_dim
      self.input_length=input_length
      self.dec_units=dec_units
      self.score_fun=score_fun
      self.att_units=att_units
      self.onestepdecoder=OneStepDecoder(self.out_vocab_size, self.embedding_dim, self.input_length, self.dec_units ,self.score_fun ,self.att_units)

    def call(self, input_to_decoder,encoder_output,decoder_hidden_state,decoder_cell_state ):
        '''
        Decode every position along axis 1 of ``input_to_decoder``.

        For each position the one-step decoder receives that single input column,
        the encoder outputs and the states returned by the previous step. The
        per-step outputs are stacked and transposed with permutation [1,0,2], so
        the step axis becomes axis 1 of the result.
        '''
        # Number of decoding steps = size of axis 1 of the decoder input.
        num_steps=tf.shape(input_to_decoder)[1]
        # Tensor array that stores the output of each and every time step.
        all_outputs=tf.TensorArray(tf.float32,size=num_steps,name="output_arrays")

        for timestep in range(num_steps):
            # timestep:timestep+1 keeps axis 1, so each step gets a single-column slice.
            output,decoder_hidden_state,decoder_cell_state,_,_=self.onestepdecoder(input_to_decoder[:,timestep:timestep+1],encoder_output,decoder_hidden_state,decoder_cell_state)
            all_outputs=all_outputs.write(timestep,output)

        # stack() puts the step axis first; swap it with the next axis.
        all_outputs=tf.transpose(all_outputs.stack(),[1,0,2])
        return all_outputs


In [8]:

class encoder_decoder(tf.keras.Model):
    '''
    Sequence-to-sequence model that chains ``Encoder`` and ``Decoder``.

    Parameters
    ----------
    embedding_size       : embedding dimensionality used by both encoder and decoder.
    lstm_size            : number of units in the encoder LSTM.
    input_length         : encoder input sequence length.
    decoder_input_length : decoder input sequence length.
    dec_units            : number of units in the decoder LSTM.
    score_fun            : attention scoring function name.
    att_units            : number of attention units.
    batch_size           : batch size used to create the encoder's initial states.
    inp_vocab_size       : encoder vocabulary size (default 104, the previously hard-coded value).
    out_vocab_size       : decoder vocabulary size (default 93, the previously hard-coded value).
    '''
    def __init__(self,embedding_size,lstm_size,input_length,decoder_input_length,dec_units ,score_fun ,att_units, batch_size, inp_vocab_size=104, out_vocab_size=93):
        super().__init__()
        # Vocabulary sizes are parameters now; the defaults keep existing calls unchanged.
        self.encoder=Encoder(inp_vocab_size,embedding_size,lstm_size,input_length)
        self.decoder=Decoder(out_vocab_size,embedding_size, decoder_input_length, dec_units ,score_fun ,att_units)
        self.batch_size= batch_size

    def call(self,data):
        '''
        Run the encoder on ``data[0]`` and the decoder on ``data[1]``.

        The decoder starts from the encoder's final hidden and cell states.
        Returns the decoder output.
        '''
        input_,target_sentences = data[0], data[1]
        initial_state= self.encoder.initialize_states(self.batch_size)
        encoder_output,state_h,state_c=self.encoder(input_,initial_state)
        # The decoder's initial states are the encoder's final states.
        output=self.decoder(target_sentences,encoder_output, state_h, state_c)
        return output

In [9]:
def custom_lossfunction(targets,logits):
    """Sparse categorical cross-entropy that ignores padded positions.

    Positions whose target id is 0 contribute a loss of zero. The result is the
    mean over all positions of the masked per-position loss (masked positions
    are still counted in the denominator).

    Refer https://www.tensorflow.org/tutorials/text/nmt_with_attention#define_the_optimizer_and_the_loss_function
    """
    cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
    per_position_loss = cross_entropy(targets, logits)

    # 1.0 where the target is a real token, 0.0 where it is padding (id 0).
    is_padding = tf.math.equal(targets, 0)
    keep = tf.cast(tf.math.logical_not(is_padding), dtype=per_position_loss.dtype)

    return tf.reduce_mean(per_position_loss * keep)

In [10]:
class Dataset:
    """Indexable collection of (encoder input, decoder input, decoder output) sequences.

    Parameters
    ----------
    data        : table with the columns 'SMS_TEXT', 'ENGLISH_INPUT' and 'ENGLISH_OUTPUT'.
    tknizer_ita : tokenizer applied to the 'SMS_TEXT' column (encoder side).
    tknizer_eng : tokenizer applied to the two 'ENGLISH_*' columns (decoder side).
    max_len     : padded length of the encoder sequences.
    max_len1    : padded length of the decoder input/output sequences.
    """
    def __init__(self, data, tknizer_ita, tknizer_eng, max_len,max_len1):
        self.encoder_inps = data['SMS_TEXT'].values
        self.decoder_inps = data['ENGLISH_INPUT'].values
        self.decoder_outs = data['ENGLISH_OUTPUT'].values
        self.tknizer_eng = tknizer_eng
        self.tknizer_ita = tknizer_ita
        self.max_len = max_len
        self.max_len1=max_len1

    def __getitem__(self, i):
        """Return the tokenized, post-padded sequences of sample ``i``.

        Each returned array is built from a one-element list of texts. Nothing
        is stored on the instance, so reading an item has no side effects.
        """
        # texts_to_sequences expects a list of texts, hence the one-element lists.
        encoder_seq = self.tknizer_ita.texts_to_sequences([self.encoder_inps[i]])
        decoder_inp_seq = self.tknizer_eng.texts_to_sequences([self.decoder_inps[i]])
        decoder_out_seq = self.tknizer_eng.texts_to_sequences([self.decoder_outs[i]])

        encoder_seq = pad_sequences(encoder_seq, maxlen=self.max_len, dtype='int32', padding='post')
        decoder_inp_seq = pad_sequences(decoder_inp_seq, maxlen=self.max_len1, dtype='int32', padding='post')
        decoder_out_seq = pad_sequences(decoder_out_seq, maxlen=self.max_len1, dtype='int32', padding='post')
        return encoder_seq, decoder_inp_seq, decoder_out_seq

    def __len__(self): # your model.fit_gen requires this function
        """Number of samples in the dataset."""
        return len(self.encoder_inps)
 
    
class Dataloder(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches built from a ``Dataset``.

    Parameters
    ----------
    dataset    : object that returns (encoder_seq, decoder_inp_seq, decoder_out_seq)
                 when indexed and exposes ``encoder_inps``.
    batch_size : number of samples per batch (default 1).
    """
    def __init__(self, dataset, batch_size=1):
        self.dataset = dataset
        self.batch_size = batch_size
        # Order in which samples are served; reshuffled by on_epoch_end.
        self.indexes = np.arange(len(self.dataset.encoder_inps))

    def __getitem__(self, i):
        """Return batch ``i`` as ``([encoder_batch, decoder_input_batch], decoder_output_batch)``."""
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        # Look samples up through self.indexes so the shuffle done in on_epoch_end
        # actually changes which samples end up in which batch.
        data = [self.dataset[j] for j in self.indexes[start:stop]]

        # Stack the samples along axis 1 and drop the leading axis (which must be
        # of size 1 for np.squeeze to succeed).
        batch = [np.squeeze(np.stack(samples, axis=1), axis=0) for samples in zip(*data)]
        # Inputs are [encoder sequence, decoder input]; the target is the decoder
        # output. All are already converted into sequences.
        return tuple([[batch[0],batch[1]],batch[2]])

    def __len__(self):  # your model.fit_gen requires this function
        """Number of complete batches; a trailing partial batch is not counted."""
        return len(self.indexes) // self.batch_size

    def on_epoch_end(self):
        """Shuffle the sample order."""
        self.indexes = np.random.permutation(self.indexes)

In [11]:
def load_model(dataloader_path='/content/test_dataloader.joblib',
               weights_path='/content/attention_model_0.3930.h5'):
  """Build the encoder-decoder model and load trained weights into it.

     1. Load the test dataloader from a joblib file.
     2. Fit the model for a single step (as the original note puts it, "to compile");
        presumably this creates the model's variables so that weights can be
        loaded into them.
     3. Load the weights and return the model.

     Parameters
     ----------
     dataloader_path : path of the joblib-dumped dataloader.
     weights_path    : path of the saved model weights.

     Both defaults are the paths that were previously hard-coded.
  """
  # SECURITY: joblib.load unpickles arbitrary objects -- only load trusted files.
  test_dataloader=load(dataloader_path)
  model2  = encoder_decoder(embedding_size=300,lstm_size=100,input_length=170,decoder_input_length=202,dec_units=100,score_fun='concat',att_units=100,batch_size=64)
  optimizer = tf.keras.optimizers.Adam(0.01)
  model2.compile(optimizer=optimizer,loss=custom_lossfunction)
  # Model.fit accepts generators/Sequences directly; fit_generator is deprecated.
  model2.fit(test_dataloader, steps_per_epoch=1, epochs=1, validation_data=test_dataloader, validation_steps=1)
  # Whatever the single fit step changed is overwritten by the stored weights.
  model2.load_weights(weights_path)
  return model2

In [97]:
class ModelPrediction():
  """Loads the tokenizers and the trained model and decodes sentences greedily.

  Class attributes
  ----------------
  MAX_INPUT_LEN  : padded length of the encoder input (170).
  MAX_OUTPUT_LEN : maximum number of decoding steps (202).
  START_TOKEN    : token fed to the decoder at the first step ('\\t').
  END_TOKEN      : token at which each decoded string is cut ('\\n').
  """
  MAX_INPUT_LEN=170
  MAX_OUTPUT_LEN=202
  START_TOKEN='\t'
  END_TOKEN='\n'

  def __init__(self,tokenizer_path='/content/tokenizer.joblib',target_tokenizer_path='/content/tokenizer_e.joblib'):
    """Load both tokenizers and call load_model, which returns the model with trained weights.

    tokenizer_path        : joblib file of the tokenizer applied to the input sentences.
    target_tokenizer_path : joblib file of the tokenizer used for the decoder tokens.
    Both defaults are the paths that were previously hard-coded.
    """
    # SECURITY: joblib.load unpickles arbitrary objects -- only load trusted files.
    self.tokenizer=load(tokenizer_path)
    self.tokenizer_e=load(target_tokenizer_path)
    self.model2=load_model()

  @staticmethod
  def _truncate_at_end_token(text,end_token='\n'):
    """Return ``text`` up to the first ``end_token``, or all of it when the token is absent."""
    end=text.find(end_token)
    # find() returns -1 when the token is missing; slicing with -1 would drop the last character.
    return text if end == -1 else text[:end]

  def prediction(self,input_sentence):
    """
    Decode a list of input sentences; a single string is treated as one sentence.

    1. Tokenize and pad the sentences and convert them into a tensor.
    2. Run the encoder to get the encoder output and its final hidden and cell state;
       the decoder starts from those states.
    3. Feed the start token to the decoder for every sentence.
    4. For at most MAX_OUTPUT_LEN steps:
       a. pass the decoder input, encoder output and states to the one-step decoder;
       b. take the arg-max id of every row and convert it back to text;
       c. append that text to the output string of the matching sentence;
       d. stop early once every sentence has produced the end token.
    5. Cut every output string at its first end token and return the list of strings.

    Returns an empty list for empty input.
    """
    if isinstance(input_sentence,str):
      input_sentence=[input_sentence]
    input_sentence=list(input_sentence)
    n=len(input_sentence)
    if n == 0:
      return []

    input_sequence=self.tokenizer.texts_to_sequences(input_sentence)
    inputs=pad_sequences(input_sequence,maxlen=self.MAX_INPUT_LEN,padding='post')
    inputs=tf.convert_to_tensor(inputs)

    # Reach the sub-models by attribute name rather than by position in model.layers.
    encoder=self.model2.encoder
    one_step_decoder=self.model2.decoder.onestepdecoder

    encoder_output,dec_hidden,cell_state=encoder(inputs,encoder.initialize_states(n))

    # One start-token id per sentence.
    start_id=self.tokenizer_e.word_index[self.START_TOKEN]
    dec_input=np.full((n,1),start_id,dtype=np.int32)

    output_array=['']*n
    finished=[False]*n
    for step in range(self.MAX_OUTPUT_LEN):
      predictions,dec_hidden,cell_state,_,_=one_step_decoder(dec_input,encoder_output,dec_hidden,cell_state)

      # Greedy decoding: take the highest-scoring id of every row.
      predicted_id=tf.argmax(predictions,axis=1).numpy().reshape((n,1))
      pieces=self.tokenizer_e.sequences_to_texts(predicted_id)
      for index,piece in enumerate(pieces):
        output_array[index]+=piece
        if self.END_TOKEN in piece:
          finished[index]=True

      # Text after the first end token is discarded below, so further steps
      # cannot change the result once every sentence has finished.
      if all(finished):
        break
      dec_input=predicted_id

    return [self._truncate_at_end_token(text,self.END_TOKEN) for text in output_array]

In [98]:
# Build the predictor once: this loads both tokenizers and the trained model.
model = ModelPrediction()



In [99]:
# Spot-check the predictor on a few short inputs.
list1 = ["Hello", "Hi", "bye", "Lets go"]
model.prediction(list1)

['Hh. elo.', 'Hi.', 'Cwy o ee.', 'Hl.e ets o o es.']

In [100]:
# Spot-check the predictor on longer, multi-word inputs.
list2 = ["Lets go to New York", "Uttar Pradesh"]
model.prediction(list2)

['Hele ets g o nt ew o?', 'A ottr ar t adprae.']

In [102]:
# Spot-check the predictor on a single-sentence list.
list3 = ["Lets"]
model.prediction(list3)

['Wleet ots.']