In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import os
import sys
import pickle as pkl
import re

In [2]:
# Locations of the trained chatbot artifacts (model_2_v3).
weight_path_encoder = "../chatbot/model_v1/model_2_v3/encoder/weights.ckpt"
# Fixed: this used to be a copy of the encoder path, so the decoder was being
# restored from the encoder's checkpoint file.
# NOTE(review): assumes the decoder checkpoint was saved under ".../decoder/" —
# confirm the directory name against the training notebook.
weight_path_decoder = "../chatbot/model_v1/model_2_v3/decoder/weights.ckpt"
# Pickled artifacts loaded further down: the tokenizer and the values used to
# initialise the encoder/decoder embedding layers.
tokenizer_path = "../chatbot/model_v1/model_2_v3/tokenizer.pkl"
encoder_embedding_layer_path = "../chatbot/model_v1/model_2_v3/encoder_embedding_layer.pkl"
decoder_embedding_layer_path = "../chatbot/model_v1/model_2_v3/decoder_embedding_layer.pkl"


In [3]:
# Hyper-parameters shared by the encoder and decoder defined below.
BATCH_SIZE = 64          # batch size used when building the models
embedding_dim = 100      # width of each token embedding vector
units = 1024             # LSTM hidden-state size
vocab_size = 30_000 + 1  # presumably 30k tokenizer words plus one reserved index — TODO confirm

In [4]:

# Restore the tokenizer and the values used to initialise both embedding layers.
# NOTE(review): pickle.load can execute arbitrary code contained in the file —
# only load artifacts that come from a trusted source.
with open(tokenizer_path, "rb") as tokenizer_file:
    tokenizer = pkl.load(tokenizer_file)

with open(encoder_embedding_layer_path, "rb") as encoder_embedding_file:
    encoder_embedding_variables = pkl.load(encoder_embedding_file)

with open(decoder_embedding_layer_path, "rb") as decoder_embedding_file:
    decoder_embedding_variables = pkl.load(decoder_embedding_file)



In [5]:
class EncoderAttention(tf.keras.Model):
    """Embedding + LSTM encoder whose per-step outputs feed the decoder's attention.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dims: width of each embedding vector.
        hidden_units: size of the LSTM hidden and cell states.
    """

    def __init__(self, vocab_size, embedding_dims, hidden_units):
        super().__init__()
        self.hidden_units = hidden_units
        # The embedding table starts from the values unpickled into the
        # module-level `encoder_embedding_variables` and remains trainable.
        self.embedding_layer = tf.keras.layers.Embedding(
            vocab_size,
            embedding_dims,
            embeddings_initializer=tf.keras.initializers.Constant(encoder_embedding_variables),
            trainable=True,
        )
        # return_sequences=True: the attention mechanism needs the output of
        # every time step; return_state=True: the final h/c states are also
        # returned so the caller can seed the decoder with them.
        self.lstm_layer = tf.keras.layers.LSTM(
            hidden_units,
            return_sequences=True,
            return_state=True,
        )

    def initialize_hidden_state(self, batch_size=None):
        """Return a zero-filled ``[h, c]`` pair to start the LSTM from.

        Args:
            batch_size: number of rows in each state tensor. Defaults to the
                module-level ``BATCH_SIZE`` (the previous, hard-coded
                behaviour), so existing calls without arguments are unchanged.

        Returns:
            A list of two ``(batch_size, hidden_units)`` float tensors.
        """
        if batch_size is None:
            batch_size = BATCH_SIZE
        state_shape = (batch_size, self.hidden_units)
        return [tf.zeros(state_shape), tf.zeros(state_shape)]

    def call(self, inputs, hidden_state):
        """Embed ``inputs`` and run them through the LSTM.

        Args:
            inputs: batch of token-id sequences.
            hidden_state: ``[h, c]`` initial state for the LSTM.

        Returns:
            ``(output, h_state, c_state)``: the LSTM output for every time
            step followed by the final hidden and cell states.
        """
        embedding = self.embedding_layer(inputs)
        output, h_state, c_state = self.lstm_layer(embedding, initial_state=hidden_state)
        return output, h_state, c_state


In [6]:

class DecoderAttention(tf.keras.Model):
    """Attention-based LSTM decoder assembled from tensorflow_addons seq2seq parts.

    Args:
        vocab_size: size of the embedding table and of the output projection.
        embedding_dim: width of each embedding vector.
        hidden_units: LSTM cell size; also used as the attention depth and the
            attention layer size.
        max_length: padded sequence length the decoder is built for. Defaults
            to 15, the value that was previously hard-coded.
        batch_size: batch size used to build the per-example sequence-length
            lists. Defaults to the module-level ``BATCH_SIZE``.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, max_length=15, batch_size=None):
        super().__init__()
        if batch_size is None:
            batch_size = BATCH_SIZE
        self.max_length = max_length
        self.train_batch_size = batch_size

        # The embedding table starts from the values unpickled into the
        # module-level `decoder_embedding_variables` and remains trainable.
        self.embedding_layer = tf.keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            embeddings_initializer=tf.keras.initializers.Constant(decoder_embedding_variables),
            trainable=True,
        )

        self.lstm_cell = tf.keras.layers.LSTMCell(hidden_units)

        # Sampler used by `self.decoder`; it reads the embedded inputs passed to call().
        self.sampler = tfa.seq2seq.sampler.TrainingSampler()

        # The memory (encoder outputs) is attached later via
        # `attention_mechanism.setup_memory(...)`.
        # NOTE(review): setup_memory() is later called with a batch of 1 and no
        # lengths — confirm this constructor-time length list is not applied then.
        self.attention_mechanism = tfa.seq2seq.LuongAttention(
            hidden_units,
            memory_sequence_length=self.train_batch_size * [self.max_length],
        )

        # Wrap the LSTM cell so every step also attends over the encoder memory.
        self.attention_cell = tfa.seq2seq.AttentionWrapper(
            cell=self.lstm_cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=hidden_units,
        )

        # Projects each decoder step onto vocabulary logits.
        self.output_layer = tf.keras.layers.Dense(vocab_size)
        self.decoder = tfa.seq2seq.BasicDecoder(
            self.attention_cell,
            sampler=self.sampler,
            output_layer=self.output_layer,
        )

    def build_initial_state(self, batch_size, encoder_state):
        """Create the attention cell's start state seeded with the encoder state.

        Args:
            batch_size: batch size of the zero state to create.
            encoder_state: ``[h, c]`` from the encoder, installed as the
                wrapped cell's ``cell_state``.

        Returns:
            The AttentionWrapper state with ``cell_state`` replaced.
        """
        zero_state = self.attention_cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
        return zero_state.clone(cell_state=encoder_state)

    def call(self, inputs, initial_state):
        """Embed ``inputs`` and decode them with the training sampler.

        Args:
            inputs: batch of token-id sequences.
            initial_state: state produced by :meth:`build_initial_state`.

        Returns:
            The first element of the BasicDecoder result (the decoder outputs).
        """
        embedding = self.embedding_layer(inputs)
        # Decodes max_length - 1 steps per example — presumably because targets
        # are shifted by one relative to the inputs; TODO confirm.
        outputs, _, _ = self.decoder(
            embedding,
            initial_state=initial_state,
            sequence_length=self.train_batch_size * [self.max_length - 1],
        )
        return outputs


In [7]:
# Push one dummy batch through both models so their layers create variables,
# then restore the trained weights from the checkpoints.
# Fixed: the dummy batches used to be float32 values in [0, 1); an Embedding
# layer expects integer token ids, so draw int32 ids inside the vocabulary.
example_x = tf.random.uniform((BATCH_SIZE, 15), maxval=vocab_size, dtype=tf.int32)
example_y = tf.random.uniform((BATCH_SIZE, 15), maxval=vocab_size, dtype=tf.int32)

##ENCODER
encoder = EncoderAttention(vocab_size, embedding_dim, units)
# Test the encoder
sample_initial_state = encoder.initialize_hidden_state()
sample_output, sample_h, sample_c = encoder(example_x, sample_initial_state)
print(sample_output.shape)
print(sample_h.shape)


##DECODER
decoder = DecoderAttention(vocab_size, embedding_dim, units)
# The attention mechanism attends over the encoder's full output sequence (its memory).
decoder.attention_mechanism.setup_memory(sample_output)
# Seed the decoder with the encoder's final hidden and cell states.
initial_state = decoder.build_initial_state(BATCH_SIZE, [sample_h, sample_c])
sample_decoder_output = decoder(example_y, initial_state)


encoder.load_weights(weight_path_encoder)
decoder.load_weights(weight_path_decoder)

print("All set")

(64, 15, 1024)
(64, 1024)
All set


In [8]:
def decontracted(phrase):
    """Expand English contractions written with a straight apostrophe.

    Matching is case-sensitive and the rules are applied in order, each one to
    the result of the previous one.

    Args:
        phrase: text to expand.

    Returns:
        The text with every matching contraction replaced.
    """
    expansions = (
        # Irregular forms first, before the generic "n't" rule can touch them.
        (r"won\'t", "will not"),
        (r"can\'t", "can not"),
        # Generic suffix rules.
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    for pattern, replacement in expansions:
        phrase = re.sub(pattern, replacement, phrase)
    return phrase

In [9]:
import unicodedata
def preprocess_sentence(w):
    """Normalise a raw sentence and wrap it in ``<start>`` / ``<end>`` markers.

    Steps, in order: lower-case and trim; remove bracketed tags such as
    ``[word]``; keep only runs of word characters joined by single spaces;
    pad ``? . ! , ¿`` with spaces; collapse runs of spaces/double quotes;
    replace everything that is not ``a-z``, ``A-Z``, ``?``, ``.`` or ``!``
    with a space; trim; expand contractions; add the start/end tokens.

    Unicode folding (e.g. stripping accents) is not performed.

    NOTE(review): after the word-character join no punctuation or apostrophes
    remain, so the punctuation padding and ``decontracted`` appear to have no
    effect here. Left as-is because a fitted tokenizer may depend on this
    exact output — confirm against the training pipeline before reordering.
    """
    text = w.lower().strip()

    # Strip bracketed tags, then keep only the word-character tokens.
    text = re.sub(r"\[\w+\]",'', text)
    word_tokens = re.findall(r"\w+",text)
    text = " ".join(word_tokens)

    # Put spaces around punctuation ("he is a boy." => "he is a boy ."),
    # then squeeze repeated spaces / double quotes into a single space.
    text = re.sub(r"([?.!,¿])", r" \1 ", text)
    text = re.sub(r'[" "]+', " ", text)

    # Anything other than ASCII letters, "?", "." and "!" becomes a space.
    text = re.sub(r"[^a-zA-Z?.!]+", " ", text).strip()
    text = decontracted(text)

    # Start/end tokens tell the model where a sentence begins and stops.
    return '<start> ' + text + ' <end>'


In [11]:
# Encode one user sentence and prepare a greedy attention decoder for inference.
# Kept as a flat cell; the commented signature shows the intended wrapper.
#def reply(sentence, preprocess=True):
preprocess = True
sentence = "Hi there whats up"
# Upper bound on generated tokens. Without it the greedy decoder keeps stepping
# until <end> is produced, which may never happen.
max_reply_length = 15

# Fixed: the tensor used to be called `input`, shadowing the Python builtin.
if preprocess:
    sentence = preprocess_sentence(sentence)
    sentence_tokens = tokenizer.texts_to_sequences([sentence])
    input_tokens = tf.keras.preprocessing.sequence.pad_sequences(sentence_tokens, maxlen=15, padding='post')
else:
    # presumably `sentence` is already a padded batch of token ids here — verify against caller
    input_tokens = sentence
input_tokens = tf.convert_to_tensor(input_tokens)

print("After if")

# Zero [h, c] start state for a batch of one sentence.
encoder_hidden = [tf.zeros((1, units)), tf.zeros((1, units))]
encoder_output, encoder_h, encoder_c = encoder(input_tokens, encoder_hidden)
start_token = tf.convert_to_tensor([tokenizer.word_index['<start>']])
end_token = tokenizer.word_index['<end>']

print("After first block")

# Greedy sampling: at every step pick the most probable word instead of
# sampling from the distribution.
greedy_sampler = tfa.seq2seq.GreedyEmbeddingSampler(decoder.embedding_layer)
print("Greedy sampler set")
# Inference-time decoder that reuses the trained attention cell and output layer.
decoder_instance = tfa.seq2seq.BasicDecoder(cell=decoder.attention_cell,
                                            sampler=greedy_sampler,
                                            output_layer=decoder.output_layer,
                                            maximum_iterations=max_reply_length)
print("Decoder sampler set")
# Point the attention mechanism at this sentence's encoder outputs.
decoder.attention_mechanism.setup_memory(encoder_output)

print("Attention mechanism up!")
# Start state: zeroed attention state with the encoder's final h/c as cell state.
decoder_initial_state = decoder.build_initial_state(batch_size=1, encoder_state=[encoder_h, encoder_c])
print("Initial state ready")
# BasicDecoder only wraps the RNN cell, so each decoding step must be fed
# embedded tokens; GreedyEmbeddingSampler performs that lookup. The embedding
# weights are fetched here so they can be passed to the decoder call.
decoder_embedding_matrix = decoder.embedding_layer.variables[0]
print("Got embedding layer")


After if
After first block
Greedy sampler set
Decoder sampler set
Attention mechanism up!
Initial state ready
Got embedding layer


In [12]:
# Display the AttentionWrapperState built in the previous cell: the encoder's
# final states as cell_state, with attention and alignments still all zeros.
decoder_initial_state

AttentionWrapperState(cell_state=[<tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
array([[ 1.9493668e-03,  1.6567821e-02, -2.3841590e-03, ...,
         6.5299624e-05, -2.3378809e-04,  2.0499024e-01]], dtype=float32)>, <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
array([[ 0.02606296,  0.15195727, -0.01356386, ...,  0.00397725,
        -0.0104762 ,  0.82305944]], dtype=float32)>], attention=<tf.Tensor: shape=(1, 1024), dtype=float32, numpy=array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, alignments=<tf.Tensor: shape=(1, 15), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
      dtype=float32)>, alignment_history=(), attention_state=<tf.Tensor: shape=(1, 15), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
      dtype=float32)>)

In [None]:
# Run greedy decoding from <start> and turn the predicted ids back into text.
outputs, _final_state, _final_lengths = decoder_instance(
    decoder_embedding_matrix,
    start_tokens=start_token,
    end_token=end_token,
    initial_state=decoder_initial_state,
)
print("Done")
# sample_id holds the token id chosen at each step for every batch entry.
result_sequence = outputs.sample_id.numpy()
# Fixed: `return_sentence` was printed without ever being assigned (NameError).
# Batch size is 1, so the reply is the first decoded text.
return_sentence = tokenizer.sequences_to_texts(result_sequence.tolist())[0]

print(return_sentence)

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa

# Minimal, self-contained greedy-decoding example with untrained layers.
# Fixed: this cell used to rebind `vocab_size`, `decoder` and `initial_state`
# from the chatbot cells above to unrelated objects, so re-running an earlier
# cell afterwards would pick up the wrong values. All names are now `demo_`-prefixed.
demo_batch_size = 4
demo_hidden_size = 32
demo_vocab_size = 64
demo_start_token_id = 1
demo_end_token_id = 2

demo_embedding_layer = tf.keras.layers.Embedding(demo_vocab_size, demo_hidden_size)
demo_decoder_cell = tf.keras.layers.LSTMCell(demo_hidden_size)
demo_output_layer = tf.keras.layers.Dense(demo_vocab_size)

demo_sampler = tfa.seq2seq.GreedyEmbeddingSampler(demo_embedding_layer)
# maximum_iterations caps decoding at 10 steps even if the end token never appears.
demo_decoder = tfa.seq2seq.BasicDecoder(
    demo_decoder_cell, demo_sampler, demo_output_layer, maximum_iterations=10
)

# Every sequence in the batch starts from the same start token.
demo_start_tokens = tf.fill([demo_batch_size], demo_start_token_id)
demo_initial_state = demo_decoder_cell.get_initial_state(batch_size=demo_batch_size, dtype=tf.float32)
# The sampler was given the embedding layer, so no embedding input is passed (None).
final_output, final_state, final_lengths = demo_decoder(
    None,
    start_tokens=demo_start_tokens,
    end_token=demo_end_token_id,
    initial_state=demo_initial_state,
)

print(final_output.sample_id)