In [1]:
!python --version


Python 3.10.11


In [2]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
import os
import re
import json




In [3]:
# Maximum sentence length
MAX_LENGTH = 25

# Maximum number of samples to preprocess
MAX_SAMPLES = 50000

# For tf.data.Dataset
BATCH_SIZE = 32
BUFFER_SIZE = 20000

# For Transformer
NUM_LAYERS = 2
D_MODEL = 256
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1

EPOCHS = 40

In [4]:
# path_to_zip = tf.keras.utils.get_file(
#     "ubuntu_dialogs.tgz",
#     origin="http://www.cs.mcgill.ca/~jpineau/datasets/ubuntu-corpus-1.0/ubuntu_dialogs.tgz",
#     extract=True,
# )

# path_to_dataset = os.path.join(
#     os.path.dirname(path_to_zip), "cornell movie-dialogs corpus"
# )

# path_to_movie_lines = os.path.join(path_to_dataset, "movie_lines.txt")
# path_to_movie_conversations = os.path.join(path_to_dataset, "movie_conversations.txt")

In [5]:
def preprocess_sentence(sentence):
    sentence = sentence.lower().strip()
    # creating a space between a word and the punctuation following it
    # eg: "he is a boy." => "he is a boy ."
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = re.sub(r'[" "]+', " ", sentence)
    # # removing contractions
    sentence = re.sub(r"i'm", "i am", sentence)
    sentence = re.sub(r"he's", "he is", sentence)
    sentence = re.sub(r"she's", "she is", sentence)
    sentence = re.sub(r"it's", "it is", sentence)
    sentence = re.sub(r"that's", "that is", sentence)
    sentence = re.sub(r"what's", "that is", sentence)
    sentence = re.sub(r"where's", "where is", sentence)
    sentence = re.sub(r"how's", "how is", sentence)
    sentence = re.sub(r"\'ll", " will", sentence)
    sentence = re.sub(r"\'ve", " have", sentence)
    sentence = re.sub(r"\'re", " are", sentence)
    sentence = re.sub(r"\'d", " would", sentence)
    sentence = re.sub(r"\'re", " are", sentence)
    sentence = re.sub(r"won't", "will not", sentence)
    sentence = re.sub(r"can't", "cannot", sentence)
    sentence = re.sub(r"n't", " not", sentence)
    sentence = re.sub(r"n'", "ng", sentence)
    sentence = re.sub(r"'bout", "about", sentence)
    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    sentence = re.sub(r"[^a-zA-Z?.!,0-1000]+", " ", sentence)
    sentence = sentence.strip()
    return sentence



# def load_conversations():
#     # dictionary of line id to text
#     id2line = {}
#     with open(path_to_movie_lines, errors="ignore") as file:
#         lines = file.readlines()
#     for line in lines:
#         parts = line.replace("\n", "").split(" +++$+++ ")
#         id2line[parts[0]] = parts[4]

#     inputs, outputs = [], []
#     with open(path_to_movie_conversations, "r") as file:
#         lines = file.readlines()
#     for line in lines:
#         parts = line.replace("\n", "").split(" +++$+++ ")
#         # get conversation in a list of line ID
#         conversation = [line[1:-1] for line in parts[3][1:-1].split(", ")]
#         for i in range(len(conversation) - 1):
#             inputs.append(preprocess_sentence(id2line[conversation[i]]))
#             outputs.append(preprocess_sentence(id2line[conversation[i + 1]]))
#             if len(inputs) >= MAX_SAMPLES:
#                 return inputs, outputs
#     return inputs, outputs


# questions, answers = load_conversations()

questions = []
answers = []

In [6]:
preprocess_sentence("My night's good")

'my night s good'

In [7]:
with open("C:\\Users\\anang\\OneDrive\\Desktop\\Joshua's folder\\DocGPT\\datasets\\gpt.json", 'r') as f:
  gpt = json.load(f)

print(gpt['questions'])

for d in gpt['questions'].split('++'):
  questions.append(preprocess_sentence(d))

for d in gpt['answers'].split('=='):
  answers.append(preprocess_sentence(d))

How can I improve my cardiovascular health++ What are some symptoms of the flu++ Can stress affect my digestive system++ How much water should I drink in a day++ What are some ways to manage anxiety naturally++ Is it normal to feel tired all the time++ What foods are good for promoting brain health++ How can I prevent back pain++ What are the early signs of diabetes++ Can you recommend some healthy snacks for weight loss++ How often should I get my cholesterol checked++ What are the benefits of regular exercise for mental health++ How can I improve my sleep quality++ What are the benefits of drinking green tea++ Is it safe to take supplements for vitamin deficiencies++ Can certain foods help boost the immune system++ What's the difference between type 1 and type 2 diabetes++ How can I manage my stress levels during busy times++ Are there any natural remedies for headaches++ Can a healthy diet help prevent chronic diseases++ How can I quit smoking++ What are the benefits of practicing m

In [9]:
max_num = 272
for i in questions:
    max_num = len(i.split()) if len(i.split()) > max_num else max_num
    

for i in answers:
    max_num = len(i.split()) if len(i.split()) > max_num else max_num
    


MAX_LENGTH = max_num

In [10]:
MAX_LENGTH

272

In [11]:
# Build tokenizer using tfds for both questions and answers
tokenizer = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
    questions + answers, target_vocab_size=2**13
)

# Define start and end token to indicate the start and end of a sentence
START_TOKEN, END_TOKEN = [tokenizer.vocab_size], [tokenizer.vocab_size + 1]

# Vocabulary size plus start and end token
VOCAB_SIZE = tokenizer.vocab_size + 2

In [12]:
VOCAB_SIZE

5306

In [13]:
# Tokenize, filter and pad sentences
def tokenize_and_filter(inputs, outputs):
    tokenized_inputs, tokenized_outputs = [], []

    for (sentence1, sentence2) in zip(inputs, outputs):
        # tokenize sentence
        sentence1 = START_TOKEN + tokenizer.encode(sentence1) + END_TOKEN
        sentence2 = START_TOKEN + tokenizer.encode(sentence2) + END_TOKEN
        # check tokenized sentence max length
        if len(sentence1) <= MAX_LENGTH and len(sentence2) <= MAX_LENGTH:
            tokenized_inputs.append(sentence1)
            tokenized_outputs.append(sentence2)

    # pad tokenized sentences
    tokenized_inputs = tf.keras.preprocessing.sequence.pad_sequences(
        tokenized_inputs, maxlen=MAX_LENGTH, padding="post"
    )
    tokenized_outputs = tf.keras.preprocessing.sequence.pad_sequences(
        tokenized_outputs, maxlen=MAX_LENGTH, padding="post"
    )

    return tokenized_inputs, tokenized_outputs


questions, answers = tokenize_and_filter(questions, answers)

In [14]:
# decoder inputs use the previous target as input
# remove START_TOKEN from targets
dataset = tf.data.Dataset.from_tensor_slices(
    (
        {"inputs": questions, "dec_inputs": answers[:, :-1]},
        {"outputs": answers[:, 1:]},
    )
)

dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

In [15]:
def scaled_dot_product_attention(Q, K, V, mask=None):
    attention = tf.matmul(Q, K, transpose_b = True)
    depth = tf.cast(tf.shape(K)[-1], tf.float32)

    logits = attention / tf.math.sqrt(depth)

    if mask != None:
        logits += -1e9 * mask

    attention = tf.nn.softmax(logits, axis=-1)

    final = tf.matmul(attention, V)

    return final

In [16]:
class MultiHeadAttentionLayer(tf.keras.layers.Layer):
  def __init__(self, num_heads, d_model, **kwargs):
    assert d_model % num_heads == 0
    super(MultiHeadAttentionLayer, self).__init__(**kwargs)

    self.d_model = d_model
    self.num_heads = num_heads

    self.depth = self.d_model // self.num_heads

    self.q_dense = tf.keras.layers.Dense(units=d_model)
    self.k_dense = tf.keras.layers.Dense(units=d_model)
    self.v_dense = tf.keras.layers.Dense(units=d_model)

    self.dense = tf.keras.layers.Dense(units=d_model)


  def get_config(self):
    config = super(MultiHeadAttentionLayer, self).get_config()
    config.update(
        {
            "num_heads": self.num_heads,
            "d_model": self.d_model,
        }
    )
    return config


  def split_heads(self, input, batch_size):
    inputs = tf.keras.layers.Lambda(
        lambda input: tf.reshape(input, shape=(batch_size, -1, self.num_heads, self.depth))
    )(input)

    return tf.keras.layers.Lambda(
        lambda inputs: tf.transpose(inputs, perm=[0,2,1,3])
    )(inputs)


  def call(self, inputs):
    query, key, value, mask = (inputs['query'], inputs['key'], inputs['value'], inputs['mask'])

    batch_size = tf.shape(query)[0]

    query = self.q_dense(query)
    key = self.k_dense(key)
    value = self.v_dense(value)

    q = self.split_heads(query, batch_size)
    k = self.split_heads(key, batch_size)
    v = self.split_heads(value, batch_size)

    scaled_attention = scaled_dot_product_attention(q, k, v, mask)

    attention = tf.keras.layers.Lambda(
        lambda scaled_attention: tf.transpose(scaled_attention, perm=[0,2,1,3])
    )(scaled_attention)

    concat_attention = tf.keras.layers.Lambda(
        lambda attention: tf.reshape(attention, shape=(batch_size, -1, self.d_model))
    )(attention)

    dense = self.dense(concat_attention)

    return dense


In [17]:
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, position, d_model, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.position = position
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_config(self):
        config = super(PositionalEncoding, self).get_config()
        config.update(
            {
                "position": self.position,
                "d_model": self.d_model,
            }
        )
        return config

    def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model,
        )
        # apply sin to even index in the array
        sines = tf.math.sin(angle_rads[:, 0::2])
        # apply cos to odd index in the array
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[tf.newaxis, ...]
        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, : tf.shape(inputs)[1], :]

In [18]:
def encoder_layer(units, num_heads, d_model, dropout, name='encoder_layer'):
  inputs = tf.keras.Input(shape=(None, d_model), name='input')
  padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

  attention = MultiHeadAttentionLayer(num_heads, d_model)({
      'query': inputs,
      'key': inputs,
      'value': inputs,
      'mask': padding_mask
  })

  attention = tf.keras.layers.Dropout(rate=dropout)(attention)
  add = tf.keras.layers.add([inputs, attention])
  attention = tf.keras.layers.LayerNormalization(epsilon=1e-6)(add)

  dense = tf.keras.layers.Dense(units=units, activation='relu')(attention)
  dense1 = tf.keras.layers.Dense(units=d_model)(dense)
  outputs = tf.keras.layers.Dropout(rate=dropout)(dense1)

  add = tf.keras.layers.add([attention, outputs])
  attention = tf.keras.layers.LayerNormalization(epsilon=1e-6)(add)

  return tf.keras.Model(inputs=[inputs, padding_mask], outputs=attention, name=name)

In [19]:
sample_encoder_layer = encoder_layer(
    units=512, d_model=128, num_heads=4, dropout=0.3, name="sample_encoder_layer"
)

# tf.keras.utils.plot_model(
#     sample_encoder_layer, to_file="encoder_layer.png", show_shapes=True
# )







In [20]:
def encoder(vocab_size, units, num_heads, num_layers, d_model, dropout, name='encoder'):
  inputs = tf.keras.Input(shape=(None,), name='input')
  padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

  embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
  embeddings *= tf.keras.layers.Lambda(
      lambda d_model: tf.math.sqrt(tf.cast(d_model, tf.float32))
  )(d_model)

  embeddings = PositionalEncoding(vocab_size, d_model)(embeddings)

  outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)



  for i in range(num_layers):
    outputs = encoder_layer(
        units=units,
        num_heads=num_heads,
        dropout=dropout,
        d_model=d_model,
        name="encoder_layer_{}".format(i),
    )([outputs, padding_mask])

  return tf.keras.Model(inputs=[inputs, padding_mask], outputs=outputs, name=name)

In [21]:
sample_encoder = encoder(
    vocab_size=8192,
    num_layers=2,
    units=512,
    d_model=128,
    num_heads=4,
    dropout=0.3,
    name="sample_encoder",
)

# tf.keras.utils.plot_model(sample_encoder, to_file="encoder.png", show_shapes=True)

In [22]:
def decoder_layer(units, num_heads, d_model, dropout, name='decoder_layer'):
  inputs = tf.keras.Input(shape=(None, d_model), name='inputs')
  enc_outputs = tf.keras.Input(shape=(None, d_model), name='enc_outputs')
  look_ahead_mask = tf.keras.Input(shape=(1, None, None), name='look_ahead_mask')
  padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

  attention1 = MultiHeadAttentionLayer(num_heads, d_model)(
      inputs={
          'query': inputs,
          'key': inputs,
          'value': inputs,
          'mask': look_ahead_mask
      }
  )

  add = tf.keras.layers.add([attention1, inputs])
  attention1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(add)

  attention2 = MultiHeadAttentionLayer(num_heads, d_model)(
      inputs={
          'query': attention1,
          'key': enc_outputs,
          'value': enc_outputs,
          'mask': padding_mask
      }
  )

  attention2 = tf.keras.layers.Dropout(rate=dropout)(attention2)
  add = tf.keras.layers.add([attention2, attention1])
  attention2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(add)

  dense = tf.keras.layers.Dense(units=units, activation='relu')(attention2)
  dense1 = tf.keras.layers.Dense(units=d_model)(dense)

  outputs = tf.keras.layers.Dropout(rate=dropout)(dense1)
  add = tf.keras.layers.add([outputs, attention2])
  outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(add)

  return tf.keras.Model(inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask], outputs=outputs, name=name)

In [23]:
sample_decoder_layer = decoder_layer(
    units=512, d_model=128, num_heads=4, dropout=0.3, name="sample_decoder_layer"
)

In [24]:
def decoder(vocab_size, num_layers, units, d_model, num_heads, dropout, name="decoder"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    enc_outputs = tf.keras.Input(shape=(None, d_model), name="encoder_outputs")
    look_ahead_mask = tf.keras.Input(shape=(1, None, None), name="look_ahead_mask")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
    embeddings *= tf.keras.layers.Lambda(
        lambda d_model: tf.math.sqrt(tf.cast(d_model, tf.float32))
    )(d_model)
    embeddings = PositionalEncoding(vocab_size, d_model)(embeddings)

    outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)

    for i in range(num_layers):
        outputs = decoder_layer(
            units=units,
            d_model=d_model,
            num_heads=num_heads,
            dropout=dropout,
            name="decoder_layer_{}".format(i),
        )(inputs=[outputs, enc_outputs, look_ahead_mask, padding_mask])

    return tf.keras.Model(
        inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
        outputs=outputs,
        name=name,
    )

In [25]:
sample_decoder = decoder(
    vocab_size=8192,
    num_layers=2,
    units=512,
    d_model=128,
    num_heads=4,
    dropout=0.3,
    name="sample_decoder",
)


In [26]:
def create_padding_mask(x):
  x = tf.cast(tf.math.equal(x, 0), tf.float32)

  return x[:, tf.newaxis, tf.newaxis, :]

In [27]:
def create_look_ahead_mask(x):
    seq_len = tf.shape(x)[1]
    look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
    padding_mask = create_padding_mask(x)
    return tf.maximum(look_ahead_mask, padding_mask)

In [28]:
def transformer(vocab_size, units, num_layers, num_heads, dropout, d_model, name='transformer'):
  inputs = tf.keras.layers.Input(shape=(None,), name='inputs')
  dec_inputs = tf.keras.layers.Input(shape=(None,), name='dec_inputs')

  enc_padding_mask = tf.keras.layers.Lambda(
      create_padding_mask, output_shape=(1,1,None), name='enc_padding_mask'
  )(inputs)

  look_ahead_mask = tf.keras.layers.Lambda(
      create_look_ahead_mask, output_shape=(1, None, None), name='look_ahead_mask'
  )(dec_inputs)

  dec_padding_mask = tf.keras.layers.Lambda(
      create_padding_mask, output_shape=(1,1,None), name='dec_padding_mask'
  )(inputs)


  enc_outputs = encoder(
        vocab_size=vocab_size,
        num_layers=num_layers,
        units=units,
        d_model=d_model,
        num_heads=num_heads,
        dropout=dropout,
    )(inputs=[inputs, enc_padding_mask])

  dec_outputs = decoder(
      vocab_size=vocab_size,
      num_layers=num_layers,
      units=units,
      d_model=d_model,
      num_heads=num_heads,
      dropout=dropout,
  )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

  outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

  return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)

In [29]:
def loss_function(ytrue, y_pred):
  ytrue = tf.reshape(ytrue, (-1, MAX_LENGTH - 1))

  loss = tf.keras.losses.SparseCategoricalCrossentropy(
      reduction='none', from_logits=True
  )(ytrue, y_pred)

  mask = tf.cast(tf.math.not_equal(ytrue, 0), tf.float32)
  loss = tf.multiply(loss, mask)

  return tf.reduce_mean(loss)

In [30]:
def accuracy(y_true, y_pred):
    # ensure labels have shape (batch_size, MAX_LENGTH - 1)
    y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))
    return tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)



model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    units=UNITS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT,
)

model.compile(optimizer='adam', loss=loss_function, metrics=[accuracy])

model.summary()







Model: "transformer"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 inputs (InputLayer)         [(None, None)]               0         []                            
                                                                                                  
 dec_inputs (InputLayer)     [(None, None)]               0         []                            
                                                                                                  
 enc_padding_mask (Lambda)   (None, 1, 1, None)           0         ['inputs[0][0]']              
                                                                                                  
 encoder (Functional)        (None, None, 256)            2412544   ['inputs[0][0]',              
                                                                     'enc_padding_mask[0

In [31]:
# for i in range(10):
model.fit(dataset, epochs=50)

Epoch 1/50












Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2661ea76f20>

In [49]:
# for i in range(10):
model.fit(dataset, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x2661b471840>

In [62]:
tf.keras.models.save_model(model, 'location_gpt.h5')

  tf.keras.models.save_model(model, 'location_gpt.h5')


In [34]:
# MAX_LENGTH = 25

In [35]:
# load_model = tf.keras.models.load_model(
#     "h-model.h5",
#     custom_objects={
#         "PositionalEncoding": PositionalEncoding,
#         "MultiHeadAttentionLayer": MultiHeadAttentionLayer,
#     },
#     compile=False,
# )


# # Step 2: Quantize the model
# converter = tf.lite.TFLiteConverter.from_keras_model(load_model)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]  # Apply default optimizations including post-training quantization
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# converter.allow_custom_ops = True  # Enable custom ops

# # Convert the model
# quantized_tflite_model = converter.convert()

# # Step 3: Save the quantized model
# with open("quantized_model.tflite", "wb") as f:
#     f.write(quantized_tflite_model)


In [63]:
import pickle

# Assuming you have a tokenizer object named 'tokenizer'
with open('location_gpt_tokenizer.pkl', 'wb') as handle:
    tokens = {
        "tokenizer": tokenizer,
        "max": MAX_LENGTH,
    }
    pickle.dump(tokens, handle)


In [37]:
# import pickle

# with open("C:/Users/anang/OneDrive/Desktop/Joshua's folder/DocGPT/tokenizer.pkl", "rb") as handle:
#     tokenizer = pickle.load(handle)

# START_TOKEN, END_TOKEN = [tokenizer.vocab_size], [tokenizer.vocab_size + 1]

In [50]:
def evaluate(sentence):
    sentence = preprocess_sentence(sentence)

    sentence = tf.expand_dims(
        START_TOKEN + tokenizer.encode(sentence) + END_TOKEN, axis=0
    )

    output = tf.expand_dims(START_TOKEN, 0)

    for i in range(MAX_LENGTH):
        predictions = model(inputs=[sentence, output], training=False)

        # select the last word from the seq_len dimension
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # return the result if the predicted_id is equal to the end token
        if tf.equal(predicted_id, END_TOKEN[0]):
            break

        # concatenated the predicted_id to the output which is given to the decoder
        # as its input.
        output = tf.concat([output, predicted_id], axis=-1)

            
        

    return tf.squeeze(output, axis=0)


def predict(sentence):
    prediction = evaluate(sentence)
    predicted_sentence = tokenizer.decode(
        [i for i in prediction if i < tokenizer.vocab_size]
    )
    return predicted_sentence

In [51]:
predict("i have been peeing a lot for the past three weeks, about 9 to 12 times a day, i have lower back pain on my right side, and my stomach hurts just below my belly button.")

'babies with reflux are often fussy and agitated because reflux is the upward movement of stomach contents into the esophagus and sometimes into or out of the mouth . for improving this condition i suggest elevating the head of the children bed . keeping the child upright for at least two hours after eating . serving several small meals throughout the day , rather than three large meals . limiting foods and beverages that seem to worsen your children reflux . encouraging your child to get regular exercise . if the symptoms do not improve i suggest to talk to your pediatrician in order to take reflux medications . also , you can give your baby a lactose free formula if baby still .'

In [52]:
predict("Okay and I have to exercise at 9:00 PM can you remind me when it's time?")

'alright . i will let you know when it is time for your exercise . . hey joshua ! it is time for your exercise .'

In [66]:
predict("I'm in Hong Kong, China, and I need to find a good clinic for a check-up. Where should I go?")

'no problem , i will help you find it right away .'

In [42]:
print(questions[360])
print()
print(answers[360])

[5304   23    8 1608    1    3   64   27 1383 3365 2573 1771    1   13
    8   68  348    4  148  150  788    2    7 1050   17 5305    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0 