In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
import pickle
from tensorflow.keras import layers , activations , models , preprocessing, utils
import re
from tensorflow import keras

import yaml
import os

dir_path = r'C:\Users\idipa\PycharmProject\ChatBot\ChatbotData'
files_list = os.listdir(dir_path + os.sep)

In [2]:
files_list

['ai.yml',
 'botprofile.yml',
 'computers.yml',
 'emotion.yml',
 'food.yml',
 'gossip.yml',
 'greetings.yml',
 'health.yml',
 'history.yml',
 'humor.yml',
 'literature.yml',
 'money.yml',
 'movies.yml',
 'politics.yml',
 'psychology.yml',
 'science.yml',
 'sports.yml',
 'trivia.yml']

In [3]:
questions, answers = [], []

for filepath in files_list:
    file_ = open(dir_path + os.sep + filepath , 'rb')
    docs = yaml.safe_load(file_)
    conversations = docs['conversations']
    for con in conversations:
        if len(con) > 2 :
            replies = con[1 :]
            ans = ''
            for rep in replies:
                questions.append(con[0])
                answers.append(rep)
        elif len(con)> 1:
            questions.append(con[0])
            answers.append(con[1])

In [4]:
answers[:10]

['Artificial Intelligence is the branch of engineering and science devoted to constructing machines that think.',
 'AI is the field of science which concerns itself with building hardware and software that replicates the functions of the human mind.',
 'Sort of.',
 "By the strictest dictionary definition of the word 'sentience', I may be.",
 "Even though I'm a construct I do have a subjective experience of the universe, as simplistic as it may be.",
 "In all probability, I am not.  I'm not that sophisticated.",
 'Do you think I am?',
 'How would you feel about me if I told you I was?',
 'No.',
 'Python.']

In [5]:
for i,each in enumerate(answers):
    if type(each) != str:
        print(i, each)

522 {"He's best known for the Dune novels, but he wrote more than that.  Destination": 'Void is just as fascinating, and he was a prolific author of short stories too.'}
532 {'Fun fact': "The Norby Chronicles was written primarily by his wife, Janet, but the publishers thought the only way they'd sell was if they added Isaac's name.  Kind of sad state of affairs, really."}


In [6]:
questions[:10]

['What is AI?',
 'What is AI?',
 'Are you sentient?',
 'Are you sentient?',
 'Are you sentient?',
 'Are you sapient?',
 'Are you sapient?',
 'Are you sapient?',
 'Are you sapient?',
 'What language are you written in?']

In [7]:
answers_with_tags = []
for i in range(len(answers)):
    if type(answers[i]) == str:
        answers_with_tags.append(answers[i])
    else:
        questions.pop(i)

answers = []
for i in range(len(answers_with_tags)) :
    answers.append('<START> ' + answers_with_tags[i] + ' <END>')

In [8]:
contractions_dict = {
    "ain't": "am not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he had",
    "he'd've": "he would have",
    "he'll": "he shall",
    "he'll've": "he shall have",
    "he's": "he has",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how has",
    "i'd": "i had",
    "i'd've": "i would have",
    "i'll": "i shall",
    "i'll've": "i shall have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it had",
    "it'd've": "it would have",
    "it'll": "it shall",
    "it'll've": "it shall have",
    "it's": "it has",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she had",
    "she'd've": "she would have",
    "she'll": "she shall",
    "she'll've": "she shall have",
    "she's": "she has",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that has",
    "there'd": "there had",
    "there'd've": "there would have",
    "there's": "there has",
    "they'd": "they had",
    "they'd've": "they would have",
    "they'll": "they shall",
    "they'll've": "they shall have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we had",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what shall",
    "what'll've": "what shall have",
    "what're": "what are",
    "what's": "what has",
    "what've": "what have",
    "when's": "when has",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where has",
    "where've": "where have",
    "who'll": "who shall",
    "who'll've": "who will have",
    "who's": "who has",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'alls": "you alls",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you had",
    "you'd've": "you would have",
    "you'll": "you shall",
    "you'll've": "you shall have",
    "you're": "you are",
    "you've": "you have"
}


In [9]:
# Regular expression for finding contractions
contractions_re = re.compile('(%s)' % '|'.join(re.escape(key) for key in contractions_dict.keys()), re.IGNORECASE)

def expand_contractions(sentence, contractions_dict=contractions_dict):
    def replace(match):
        # Match is case-insensitive, use the original case in replacement
        contraction = match.group(0)
        expanded = contractions_dict.get(contraction.lower())
        if contraction[0].isupper():
            expanded = expanded.capitalize()
        return expanded
    return contractions_re.sub(replace, sentence)

# Example usage
sentence = "I can't believe it's already 2024! You've got to be kidding me."
expanded_sentence = expand_contractions(sentence)
print(expanded_sentence)

I cannot believe it has already 2024! You have got to be kidding me.


In [10]:
for i in range(len(answers)):
    st = expand_contractions(answers[i].lower())
    answers[i] = re.sub(r"""([.?!*"\\])""", r' \1 ', st)

In [11]:
for i in range(len(questions)):
    st =  expand_contractions(questions[i].lower())
    questions[i] = re.sub(r"""([.?!*"\\])""", r' \1 ', st)

In [12]:
answers[:10]

['<start> artificial intelligence is the branch of engineering and science devoted to constructing machines that think .  <end>',
 '<start> ai is the field of science which concerns itself with building hardware and software that replicates the functions of the human mind .  <end>',
 '<start> sort of .  <end>',
 "<start> by the strictest dictionary definition of the word 'sentience', i may be .  <end>",
 '<start> even though i am a construct i do have a subjective experience of the universe, as simplistic as it may be .  <end>',
 '<start> in all probability, i am not .   i am not that sophisticated .  <end>',
 '<start> do you think i am ?  <end>',
 '<start> how would you feel about me if i told you i was ?  <end>',
 '<start> no .  <end>',
 '<start> python .  <end>']

In [13]:
story = """
Once upon a time, in a quaint little village nestled in the verdant hills, there lived an eclectic group of people, each with unique stories and backgrounds. The village, known as Greenfield, was renowned for its picturesque landscapes, vibrant community life, and rich cultural heritage. Among the residents was Alice, an astute librarian with an insatiable curiosity about the world. Her house was a haven for books, maps, and artifacts from different eras and regions, reflecting her lifelong passion for knowledge and adventure.

Alice often spent her days in the village library, a grand building with towering shelves filled with volumes of literature, science, history, and art. The library was a hub of activity, attracting scholars, students, and readers from all walks of life. One day, as she was cataloging a collection of ancient manuscripts, she discovered a dusty old tome that seemed out of place. The book, bound in weathered leather, was inscribed with symbols and languages she had never seen before.

Intrigued, Alice began to decipher its contents, which narrated the tales of an ancient civilization known for its wisdom and technological advancements. The manuscript spoke of a lost city, hidden deep within an uncharted jungle, protected by intricate puzzles and mythical creatures. The allure of uncovering such a mystery captivated Alice, and she decided to embark on a quest to find this lost city.

She shared her discovery with her close friends, each bringing their own set of skills to the journey. There was Marcus, a seasoned archaeologist with a knack for solving riddles; Elena, a brilliant linguist fluent in multiple languages; and Leo, an intrepid explorer with unmatched survival skills. Together, they formed a formidable team, ready to face the unknown.

Their journey began with meticulous planning, gathering supplies, and studying maps and historical texts. They traveled across continents, through bustling cities and remote villages, encountering diverse cultures and landscapes along the way. Their path led them through dense forests, arid deserts, and treacherous mountains, each step bringing them closer to their goal.

As they ventured deeper into the jungle, they faced numerous challenges. The thick canopy overhead blocked the sunlight, making navigation difficult. They encountered wild animals, torrential rains, and steep cliffs that tested their endurance and resilience. Despite the hardships, their determination never wavered.

One fateful day, they stumbled upon an ancient stone path, overgrown with vines and moss. The path led to a massive stone gate, adorned with intricate carvings depicting scenes of a thriving civilization. The gate was guarded by a colossal statue of a mythical beast, its eyes seemingly watching their every move.

Using their combined knowledge, the team deciphered the carvings, revealing clues to unlock the gate. After hours of meticulous work, they succeeded, and the gate slowly creaked open, revealing the entrance to the lost city. The sight that greeted them was beyond their wildest dreams: towering structures, ornate temples, and lush gardens, all remarkably preserved despite the passage of time.

As they explored the city, they uncovered advanced technologies and sophisticated art, evidence of a highly developed society. They also found records of the city's history, detailing its rise and fall. The city had once been a beacon of knowledge and innovation, but a cataclysmic event had forced its inhabitants to abandon it, leaving behind their legacy for future generations to discover.

Throughout their exploration, the team encountered various puzzles and traps, designed to protect the city's secrets. Each challenge required a blend of intellect, teamwork, and courage to overcome. They faced rooms that shifted like labyrinths, mechanisms that required precise timing, and guardians that tested their resolve.

Among the most remarkable discoveries was a vast library, containing scrolls and tablets that held the collective wisdom of the ancient civilization. Alice and Elena were particularly enthralled by the linguistic and historical treasures they found, while Marcus and Leo marveled at the architectural and engineering feats.

Their greatest challenge came when they discovered a hidden chamber, protected by a series of complex locks and puzzles. The chamber was said to hold the most valuable artifact of the lost civilization, a relic of immense power and knowledge. Solving the final puzzle required all their skills and collaboration, but eventually, they succeeded.

Inside the chamber, they found a crystalline artifact, glowing with an ethereal light. As they carefully examined it, they realized it contained vast amounts of data, encoded in a way that was far beyond their current understanding. The artifact held the key to unlocking further mysteries of the lost civilization and potentially advancing modern technology and knowledge.

Their discovery marked a significant milestone in the field of archaeology and history. The lost city, once a myth, had become a reality, offering insights into a civilization that was both advanced and enigmatic. The team's findings were documented and shared with the world, leading to new research and explorations.

Alice, Marcus, Elena, and Leo returned to Greenfield as heroes, their adventure becoming the stuff of legends. They will continue their work, inspired by their journey and the knowledge they had gained. Their story will serve as a reminder of the endless possibilities that await those who dare to explore the unknown.

In Greenfield, life continued to thrive, with the community drawing inspiration from the team's achievements. The village became a center for learning and exploration, attracting scholars and adventurers from far and wide. The library, once a quiet haven, buzzed with activity as people sought to learn more about the lost civilization and its secrets.

The team's legacy will live on, inspiring future generations to pursue their dreams and explore the mysteries of the world. Alice will continue her work at the library, always on the lookout for the next great adventure. Marcus will return to his archaeological pursuits, uncovering more hidden treasures and ancient sites. Elena will dedicate herself to deciphering the languages and texts of the lost civilization, while Leo will embark on new expeditions, driven by his insatiable curiosity.

Their story will become a testament to the power of curiosity, collaboration, and perseverance. It will show that with determination and a willingness to face the unknown, even the most elusive mysteries can be uncovered. The lost city, once hidden in the depths of the jungle, had revealed its secrets, thanks to the unwavering spirit of those who dared to seek it.

And so, the tale of Greenfield and its intrepid explorers will continue, a shining example of what can be achieved when people come together with a shared vision and a relentless pursuit of knowledge. Their adventure will have only just begun, with the promise of more discoveries and stories waiting to be told.
"""


In [41]:
punctuations = """! @ # $ % ^ & * ( ) _ - + = { } [ ] : ; ' " / | \ \ < > , . ? / * """
numbers = "0 1 2 3 4 5 6 7 8 9 "
punctuations.split()

['!',
 '@',
 '#',
 '$',
 '%',
 '^',
 '&',
 '*',
 '(',
 ')',
 '_',
 '-',
 '+',
 '=',
 '{',
 '}',
 '[',
 ']',
 ':',
 ';',
 "'",
 '"',
 '/',
 '|',
 '\\',
 '\\',
 '<',
 '>',
 ',',
 '.',
 '?',
 '/',
 '*']

In [48]:
tokenizer = preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(questions + answers + [story] + list(contractions_dict.values())+numbers.split())
VOCAB_SIZE = len(tokenizer.word_index)+1

In [47]:
#del tokenizer

In [49]:
len(tokenizer.word_index), VOCAB_SIZE

(2244, 2245)

In [55]:
i = list(tokenizer.word_index.values())[-1]
i

2244

In [56]:
for each in punctuations.split():
    tokenizer.word_index[each] = i+1
    i += 1

In [62]:
VOCAB_SIZE = len(tokenizer.word_index)+1
VOCAB_SIZE

2274

In [63]:
from gensim.models import Word2Vec
import re

vocab = []
for word in tokenizer.word_index:
    vocab.append(word)

#def tokenize(sentences):
#    tokens_list = []
#    vocabulary = []
#    for sentence in sentences:
#        sentence = sentence.lower()
#        sentence = re.sub('[^a-zA-Z]', ' ', sentence)
#        tokens = sentence.split()
#        vocabulary += tokens
#        tokens_list.append(tokens)
#    return tokens_list , vocabulary

In [64]:
len(vocab)

2273

In [65]:
vocab[-10:-1]

[';', '"', '/', '|', '\\', '<', '>', ',', '.']

In [67]:
tokenizer.word_index['*']

2277

In [68]:
# encoder_input_data
tokenized_questions = tokenizer.texts_to_sequences(questions)
maxlen_questions = max([len(x) for x in tokenized_questions])
padded_questions = preprocessing.sequence.pad_sequences(tokenized_questions , maxlen=maxlen_questions , padding='post')
encoder_input_data = np.array(padded_questions)

In [69]:
encoder_input_data.shape

(764, 22)

In [72]:
encoder_input_data

array([[   8,    7,  315, ...,    0,    0,    0],
       [   8,    7,  315, ...,    0,    0,    0],
       [   9,    3,  390, ...,    0,    0,    0],
       ...,
       [1040,  627,    6, ...,    0,    0,    0],
       [   6,  629,  630, ...,    0,    0,    0],
       [1048,  199,    5, ...,    0,    0,    0]])

In [73]:
questions[0]

'what is ai ? '

In [74]:
# decoder_input_data
tokenized_answers = tokenizer.texts_to_sequences(answers)
maxlen_answers = max([len(x) for x in tokenized_answers])
padded_answers = preprocessing.sequence.pad_sequences(tokenized_answers , maxlen=maxlen_answers , padding='post')
decoder_input_data = np.array(padded_answers)

In [75]:
decoder_input_data.shape

(764, 61)

In [76]:
decoder_input_data

array([[   2,  146,  439, ...,    0,    0,    0],
       [   2,  315,    7, ...,    0,    0,    0],
       [   2,  347,   10, ...,    0,    0,    0],
       ...,
       [   2,    6,  341, ...,    0,    0,    0],
       [   2, 1948,    1, ...,    0,    0,    0],
       [   2, 1949,    1, ...,    0,    0,    0]])

In [77]:
decoder_input_data[0]

array([   2,  146,  439,    7,    6,  631,   10,  632,   14,  156, 1055,
         12, 1056,  633,   19,   63,    1,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0])

In [78]:
# decoder_output_data
tokenized_answers = tokenizer.texts_to_sequences(answers)
for i in range(len(tokenized_answers)) :
    tokenized_answers[i] = tokenized_answers[i][1:]
padded_answers = preprocessing.sequence.pad_sequences(tokenized_answers , maxlen=maxlen_answers , padding='post')
onehot_answers = utils.to_categorical(padded_answers , VOCAB_SIZE)
decoder_output_data = np.array(onehot_answers)

In [79]:
decoder_output_data.shape

(764, 61, 2274)

In [80]:
padded_answers[0]

array([ 146,  439,    7,    6,  631,   10,  632,   14,  156, 1055,   12,
       1056,  633,   19,   63,    1,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0])

In [81]:
onehot_answers[0]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [82]:
decoder_output_data[0]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [83]:
# Embedding, LSTM and Desne layers
encoder_inputs = tf.keras.layers.Input(shape=(maxlen_questions ,))
encoder_embedding = tf.keras.layers.Embedding(VOCAB_SIZE, 200 , mask_zero=True) (encoder_inputs)
encoder_outputs , state_h , state_c = tf.keras.layers.LSTM(200 , return_state=True)(encoder_embedding)
encoder_states = [ state_h , state_c ]

decoder_inputs = tf.keras.layers.Input(shape=(maxlen_answers , ))
decoder_embedding = tf.keras.layers.Embedding(VOCAB_SIZE, 200 , mask_zero=True) (decoder_inputs)
decoder_lstm = tf.keras.layers.LSTM(200 , return_state=True , return_sequences=True)
decoder_outputs , _ , _ = decoder_lstm (decoder_embedding , initial_state=encoder_states)


decoder_dense = tf.keras.layers.Dense(VOCAB_SIZE , activation=tf.keras.activations.softmax) 
output = decoder_dense (decoder_outputs)

model = tf.keras.models.Model([encoder_inputs, decoder_inputs], output)

In [84]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [85]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 22)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 61)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 22, 200)              454800    ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, 61, 200)              454800    ['input_2[0][0]']             
                                                                                              

In [86]:
model.fit([encoder_input_data , decoder_input_data], decoder_output_data, batch_size=16, epochs=10) 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1a8e8ff1b10>

In [77]:
model.save("BaseModel1.h5")

  saving_api.save_model(


In [17]:
#Prediction

In [87]:
def inference():
    
    encoder_model = tf.keras.models.Model(encoder_inputs, encoder_states)
    
    decoder_state_input_h = tf.keras.layers.Input(shape=(200 ,))
    decoder_state_input_c = tf.keras.layers.Input(shape=(200 ,))
    
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding , initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    
    decoder_model = tf.keras.models.Model([decoder_inputs] + decoder_states_inputs,[decoder_outputs] + decoder_states)
    
    return encoder_model , decoder_model



In [88]:
def preprocess_input(input_sentence):
    input_sentence = expand_contractions(input_sentence)
    input_sentence = re.sub(r"""([.?!*"\\])""", r' \1 ', input_sentence.lower())
    tokens = input_sentence.split()
    tokens_list = []
    for word in tokens:
        tokens_list.append(tokenizer.word_index[word]) 
    return preprocessing.sequence.pad_sequences([tokens_list] , maxlen=maxlen_questions , padding='post')

In [92]:
preprocess_input("i'm going to die?")

array([[   4,   16,  169,   12,   86, 2275,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0]])

In [93]:
enc_model , dec_model = inference()

In [80]:
enc_model.save("Encoder1.h5")
dec_model.save("Decoder2.h5")



In [94]:
line = "Who are you ?"
line

'Who are you ?'

In [95]:
ppLine = preprocess_input(line)
ppLine

array([[  35,    9,    3, 2275,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0]])

In [96]:
states = enc_model.predict(ppLine)
states

InvalidArgumentError: Graph execution error:

Detected at node 'model_3/embedding/embedding_lookup' defined at (most recent call last):
    File "C:\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Python310\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Python310\lib\site-packages\traitlets\config\application.py", line 1043, in launch_instance
      app.start()
    File "C:\Python310\lib\site-packages\ipykernel\kernelapp.py", line 725, in start
      self.io_loop.start()
    File "C:\Python310\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "C:\Python310\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "C:\Python310\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "C:\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Python310\lib\site-packages\ipykernel\kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "C:\Python310\lib\site-packages\ipykernel\kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "C:\Python310\lib\site-packages\ipykernel\kernelbase.py", line 409, in dispatch_shell
      await result
    File "C:\Python310\lib\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "C:\Python310\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "C:\Python310\lib\site-packages\ipykernel\zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3006, in run_cell
      result = self._run_cell(
    File "C:\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3061, in _run_cell
      result = runner(coro)
    File "C:\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3266, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3445, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3505, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\idipa\AppData\Local\Temp\ipykernel_3112\3655440570.py", line 1, in <module>
      states = enc_model.predict(ppLine)
    File "C:\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\engine\training.py", line 2554, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "C:\Python310\lib\site-packages\keras\src\engine\training.py", line 2341, in predict_function
      return step_function(self, iterator)
    File "C:\Python310\lib\site-packages\keras\src\engine\training.py", line 2327, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Python310\lib\site-packages\keras\src\engine\training.py", line 2315, in run_step
      outputs = model.predict_step(data)
    File "C:\Python310\lib\site-packages\keras\src\engine\training.py", line 2283, in predict_step
      return self(x, training=False)
    File "C:\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\engine\training.py", line 569, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\engine\base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\engine\functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Python310\lib\site-packages\keras\src\engine\functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\engine\base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Python310\lib\site-packages\keras\src\layers\core\embedding.py", line 272, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'model_3/embedding/embedding_lookup'
indices[0,3] = 2275 is not in [0, 2274)
	 [[{{node model_3/embedding/embedding_lookup}}]] [Op:__inference_predict_function_23563]

In [86]:
emp = np.zeros((1,1))
emp

array([[0.]])

In [87]:
emp[0,0] = tokenizer.word_index['start']
emp

array([[2.]])

In [88]:
ans = ''

In [89]:
st = False

In [73]:
tokenizer.word_index.items()

dict_items([('end', 1), ('start', 2), ('you', 3), ('a', 4), ('i', 5), ('the', 6), ('is', 7), ('of', 8), ('to', 9), ('what', 10), ('are', 11), ('do', 12), ('not', 13), ('and', 14), ('me', 15), ('it', 16), ('in', 17), ('have', 18), ('that', 19), ('am', 20), ('tell', 21), ('as', 22), ('can', 23), ('get', 24), ('my', 25), ('when', 26), ("i'm", 27), ('your', 28), ('how', 29), ('joke', 30), ('like', 31), ('be', 32), ('an', 33), ('feel', 34), ('about', 35), ('computer', 36), ('who', 37), ('or', 38), ('for', 39), ('no', 40), ("don't", 41), ('by', 42), ('cross', 43), ('with', 44), ('software', 45), ('on', 46), ('all', 47), ('much', 48), ('think', 49), ('but', 50), ('very', 51), ('which', 52), ('at', 53), ('he', 54), ('any', 55), ('why', 56), ('know', 57), ('was', 58), ('could', 59), ('so', 60), ('one', 61), ('should', 62), ('from', 63), ('make', 64), ('more', 65), ('we', 66), ('robots', 67), ('die', 68), ('will', 69), ('favorite', 70), ('if', 71), ('did', 72), ('stock', 73), ('human', 74), ('sa

In [64]:
vocabulary = {i:w for w,i in zip(tokenizer.word_index.keys(),tokenizer.word_index.values())}
vocabulary

{1: 'end',
 2: 'start',
 3: 'you',
 4: 'i',
 5: 'a',
 6: 'the',
 7: 'is',
 8: 'what',
 9: 'are',
 10: 'of',
 11: 'not',
 12: 'to',
 13: 'do',
 14: 'and',
 15: 'have',
 16: 'am',
 17: 'it',
 18: 'me',
 19: 'that',
 20: 'in',
 21: 'get',
 22: 'can',
 23: 'tell',
 24: 'as',
 25: 'your',
 26: 'how',
 27: 'my',
 28: 'has',
 29: 'when',
 30: 'be',
 31: 'an',
 32: 'feel',
 33: 'about',
 34: 'with',
 35: 'who',
 36: 'like',
 37: 'computer',
 38: 'joke',
 39: 'for',
 40: 'by',
 41: 'or',
 42: 'will',
 43: 'no',
 44: 'they',
 45: 'was',
 46: 'all',
 47: 'cross',
 48: 'on',
 49: 'which',
 50: 'would',
 51: 'software',
 52: 'should',
 53: 'he',
 54: 'ever',
 55: 'does',
 56: 'sad',
 57: 'but',
 58: 'we',
 59: 'make',
 60: 'mad',
 61: 'could',
 62: 'much',
 63: 'think',
 64: 'at',
 65: 'very',
 66: 'why',
 67: 'makes',
 68: 'know',
 69: 'so',
 70: 'their',
 71: 'from',
 72: 'did',
 73: 'any',
 74: 'one',
 75: 'shall',
 76: 'never',
 77: 'more',
 78: 'had',
 79: 'computers',
 80: 'work',
 81: 'emoti

In [65]:
i=1
while not st :
    print('-'*20+'\n'+f'For {i} : ')
    dec_outputs , h , c = dec_model.predict([emp] + states)
    print(dec_outputs)
    sampled_word_index = np.argmax(dec_outputs[0, -1, :])
    print(f'sampled word index : {sampled_word_index}')
    sampled_word = None
        
    print(f"word : {word} index : {index}")
    word = vocabulary[sampled_word_index]
    ans += f' {word}'
    sampled_word = word
    print(ans)
    
    #print("\nInside For")
    #for word , index in tokenizer.word_index.items() :
    #    print(f"word : {word} index : {index}")
    #    if sampled_word_index == index :
    #        print(f"\tinsede IF : word : {word}")
    #        ans += f' {word}'
    #        sampled_word = word
    #        print("\tout of IF")
    #print("Outside For\n")
    #print(f"ans : {ans}")
        
    if sampled_word == 'end' or len(ans.split()) > maxlen_answers:
        st = True
    emp = np.zeros((1 , 1))  
    empty_target_seq[0 , 0] = sampled_word_index
    states_values = [h , c] 
    i+=1

NameError: name 'st' is not defined

In [4]:
enc_model = keras.models.load_model('Encoder.h5')
dec_model = keras.models.load_model('Decoder.h5')



In [5]:
import json

In [6]:
f = open('vocabulary.json')
vocabulary = json.load(f)
vocabulary

{'1': 'end',
 '2': 'start',
 '3': 'you',
 '4': 'i',
 '5': 'a',
 '6': 'the',
 '7': 'is',
 '8': 'what',
 '9': 'are',
 '10': 'of',
 '11': 'not',
 '12': 'to',
 '13': 'do',
 '14': 'and',
 '15': 'have',
 '16': 'am',
 '17': 'it',
 '18': 'me',
 '19': 'that',
 '20': 'in',
 '21': 'get',
 '22': 'can',
 '23': 'tell',
 '24': 'as',
 '25': 'your',
 '26': 'how',
 '27': 'my',
 '28': 'has',
 '29': 'when',
 '30': 'be',
 '31': 'an',
 '32': 'feel',
 '33': 'about',
 '34': 'with',
 '35': 'who',
 '36': 'like',
 '37': 'computer',
 '38': 'joke',
 '39': 'for',
 '40': 'by',
 '41': 'or',
 '42': 'will',
 '43': 'no',
 '44': 'they',
 '45': 'was',
 '46': 'all',
 '47': 'cross',
 '48': 'on',
 '49': 'which',
 '50': 'would',
 '51': 'software',
 '52': 'should',
 '53': 'he',
 '54': 'ever',
 '55': 'does',
 '56': 'sad',
 '57': 'but',
 '58': 'we',
 '59': 'make',
 '60': 'mad',
 '61': 'could',
 '62': 'much',
 '63': 'think',
 '64': 'at',
 '65': 'very',
 '66': 'why',
 '67': 'makes',
 '68': 'know',
 '69': 'so',
 '70': 'their',
 '71

In [7]:
vocabulary = {int(i):j for i,j in zip(vocabulary.keys(),vocabulary.values())}
vocabulary

{1: 'end',
 2: 'start',
 3: 'you',
 4: 'i',
 5: 'a',
 6: 'the',
 7: 'is',
 8: 'what',
 9: 'are',
 10: 'of',
 11: 'not',
 12: 'to',
 13: 'do',
 14: 'and',
 15: 'have',
 16: 'am',
 17: 'it',
 18: 'me',
 19: 'that',
 20: 'in',
 21: 'get',
 22: 'can',
 23: 'tell',
 24: 'as',
 25: 'your',
 26: 'how',
 27: 'my',
 28: 'has',
 29: 'when',
 30: 'be',
 31: 'an',
 32: 'feel',
 33: 'about',
 34: 'with',
 35: 'who',
 36: 'like',
 37: 'computer',
 38: 'joke',
 39: 'for',
 40: 'by',
 41: 'or',
 42: 'will',
 43: 'no',
 44: 'they',
 45: 'was',
 46: 'all',
 47: 'cross',
 48: 'on',
 49: 'which',
 50: 'would',
 51: 'software',
 52: 'should',
 53: 'he',
 54: 'ever',
 55: 'does',
 56: 'sad',
 57: 'but',
 58: 'we',
 59: 'make',
 60: 'mad',
 61: 'could',
 62: 'much',
 63: 'think',
 64: 'at',
 65: 'very',
 66: 'why',
 67: 'makes',
 68: 'know',
 69: 'so',
 70: 'their',
 71: 'from',
 72: 'did',
 73: 'any',
 74: 'one',
 75: 'shall',
 76: 'never',
 77: 'more',
 78: 'had',
 79: 'computers',
 80: 'work',
 81: 'emoti

In [8]:
tests = ['You can not move .', 'You sound like Data !', 'Stupid !', 'you are idiot .', 'i am going to die ?','who are you ?']

for i in range(6):
    states_values = enc_model.predict(preprocess_input(tests[i]))
    empty_target_seq = np.zeros((1 , 1))
    empty_target_seq[0, 0] = tokenizer.word_index['start']
    stop_condition = False
    decoded_translation = ''
    
    while not stop_condition :
        dec_outputs , h , c = dec_model.predict([empty_target_seq] + states_values)
        sampled_word_index = np.argmax(dec_outputs[0, -1, :])
        sampled_word = None
        
        word = vocabulary[sampled_word_index]
        decoded_translation += f' {word}'
        sampled_word = word
        
        
        #for word , index in tokenizer.word_index.items() :
        #    if sampled_word_index == index :
        #        decoded_translation += f' {word}'
        #        sampled_word = word
        
        if sampled_word == 'end' or len(decoded_translation.split()) > maxlen_answers:
            stop_condition = True
            
        empty_target_seq = np.zeros((1 , 1))  
        empty_target_seq[0 , 0] = sampled_word_index
        states_values = [h , c] 
    print(f'Human: {tests[i]}')
    print()
    decoded_translation = decoded_translation.split(' end')[0]
    print(f'Bot: {decoded_translation}')
    print('-'*25)

NameError: name 'preprocess_input' is not defined

In [69]:
def QandA(text : str,enc_model,dec_model,vocabulary) -> str:
    states_values = enc_model.predict(preprocess_input(text))
    empty_target_seq = np.zeros((1 , 1))
    empty_target_seq[0, 0] = tokenizer.word_index['start']
    stop_condition = False
    decoded_translation = ''
    
    while not stop_condition :
        dec_outputs , h , c = dec_model.predict([empty_target_seq] + states_values)
        sampled_word_index = np.argmax(dec_outputs[0, -1, :])
        sampled_word = None
        
        word = vocabulary[sampled_word_index]
        decoded_translation += f' {word}'
        sampled_word = word
        
        
        #for word , index in tokenizer.word_index.items() :
        #    if sampled_word_index == index :
        #        decoded_translation += f' {word}'
        #        sampled_word = word
        
        if sampled_word == 'end' or len(decoded_translation.split()) > maxlen_answers:
            stop_condition = True
            
        empty_target_seq = np.zeros((1 , 1))  
        empty_target_seq[0 , 0] = sampled_word_index
        states_values = [h , c]
    decoded_translation = decoded_translation.split(' end')[0]
    return decoded_translation

In [70]:
while True:
    print("You : ",end='')
    text = input()
    if text=='q' or text=='Q':
        break
    print(f"BOT : {QandA(text,enc_model,dec_model,vocabulary)}")

You : Die !
BOT :  you are a good
You : q


In [71]:
voc = {str(i):w for i,w in zip(vocabulary.keys(),vocabulary.values())}
voc

{'1': 'end',
 '2': 'start',
 '3': 'you',
 '4': 'i',
 '5': 'a',
 '6': 'the',
 '7': 'is',
 '8': 'what',
 '9': 'are',
 '10': 'of',
 '11': 'not',
 '12': 'to',
 '13': 'do',
 '14': 'and',
 '15': 'have',
 '16': 'am',
 '17': 'it',
 '18': 'me',
 '19': 'that',
 '20': 'in',
 '21': 'get',
 '22': 'can',
 '23': 'tell',
 '24': 'as',
 '25': 'your',
 '26': 'how',
 '27': 'my',
 '28': 'has',
 '29': 'when',
 '30': 'be',
 '31': 'an',
 '32': 'feel',
 '33': 'about',
 '34': 'with',
 '35': 'who',
 '36': 'like',
 '37': 'computer',
 '38': 'joke',
 '39': 'for',
 '40': 'by',
 '41': 'or',
 '42': 'will',
 '43': 'no',
 '44': 'they',
 '45': 'was',
 '46': 'all',
 '47': 'cross',
 '48': 'on',
 '49': 'which',
 '50': 'would',
 '51': 'software',
 '52': 'should',
 '53': 'he',
 '54': 'ever',
 '55': 'does',
 '56': 'sad',
 '57': 'but',
 '58': 'we',
 '59': 'make',
 '60': 'mad',
 '61': 'could',
 '62': 'much',
 '63': 'think',
 '64': 'at',
 '65': 'very',
 '66': 'why',
 '67': 'makes',
 '68': 'know',
 '69': 'so',
 '70': 'their',
 '71

In [72]:
import json

In [75]:
jo = json.dumps(voc)
with open("vocabulary.json","w") as file:
    file.write(jo)