<a href="https://colab.research.google.com/github/ArpitaChatterjee/Demo-ChatBot/blob/main/ChatBot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import Libraries

In [1]:
import numpy as np
import json
import re
import tensorflow as tf
import random
import spacy
nlp = spacy.load('en_core_web_sm')

##Import JSON file

In [2]:
with open('/content/drive/MyDrive/Colab Notebooks/ChatBot/Intent.json') as f:
  intents= json.load(f)

##Preprocessing Data

- Clean the data
- split them into inputs and target tensors
-build  a tokenizer dictionary and turn sentences into sequences.

The target tensors has a bunch of list with a length of unique title list.


In [4]:
def preprocessing(line):
  line= re.sub(r'[^a-zA-z.?!\']', ' ', line)
  line= re.sub(r'[ ]+', ' ', line)
  return line

In [5]:
#get text and intent title from json data
inputs, targets= [], []
classes= []
intent_doc= {}

for intent in intents['intents']:
  if intent['intent'] not in classes:
    classes.append(intent['intent'])
  if intent['intent'] not in intent_doc:
    intent_doc[intent['intent']] = []
        
  for text in intent['text']:
    inputs.append(preprocessing(text))
    targets.append(intent['intent'])
        
  for response in intent['responses']:
    intent_doc[intent['intent']].append(response)

In [7]:
#tokenize
def tokenize_data(input_list):
  tokenizer= tf.keras.preprocessing.text.Tokenizer(filters='', oov_token='<unk>')

  tokenizer.fit_on_texts(input_list)
  input_seq = tokenizer.texts_to_sequences(input_list)
  input_seq = tf.keras.preprocessing.sequence.pad_sequences(input_seq, padding='pre')

  return tokenizer, input_seq

#preprocess input data
tokenizer , input_tensor = tokenize_data(inputs)

In [8]:
def create_categorical_target(targets):
    word={}
    categorical_target=[]
    counter=0
    for trg in targets:
        if trg not in word:
            word[trg]=counter
            counter+=1
        categorical_target.append(word[trg])
    
    categorical_tensor = tf.keras.utils.to_categorical(categorical_target, num_classes=len(word), dtype='int32')
    return categorical_tensor, dict((v,k) for k, v in word.items())

# preprocess output data
target_tensor, trg_index_word = create_categorical_target(targets)

In [9]:
print('input shape: {} and output shape: {}'.format(input_tensor.shape, target_tensor.shape))

input shape: (143, 9) and output shape: (143, 22)


##Building the Model

In [10]:
#hyperparameters
epochs=50
vocab_size=len(tokenizer.word_index) + 1
embed_dim=512
units=128
target_length=target_tensor.shape[1]

In [11]:
# build RNN Model with tensorflow
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, dropout=0.2)),
    tf.keras.layers.Dense(units, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(target_length, activation='softmax')
])

optimizer = tf.keras.optimizers.Adam(lr=1e-2)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 512)         66048     
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               656384    
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 22)                2838      
Total params: 758,166
Trainable params: 758,166
Non-trainable params: 0
_________________________________________________________________


  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [12]:
early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=4)

# train the model
model.fit(input_tensor, target_tensor, epochs=epochs, callbacks=[early_stop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


<keras.callbacks.History at 0x7f3828f267d0>

In [13]:
def response(sentence):
    sent_seq = []
    doc = nlp(repr(sentence))
    
    # split the input sentences into words
    for token in doc:
        if token.text in tokenizer.word_index:
            sent_seq.append(tokenizer.word_index[token.text])

        # handle the unknown words error
        else:
            sent_seq.append(tokenizer.word_index['<unk>'])

    sent_seq = tf.expand_dims(sent_seq, 0)
    # predict the category of input sentences
    pred = model(sent_seq)

    pred_class = np.argmax(pred.numpy(), axis=1)
    
    # choice a random response for predicted sentence
    return random.choice(intent_doc[trg_index_word[pred_class[0]]]), trg_index_word[pred_class[0]]

# chat with bot
print("Note: Enter 'quit' to break the loop.")
while True:
    input_ = input('You: ')
    if input_.lower() == 'quit':
        break
    res, typ = response(input_)
    print('Bot: {} -- TYPE: {}'.format(res, typ))
    print()

Note: Enter 'quit' to break the loop.
You: hello
Bot: Hola human, please tell me your GeniSys user -- TYPE: Greeting

You: how are you
Bot: Hi, I am good thank you, how are you? Please tell me your GeniSys user -- TYPE: CourtesyGreeting

You: whats your name
Bot: Your name is <HUMAN>, how can I help you? -- TYPE: CurrentHumanQuery

You: what's your name ?
Bot: Your name is <HUMAN>, how can I help you? -- TYPE: CurrentHumanQuery

You: no i want your real name
Bot: My real name is GeniSys -- TYPE: RealNameQuery

You: can you tell me if you are self- aware
Bot: Your name is <HUMAN>, how can I help you? -- TYPE: CurrentHumanQuery

You: Can you tell me if you are self-aware ?
Bot: You are <HUMAN>! How can I help? -- TYPE: CurrentHumanQuery

You: tell me a joke 
Bot: Man: Doctor, I've just swallowed a pillow. Doctor: How do you feel? Man: A little down in the mouth. -- TYPE: Jokes

You: can you open the door please ?
Bot: I’m sorry, I’m afraid I can’t do that! -- TYPE: PodBayDoor

You: are y