In [43]:
#Mounting the Google Drive
from google.colab import drive
drive.mount('/content/drive')
data_root='/content/drive/My Drive/Colab Notebooks/Chatbot'



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:

#Importing Relevant Libraries
import json
import string
import random

import nltk
nltk.download('omw-1.4')
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
nltk.download("punkt")
nltk.download("wordnet")

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [45]:
#Loading the dataset (intents.json)
data_file = open(data_root + '/general intents.json').read()
data = json.loads(data_file)


In [46]:
#creating data_X and data_Y

words = [] #For Bow model/vocabulary for patterns
classes = [] #For Bow model/vocabulary for tags
data_x = [] #For storing each pattern
data_y = [] #For storing tag corresponding to each pattern in data_x

#Iterating over all the intents
for intent in data["intents"]:
  for pattern in intent["patterns"]:
    tokens = nltk.word_tokenize(pattern) #tokenize each pattern
    words.extend(tokens) #and append token to words
    data_x.append(pattern) #appending pattern to data_x
    data_y.append(intent["tag"]) #appending associated tag to data_y

    #Adding tag to the classes if it's not there already
    if intent["tag"] not in classes:
      classes.append(intent["tag"])

#Initialising lemmatizer to get stem of words
lemmatizer = WordNetLemmatizer()

#Lemmatize all the words in the vocab and convert them to lowercase
#if words don't appear in punctuation
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]

#Sorting the vocabulary and classes in alphabetical order and
#taking the set to ensure not duplicates occur
words = sorted(set(words))
classes = sorted(set(classes))

In [47]:
#Converting texts to numbers for Bag of Words model
training = []
out_empty = [0] * len(classes)

#Creating bag of words model
for idx, doc in enumerate(data_x):
  bow = []
  text = lemmatizer.lemmatize(doc.lower())
  for word in words:
    bow.append(1) if word in text else bow.append(0)
  
  #Mark the index of class that the current pattern is associated to
  output_row = list(out_empty)
  output_row[classes.index(data_y[idx])] = 1

  #Add the one hot encoded BoW and associated classes to training
  training.append([bow,output_row])

#Shuffle the data and convert it into an array
random.shuffle(training)
training = np.array(training,dtype=object)

#Split the features and target labels
train_x = np.array(list(training[:,0]))
train_y = np.array(list(training[:,1]))


In [48]:
#Deep Neural Networds model
model = Sequential()
model.add(Dense(128,input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64,activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]),activation = 'softmax'))

adam = tf.keras.optimizers.Adam(learning_rate = 0.01, decay = 1e-6)
model.compile(loss = 'categorical_crossentropy', optimizer=adam, metrics = ["accuracy"])
print(model.summary())

model.fit(x=train_x,y=train_y,epochs=150,verbose=1)


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 128)               3712      
                                                                 
 dropout_8 (Dropout)         (None, 128)               0         
                                                                 
 dense_13 (Dense)            (None, 64)                8256      
                                                                 
 dropout_9 (Dropout)         (None, 64)                0         
                                                                 
 dense_14 (Dense)            (None, 3)                 195       
                                                                 
Total params: 12,163
Trainable params: 12,163
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/1

<keras.callbacks.History at 0x7f6e6cccc7c0>

In [49]:
#Preprocessing Input
def clean_text(text):
  tokens = nltk.word_tokenize(text) #Receives each text as an input and tokenizes it
  tokens = [lemmatizer.lemmatize(word) for word in tokens] #Receives the token and then converts it into root form via lemmatizer
  return tokens #Output is a list of words in their root form

def bag_of_words(text,vocab):
  tokens = clean_text(text)
  bow = [0] * len(vocab) #forming the Bag-of-Words Model
  for w in tokens:
    for idx,word in enumerate(vocab):
      if word == w:
        bow[idx] = 1 #Converts text into array using the model and input vocabulary
  return np.array(bow) 

def pred_class(text,vocab,labels): 
#Takes text, vocabulary and labels as input and returns a list containing 
#a tag that corresponds to the highest probability
  bow = bag_of_words(text,vocab)
  result = model.predict(np.array([bow]))[0] # Extracting possibilities
  thresh = 0.5
  y_pred = [[indx,res] for indx,res in enumerate(result) if res > thresh]
  y_pred.sort(key=lambda x: x[1],reverse = True) #Sorting by values of probability in decreasing order
  return_list = []
  for r in y_pred:
    return_list.append(labels[r[0]]) #Contains labels(tags) for highest probability

  return return_list

def get_response(intents_list, intents_json):
#Takes the tag returned by "Pred_class" , then uses it to randomly choose a response
#corresponding to the same tag in "intents.json".
  if len(intents_list) == 0:
    result = "Sorry! I don't understand" #If intents_list is empty, then probability does not cross Threshold
    #As a result, we put in "Sorry" as a response.
  else:
    tag = intents_list[0]
    list_of_intents = intents_json["intents"]
    for i in list_of_intents:
      if i["tag"] == tag:
        result = random.choice(i["responses"])
        break
  return result
  

In [50]:
#Calling relevant function and interacting with the Chatbot
print("Press 0 if you don't want to chat with the ChatBot")

while True:
  message = input("")
  if message == "0":
    break
  intents = pred_class(message, words, classes)
  result = get_response(intents, data)
  print(result)

Press 0 if you don't want to chat with the ChatBot
hello
Hi there, how can I help?
hi
Hello
Hi there
Hi there, how can I help?
thanks
Happy to help!
Thank you
See you soon!
Thanks
Hi there, how can I help?


KeyboardInterrupt: ignored