In [None]:
pip install nltk

In [None]:
pip install tensorflow

In [None]:
!pip install keras

In [None]:
!pip install keras_preprocessing

In [None]:
"ChatBot"

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

#Download NLTK data
# "punkt_tab" is the tokenizer resource that nltk.word_tokenize loads on
# recent NLTK releases; "punkt" is kept for older installations. Requesting
# both keeps the notebook runnable whichever NLTK version pip resolved.
for resource in ("stopwords", "punkt", "punkt_tab", "wordnet"):
    nltk.download(resource)

#Load data
import json

# Read the intents definition from the notebook's working directory.
# The context manager closes the file handle even if parsing fails, and the
# explicit encoding keeps non-ASCII patterns/responses from depending on the
# platform default.
with open("intents.json", encoding="utf-8") as f:
    data = json.load(f)

#Preprocess data
import string

# Accumulators filled while walking the intents file:
#   words   - every token seen in any pattern (raw vocabulary candidates)
#   classes - distinct intent tags, in first-seen order
#   data_x  - the raw pattern strings
#   data_y  - tag of the pattern stored at the same position in data_x
words, classes = [], []
data_x, data_y = [], []
ignore_words = ["!", "?"]  # defined but not read anywhere in this cell

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        # Tokenize the pattern and add its tokens to the running vocabulary
        words += nltk.word_tokenize(pattern)

        # Keep the pattern and its tag aligned by index
        data_x.append(pattern)
        tag = intent["tag"]
        data_y.append(tag)

        # Record each tag only once
        if tag not in classes:
            classes.append(tag)

# Create the lemmatizer that reduces words to their dictionary form
lemmatizer = WordNetLemmatizer()

# Build the final vocabulary: lowercase and lemmatize every token, skipping
# tokens that are found in string.punctuation (a substring test against that
# string). The set removes duplicates; sorting gives alphabetical order.
words = sorted(
    {
        lemmatizer.lemmatize(token.lower())
        for token in words
        if token not in string.punctuation
    }
)
classes = sorted(set(classes))
print([0] * len(classes))

In [None]:
# Convert every pattern into numbers with the bag-of-words model.
# Each pattern becomes a vector as long as the vocabulary: position i is 1 when
# vocabulary word i occurs among the pattern's tokens and 0 otherwise.
# Each tag becomes a one-hot vector as long as the list of classes.

import random
import numpy as np

#Text to Numbers
training = []
out_empty = [0] * len(classes)

for idx, doc in enumerate(data_x):
  # Normalise the pattern exactly like the vocabulary was built (tokenize,
  # lowercase, lemmatize per token). Matching whole tokens instead of
  # substrings prevents e.g. "hi" from firing on "this", and keeps training
  # features consistent with bag_of_words() used at prediction time.
  doc_tokens = {
    lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)
  }
  bow = [1 if word in doc_tokens else 0 for word in words]

  # One-hot encode the tag this pattern belongs to
  output_row = list(out_empty)
  output_row[classes.index(data_y[idx])] = 1

  # Exactly one training row per pattern (this used to run once per
  # vocabulary word, duplicating every sample len(words) times)
  training.append([bow, output_row])

#shuffle the data and convert it to an array
random.shuffle(training)

#split the features and target labels
# Built from the Python lists so the result is a plain numeric 2-D array even
# when the vocabulary and the class list happen to have the same length.
train_x = np.array([features for features, _ in training])
train_y = np.array([target for _, target in training])
training = np.array(training, dtype = object)

In [None]:
!pip install tensorflow-cpu

In [None]:
# Neural Network Model
# Feed-forward classifier: bag-of-words vector in, one probability per intent
# tag out (softmax).
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation="relu"))
# Dropout rate is 0.5. The previous Dropout(0,5) passed rate=0 and
# noise_shape=5, which disabled dropout entirely.
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation="softmax"))

adam = tf.keras.optimizers.Adam(learning_rate=0.01, weight_decay=1e-6)
model.compile(loss="categorical_crossentropy",
              optimizer=adam,
              metrics=["accuracy"])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that added a stray "None" line to the output).
model.summary()
model.fit(x=train_x, y=train_y, epochs=200, verbose=1)

In [None]:
# Preprocess the Input
def clean_text(text):
  """Tokenize a raw message and normalise each token.

  The text is split with nltk.word_tokenize; every token is lowercased and
  lemmatized with the notebook-level `lemmatizer`. Lowercasing mirrors how the
  vocabulary is built, so capitalised input such as "Hello" still matches the
  vocabulary entry "hello".

  Args:
    text: the message as a string.

  Returns:
    A list of lowercased, lemmatized tokens in their original order.
  """
  tokens = nltk.word_tokenize(text)
  return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(text, vocab):
  """Encode a message as a bag-of-words vector over `vocab`.

  The message is normalised with clean_text(); position i of the result is 1
  when vocab[i] equals one of the resulting tokens and 0 otherwise.

  Args:
    text: the message as a string.
    vocab: sequence of vocabulary words that defines the vector layout.

  Returns:
    A numpy array with one 0/1 entry per vocabulary word.
  """
  present = set(clean_text(text))
  return np.array([1 if word in present else 0 for word in vocab])

def pred_class(text, vocab, labels, thresh=0.5):
  """Predict the intent tag(s) for a message.

  The message is encoded with bag_of_words() and passed to the notebook-level
  `model`. Every class whose predicted probability is strictly greater than
  `thresh` is kept, ordered from most to least probable.

  Args:
    text: the message as a string.
    vocab: vocabulary used to encode the message.
    labels: class tags, indexed like the model's output vector.
    thresh: minimum probability (exclusive) a class needs to be returned.

  Returns:
    A list of tags sorted by decreasing probability; empty when no class
    exceeds the threshold.
  """
  bow = bag_of_words(text, vocab)
  # verbose=0 keeps the Keras progress bar out of the conversation
  result = model.predict(np.array([bow]), verbose=0)[0]  # class probabilities
  y_pred = [(idx, prob) for idx, prob in enumerate(result) if prob > thresh]
  y_pred.sort(key=lambda pair: pair[1], reverse=True)
  return [labels[idx] for idx, _ in y_pred]

def get_response(intents_list, intents_json):
  """Pick the chatbot's reply for the predicted intent.

  Uses the first tag in `intents_list` and returns a random entry from the
  "responses" of the intent with that tag in `intents_json`. The fallback
  sentence is returned when `intents_list` is empty (no probability crossed
  the threshold), when the tag is not present in `intents_json`, or when the
  matching intent has no responses.

  Args:
    intents_list: predicted tags, most probable first.
    intents_json: parsed intents file containing an "intents" list.

  Returns:
    The reply as a string.
  """
  fallback = "sorry! I don't understand"
  if len(intents_list) == 0:
    return fallback

  tag = intents_list[0]
  for intent in intents_json["intents"]:
    if intent["tag"] == tag:
      responses = intent["responses"]
      # An intent without responses cannot answer; use the fallback instead
      return random.choice(responses) if responses else fallback

  # Tag not found in the intents file (previously an UnboundLocalError)
  return fallback

In [None]:
# Chat with the bot: each message is classified and answered until the user
# types 0.
print("Press 0 if you don't want to chat with our Chatbot")
message = input("")
while message != "0":
  intents = pred_class(message, words, classes)
  result = get_response(intents, data)
  print(result)
  message = input("")