# Simple Chatbot


In [None]:
#Imports
import json
import string
import random
import nltk
import numpy as num
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# NLTK resources needed by the cells below:
#   punkt / punkt_tab - tokenizer models behind nltk.word_tokenize
#                       (newer NLTK releases load "punkt_tab" instead of "punkt")
#   wordnet           - lexical database used by WordNetLemmatizer
for resource in ("punkt", "punkt_tab", "wordnet"):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
# Intent definitions: each intent has a tag (the class label), the example
# patterns the classifier is trained on, and the responses the bot picks from.
data = {"intents": [

    {"tag": "age",
     "patterns": ["how old are you?"],
     "responses": ["I am 28 years old.", "I was born on October 7th 1995.", "My Birthday is October 7th 1995."],
     },
    {"tag": "greeting",
     "patterns": ["Hi", "Hello", "Hey", "Hi There"],
     "responses": ["Hi there", "Hello", "Hi :)"],
     },
    {"tag": "goodbye",
     "patterns": ["bye", "later"],
     "responses": ["Bye", "take care"],
     },
    {"tag": "name",
     "patterns": ["what's your name?", "Who are you?"],
     # the separating comma used to sit inside the first string
     "responses": ["My name is Kierra.", "Kierra is my name."],
     },
    {"tag": "conversation",
     "patterns": ["How are you?", "How you been"],
     # the separating comma used to sit inside the first string
     "responses": ["I am good.", "Great", "I've been better."],
     },

]}

# Preprocess Data

In [None]:
# Lemmatizer shared by the preprocessing and prediction cells
lm = WordNetLemmatizer()

# documentX holds every raw pattern, documentY the tag it belongs to (same order)
documentX = []
documentY = []

rawTokens = []    # every token from every pattern, before normalisation
seenTags = set()  # distinct intent tags

for entry in data["intents"]:
    label = entry["tag"]
    seenTags.add(label)
    for phrase in entry["patterns"]:
        rawTokens += nltk.word_tokenize(phrase)
        documentX.append(phrase)
        documentY.append(label)

# Vocabulary: drop punctuation tokens, lowercase + lemmatize the rest,
# then de-duplicate and sort
words = sorted({
    lm.lemmatize(token.lower())
    for token in rawTokens
    if token not in string.punctuation
})

# Class labels: the distinct tags in sorted order
classes = sorted(seenTags)

In [None]:
print(words)

["'s", 'are', 'been', 'bye', 'hello', 'hey', 'hi', 'how', 'later', 'name', 'old', 'there', 'what', 'who', 'you', 'your']


In [None]:
print(classes)

['age', 'conversation', 'goodbye', 'greeting', 'name']


In [None]:
print(documentX)

['how old are you?', 'Hi', 'Hello', 'Hey', 'Hi There', 'bye', 'later', "what's your name?", 'who are you?', 'How are you?', 'How you been']


In [None]:
print(documentY)

['age', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'name', 'name', 'conversation', 'conversation']


# Design Neural Network

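Converting the text to numerical data: each pattern becomes a bag-of-words vector over the vocabulary, and each tag becomes a one-hot label.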

In [None]:
# Build the training set: one [bag-of-words, one-hot label] pair per pattern.
trainingData = []
outEmpty = [0] * len(classes)

for idx, doc in enumerate(documentX):
    # Tokenize, lowercase and lemmatize the pattern the same way the vocabulary
    # was built, then match whole tokens. A substring test on the raw sentence
    # would, for example, flag "you" for "what's your name?" (inside "your").
    docTokens = {lm.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
    bagOfwords = [1 if word in docTokens else 0 for word in words]

    # one-hot label: 1 at the index of this pattern's tag
    outputRow = list(outEmpty)
    outputRow[classes.index(documentY[idx])] = 1
    trainingData.append([bagOfwords, outputRow])

random.shuffle(trainingData)
# convert data into an array after shuffling
trainingData = num.array(trainingData, dtype=object)

# input: bag-of-words vectors
x = num.array(list(trainingData[:, 0]))
# output: one-hot labels
y = num.array(list(trainingData[:, 1]))

# Model

In [None]:
inputShape = (len(x[0]),)
outputShape = len(y[0])

# Feed-forward classifier: bag-of-words vector in, one probability per intent out
model = Sequential()

# First hidden layer; input_shape is the length of one bag-of-words vector
model.add(Dense(128, input_shape=inputShape, activation="relu"))
# Dropout randomly zeroes a fraction of the activations during training
# to reduce overfitting
model.add(Dropout(0.5))

# Second hidden layer
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
# Output layer: softmax produces one probability per class
model.add(Dense(outputShape, activation = "softmax"))

md = tf.keras.optimizers.Adam(learning_rate=0.01, weight_decay=1e-6)

model.compile(loss='categorical_crossentropy',
              optimizer=md,
              metrics=["accuracy"])

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output
model.summary()

# Keep the History object in a variable so its repr is not echoed as cell output
history = model.fit(x, y, epochs=200, verbose=1)


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 128)               2176      
                                                                 
 dropout_10 (Dropout)        (None, 128)               0         
                                                                 
 dense_16 (Dense)            (None, 64)                8256      
                                                                 
 dropout_11 (Dropout)        (None, 64)                0         
                                                                 
 dense_17 (Dense)            (None, 5)                 325       
                                                                 
Total params: 10757 (42.02 KB)
Trainable params: 10757 (42.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/200
E

<keras.src.callbacks.History at 0x79a792e49f90>

# Add Features

In [None]:
def wordText(text):
  """Tokenize ``text`` and return its lowercased, lemmatized tokens.

  The vocabulary is built from lowercased, lemmatized tokens, so the user's
  message is normalised the same way here; otherwise a capitalised word such
  as "How" never matches the vocabulary entry "how".

  Args:
    text: the raw message string.

  Returns:
    A list of token strings.
  """
  newtkns = nltk.word_tokenize(text)
  newtkns = [lm.lemmatize(word.lower()) for word in newtkns]
  return newtkns

def wordBag(text, vocab):
  """Encode ``text`` as a bag-of-words vector over ``vocab``.

  Args:
    text: the raw message string.
    vocab: sequence of vocabulary words; position i of the result
      corresponds to vocab[i].

  Returns:
    A numpy array with 1 where the vocabulary word is one of the tokens
    produced by wordText(text), and 0 elsewhere.
  """
  present = set(wordText(text))
  return num.array([1 if entry in present else 0 for entry in vocab])

def Pclass(text, vocab, labels, threshold=0.2):
  """Predict the intent labels for ``text``, most probable first.

  Args:
    text: the raw message string.
    vocab: vocabulary used to build the bag-of-words input.
    labels: class labels, indexed like the model's output vector.
    threshold: only classes whose predicted probability is strictly
      greater than this value are returned (default 0.2).

  Returns:
    A list of labels sorted by descending probability; it is empty when no
    class exceeds ``threshold``.
  """
  bagOwords = wordBag(text, vocab)
  # verbose=0 keeps Keras from writing a progress bar into the conversation
  ourResult = model.predict(num.array([bagOwords]), verbose=0)[0]

  ranked = sorted(
    ((idx, res) for idx, res in enumerate(ourResult) if res > threshold),
    key=lambda pair: pair[1],
    reverse=True,
  )
  return [labels[idx] for idx, _ in ranked]

def getRes(firstlist, fJson, fallback="Sorry, I didn't understand that."):
  """Pick a random response for the top predicted intent.

  Args:
    firstlist: predicted intent tags, best match first (may be empty).
    fJson: intents dictionary with an "intents" list whose items carry a
      "tag" and a list of "responses".
    fallback: reply returned when ``firstlist`` is empty, or when the top tag
      has no matching intent or no responses.

  Returns:
    One response string chosen at random for the top tag, or ``fallback``.
  """
  # No prediction at all: indexing firstlist[0] would raise IndexError
  if not firstlist:
    return fallback

  tag = firstlist[0]
  for i in fJson["intents"]:
    if i["tag"] == tag and i["responses"]:
      return random.choice(i["responses"])

  # Unknown tag: falling through used to leave the result variable unbound
  return fallback

In [None]:
# running the chatbot; type "exit" or "quit" (or interrupt the cell) to stop
while True:
    try:
        newMessage = input("")
    except (EOFError, KeyboardInterrupt):
        # interrupting the kernel or closing stdin ends the chat quietly
        break

    # explicit exit words, otherwise "exit" would be classified like any message
    if newMessage.strip().lower() in ("exit", "quit"):
        break
    # nothing to classify in a blank message
    if not newMessage.strip():
        continue

    intents = Pclass(newMessage, words, classes)
    ourResult = getRes(intents, data)
    print(ourResult)

hi
Hi there
How are you
Kierra is my name.
what is your nam
Kierra is my name.
How are you?
Kierra is my name.
bye
take care
exit
Hi there


KeyboardInterrupt: ignored

The next step is to use a library to make it more dynamic.