# Customer Support ChatBot

In [1]:
#Import dependency libraries
import numpy as np
import random

import pickle
import json
import string
import nltk
from nltk.stem import WordNetLemmatizer

import keras
from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization
import keras.backend as k

import warnings 
warnings.filterwarnings('ignore')

In [2]:
#Load the intent file (each intent carries a tag, example patterns and responses).
#JSON text is UTF-8 by specification, so the encoding is stated explicitly
#instead of relying on the platform default (which is not UTF-8 on every OS).
with open('commands.json', encoding='utf-8') as file:
    data = json.load(file)

In [3]:
#data

### Data Preprocessing

In [4]:
#Instantiate the WordNetLemmatizer (despite the name `stemm`, it lemmatizes rather than stems)
stemm = WordNetLemmatizer()

#String of ASCII punctuation characters, used below to drop punctuation tokens.
#NOTE: this is a str, so `token in punctuation` is a substring test rather than a set lookup.
punctuation = string.punctuation

In [5]:
try:
    #Reuse the cached (words, labels, documents) tuple if a previous run saved it.
    #NOTE: unpickling can execute arbitrary code - only load a file.pkl that this notebook created.
    #NOTE: the cache is not refreshed when commands.json changes; delete file.pkl to force a rebuild.
    with open('file.pkl','rb') as f:
        words,labels,documents = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError, ValueError):
    #Cache missing, unreadable, truncated or of the wrong shape: rebuild it from the intent data.
    #Only these errors trigger a rebuild, so Ctrl-C and genuine programming errors still surface.
    words = []      #every token seen in any pattern (deduplicated further down)
    labels = []     #unique intent tags
    documents = []  #(token list, tag) pairs, one per pattern

    for intent in data['intents']:
        #loop through all patterns of this intent
        for pattern in intent['patterns']:

            #lower-case and tokenize the pattern
            words_list = nltk.word_tokenize(pattern.lower())
            words.extend(words_list)

            #remember which tag this tokenized pattern belongs to
            documents.append((words_list,intent['tag']))

            #record each tag only once
            if intent['tag'] not in labels:
                labels.append(intent['tag'])

    #Lemmatize, drop punctuation tokens, remove duplicates and sort to get a stable vocabulary order
    words = sorted({stemm.lemmatize(w) for w in words if w not in punctuation})

    labels = sorted(labels)

    #Serialize vocabulary, labels and documents so later runs can skip the rebuild
    with open('file.pkl','wb') as f:
        pickle.dump((words,labels,documents),f)

### Creating training data

In [6]:
# Accumulator for [bag_of_words, one_hot_label] pairs (filled by the loop in the next cell)
training = list()
# All-zero template with one slot per label; it is copied for every document
output_empty = [0 for _ in labels]

In [7]:
# Build the dataset: one [bag-of-words vector, one-hot label vector] pair per document
for tokens, tag in documents:
    # normalise the document's tokens the same way the vocabulary was built
    lemmas = [stemm.lemmatize(tok.lower()) for tok in tokens if tok not in punctuation]

    # 1 where the vocabulary word occurs in this document, 0 otherwise
    bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

    # fresh copy of the zero template with this document's label switched on
    one_hot = output_empty[:]
    one_hot[labels.index(tag)] = 1

    # store features and label together so they stay aligned when shuffled
    training.append([bag, one_hot])

In [8]:
#shuffle the data so the training order does not follow the intent file order
random.shuffle(training)

#Converting data into array type.
#Each row is [bag, output_raw] and the two lists generally differ in length
#(vocabulary size vs. number of labels), so the input is ragged. NumPy >= 1.24
#raises ValueError for ragged input unless dtype=object is requested explicitly.
training = np.array(training, dtype=object)

In [9]:
#Creating independent (x: bag-of-words) and dependent (y: one-hot label) features.
#Column 0 / column 1 of `training` hold Python lists; stacking them into dense
#float32 matrices gives Keras a proper 2-D array instead of a list of lists,
#which newer Keras versions refuse as model input.
traning_x = np.array(list(training[:,0]), dtype=np.float32)
traning_y = np.array(list(training[:,1]), dtype=np.float32)

### Model Building

In [10]:
# Feed-forward intent classifier assembled with the Sequential API
k.clear_session()   # reset Keras' global state before building a new model

n_features = len(traning_x[0])   # width of one bag-of-words vector
n_classes = len(traning_y[0])    # width of one one-hot label vector

model = Sequential([
    # input block: 150 ReLU units followed by 30% dropout
    Dense(150, input_shape=(n_features,), activation='relu'),
    Dropout(0.30),

    # hidden block: 100 ReLU units, batch normalization, 30% dropout
    Dense(100, activation='relu'),
    BatchNormalization(),
    Dropout(0.30),

    # hidden block: 100 ReLU units, 30% dropout
    Dense(100, activation='relu'),
    Dropout(0.30),

    # output: one softmax probability per label
    Dense(n_classes, activation='softmax'),
])

In [11]:
#Print the layer-by-layer model summary (output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 150)               13200     
_________________________________________________________________
dropout (Dropout)            (None, 150)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               15100     
_________________________________________________________________
batch_normalization (BatchNo (None, 100)               400       
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0

In [12]:
# compile model: categorical cross-entropy loss (the targets are one-hot vectors),
# Adam optimizer, accuracy reported as the training metric
model.compile(loss="categorical_crossentropy",optimizer = 'adam',metrics=['accuracy'])

In [15]:
#fitting data into model: 100 epochs, mini-batches of 20 samples, progress shown per epoch
model.fit(traning_x,traning_y,epochs=100,batch_size=20,verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1bf3c6988b0>

In [14]:
#save the trained model to disk as chatbot_model.h5 so it can be reloaded without retraining
model.save('chatbot_model.h5')

### Predict the response

Create a function that tokenizes and lemmatizes the user's input sentence.

In [16]:
def clean_up(sentence):
    """Return the lemmatized, punctuation-free tokens of a sentence.

    The sentence is lower-cased and tokenized with NLTK; tokens found in
    `punctuation` are dropped and every remaining token is lemmatized.
    """
    lemmas = []
    for token in nltk.word_tokenize(sentence.lower()):
        # skip punctuation tokens
        if token in punctuation:
            continue
        lemmas.append(stemm.lemmatize(token))
    return lemmas

Create a function that builds a bag-of-words vector from the user's input sentence and the `words` vocabulary list.

In [17]:
def bag_of_words(sentence):
    """Encode a sentence as a 0/1 vector over the `words` vocabulary.

    Position i of the returned list is 1 when words[i] occurs among the
    cleaned-up tokens of the sentence and 0 otherwise, so the list always has
    len(words) entries.
    """
    # distinct tokens of the sentence after tokenizing and lemmatizing
    present = set(clean_up(sentence))

    # one flag per vocabulary word, in vocabulary order
    return [1 if vocab_word in present else 0 for vocab_word in words]

Create a function that returns the predicted label (intent) and its probability for the user's input.

In [18]:
def predict_labels(sentence, thr=0.65):
    """Predict the intents of a sentence whose probability exceeds a threshold.

    Parameters
    ----------
    sentence : str
        Raw user input.
    thr : float, optional
        Minimum probability an intent needs in order to be reported. Defaults
        to 0.65, the value this notebook has always used.

    Returns
    -------
    list of dict
        One {'intent': <label>, 'probability': <str>} entry per intent above
        the threshold, sorted by descending probability. Empty when no intent
        is confident enough.
    """
    #call bag_of_words function to get the bag_of_words
    bow = bag_of_words(sentence)

    #The model works on batches, so wrap the single vector in a 2-D float array.
    #A real array is passed because newer Keras versions reject nested Python
    #lists as input; verbose=0 keeps progress bars out of the chat output.
    batch = np.array([bow], dtype=np.float32)
    result = model.predict(batch, verbose=0)[0]

    #Keep only the (label index, probability) pairs above the threshold
    results = [(i, r) for i, r in enumerate(result) if r > thr]

    #Most probable intent first
    results.sort(key=lambda pair: pair[1], reverse=True)

    #Translate label indices into label names; probabilities are returned as strings
    return [{'intent': labels[i], 'probability': str(p)} for i, p in results]

Create a function that replies to the user with a response chosen for the highest-probability intent predicted by the model.

In [19]:
def response(intents_list,data_intents_json):
    """Pick a random response for the most probable predicted intent.

    Parameters
    ----------
    intents_list : list of dict
        Predictions sorted by descending probability; only the first entry's
        'intent' key is used.
    data_intents_json : dict
        Intent data with an 'intents' list whose items carry a 'tag' and a
        list of 'responses'.

    Returns
    -------
    tuple
        (response text, tag) for the first intent whose tag matches.

    Raises
    ------
    IndexError
        If intents_list is empty.
    ValueError
        If no intent in data_intents_json carries the predicted tag.
    """
    #The first entry has the highest probability
    tag = intents_list[0]['intent']

    #Search the intent data that was passed in (not the module-level `data`)
    for intent in data_intents_json['intents']:
        if intent['tag'] == tag:
            #get a random response for the matching intent
            return random.choice(intent['responses']), tag

    #Without this the function would fail later with an unrelated unbound-variable error
    raise ValueError("no intent with tag %r found in the intent data" % (tag,))

In the final stage, we feed the user's input to the bot, which responds based on that input.

In [21]:
print("BOT: Im bot. I will answer your queries. If you want to exit, type Bye!")

# Keep chatting until the predicted intent is 'goodbye'
flag = True
while flag:
    user_response = input('user: ').lower()

    # intents the model is confident about (empty list when none clears the threshold)
    ints = predict_labels(user_response)

    # nothing recognised: answer with a random fallback and ask again
    if not ints:
        noanswer = ["Sorry, can't understand you", "Please give me more info", "Not sure I understand"]
        print('BOT:', random.choice(noanswer))
        continue

    # res is (response text, tag); the reply is printed for every intent, goodbye included
    res = response(ints, data)
    print('BOT:', res[0])

    # the loop ends once the user has said goodbye
    flag = res[1] != 'goodbye'

BOT: Im bot. I will answer your queries. If you want to exit, type Bye!
user: hi
BOT: Hello, thanks for asking
user: what can u do
BOT: Offering support for Adverse drug reaction, Blood pressure, Hospitals and Pharmacies
user: hjbvf
BOT: Please give me more info
user: List pharmacies
BOT: Please provide pharmacy name
user: thanks
BOT: Happy to help!
user: Show blood pressure results for patient
BOT: Patient ID?
user: 
BOT: Please give me more info
user: drugs module
BOT: Navigating to Adverse drug reaction module
user: thanks
BOT: Happy to help!
user: bye
BOT: Bye! Come back again soon.
