In [1]:
pip install tensorflow keras nltk

Note: you may need to restart the kernel to use updated packages.


In [9]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random
import nltk

from nltk.stem import WordNetLemmatizer

word_normalizer = WordNetLemmatizer()

import json

import pickle


chat_parsed_intents_file = open('parsed_intents.json').read()

parsed_intents = json.loads(chat_parsed_intents_file)

In [4]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/karan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [10]:
vocabulary_list=[]

intent_categories = []

input_corpus = []

punctuation_marks = ['!', '?', ',', '.']


for intent in parsed_intents['parsed_intents']:

    for pattern in intent['patterns']:

        # Break down each sentence into individual tokens

        word = nltk.word_tokenize(pattern)

        vocabulary_list.extend(word)        

        #add input_corpus in the corpus

        input_corpus.append((word, intent['tag']))

        # add to our intent_categories list

        if intent['tag'] not in intent_categories:

            intent_categories.append(intent['tag'])


print(input_corpus)

[(['Hi', 'there'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hey'], 'greeting'), (['Hola'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Nice', 'chatting', 'to', 'you', ',', 'bye'], 'goodbye'), (['Till', 'next', 'time'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['Awesome', ',', 'thanks'], 'thanks'), (['Thanks', 'for', 'helping', 'me'], 'thanks'), (['How', 'you', 'could', 'help', 'me', '?'], 'options'), (['What', 'you', 'can', 'do', '?'], 'options'), (['What', 'help', 'you', 'provide', '?'], 'options'), (['How', 'you', 'can', 'be', 'helpful', '?'], 'options'), (['What', 'support', 'is', 'offered'], 'options'), (['How', 'to', 'check', 'Adverse', 'drug', 'reaction', '?'], 'adverse_drug'), (['Open', 'adverse', 'drugs', 'module'], 'adverse_drug'), (['Give', 'm

In [11]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/karan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [12]:
# Normalize, lowercase, and remove duplicates from the word list

vocabulary_list = [word_normalizer.lemmatize(w.lower()) for w in vocabulary_list if w not in punctuation_marks]

vocabulary_list = sorted(list(set(vocabulary_list)))

# sort intent_categories

intent_categories = sorted(list(set(intent_categories)))

# input_corpus = combination between patterns and parsed_intents

print (len(input_corpus), "input_corpus")

# intent_categories = parsed_intents

print (len(intent_categories), "intent_categories", intent_categories)

# vocabulary_list = all vocabulary_list, vocabulary

print (len(vocabulary_list), "unique lemmatized vocabulary_list", vocabulary_list)


pickle.dump(vocabulary_list,open('vocabulary_list.pkl','wb'))

pickle.dump(intent_categories,open('intent_categories.pkl','wb'))

47 input_corpus
9 intent_categories ['adverse_drug', 'blood_pressure', 'blood_pressure_search', 'goodbye', 'greeting', 'hospital_search', 'options', 'pharmacy_search', 'thanks']
87 unique lemmatized vocabulary_list ["'s", 'a', 'adverse', 'all', 'anyone', 'are', 'awesome', 'be', 'behavior', 'blood', 'by', 'bye', 'can', 'causing', 'chatting', 'check', 'could', 'data', 'day', 'detail', 'do', 'dont', 'drug', 'entry', 'find', 'for', 'give', 'good', 'goodbye', 'have', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'history', 'hola', 'hospital', 'how', 'i', 'id', 'is', 'later', 'list', 'load', 'locate', 'log', 'looking', 'lookup', 'management', 'me', 'module', 'nearby', 'next', 'nice', 'of', 'offered', 'open', 'patient', 'pharmacy', 'pressure', 'provide', 'reaction', 'related', 'result', 'search', 'searching', 'see', 'show', 'suitable', 'support', 'task', 'thank', 'thanks', 'that', 'there', 'till', 'time', 'to', 'transfer', 'up', 'want', 'what', 'which', 'with', 'you']


In [13]:
training_data = []

# Create empty array for the output
initial_output = [0] * len(intent_categories)

# Training set, word_presence_bag of vocabulary_list for every sentence
for doc in input_corpus:

    # Initializing word_presence_bag of vocabulary_list
    word_presence_bag = []

    # List of tokenized vocabulary_list for the pattern
    word_patterns = doc[0]

    # Lemmatize each word - create base word, in attempt to represent related vocabulary_list
    word_patterns = [word_normalizer.lemmatize(word.lower()) for word in word_patterns]

    # Create the word_presence_bag of vocabulary_list array with 1 if the word is found in current pattern
    for word in vocabulary_list:
        word_presence_bag.append(1 if word in word_patterns else 0)

    # Output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(initial_output)  # Copy the empty array
    output_row[intent_categories.index(doc[1])] = 1

    # Append both the word_presence_bag of vocabulary_list and the corresponding one-hot encoded class to the training_data data
    training_data.append([word_presence_bag, output_row])

# Convert training_data data to a NumPy array
import random
import numpy as np

# Shuffle the features and make numpy array
random.shuffle(training_data)

# Separate the data into X (features) and Y (labels)
train_x = np.array([item[0] for item in training_data])  # Extract word_presence_bags of vocabulary_list
train_y = np.array([item[1] for item in training_data])  # Extract one-hot encoded outputs

print("Training data is created")


Training data is created


In [14]:
# deep neural netvocabulary_list model

model = Sequential()

model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))

model.add(Dropout(0.5))

model.add(Dense(64, activation='relu'))

model.add(Dropout(0.5))

model.add(Dense(len(train_y[0]), activation='softmax'))


# Compiling model. SGD with Nesterov accelerated gradient gives good results for this model

sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])


#Training and saving the model 

hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

model.save('chatbot_model.h5', hist)


print("model is created")

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 626us/step - accuracy: 0.1332 - loss: 2.2741
Epoch 2/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 542us/step - accuracy: 0.2203 - loss: 2.1138
Epoch 3/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 509us/step - accuracy: 0.1806 - loss: 2.1288  
Epoch 4/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483us/step - accuracy: 0.3194 - loss: 1.9799
Epoch 5/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466us/step - accuracy: 0.3625 - loss: 1.9462
Epoch 6/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 484us/step - accuracy: 0.5126 - loss: 1.8113
Epoch 7/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 706us/step - accuracy: 0.4812 - loss: 1.7686
Epoch 8/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 677us/step - accuracy: 0.4642 - loss: 1.6574
Epoch 9/200
[1m10/10[0m [32m━━━━━━━━━━━



model is created


In [15]:
import nltk

from nltk.stem import WordNetLemmatizer

word_normalizer = WordNetLemmatizer()

import pickle

import numpy as np


from keras.models import load_model

model = load_model('chatbot_model.h5')

import json

import random

parsed_intents = json.loads(open('parsed_intents.json').read())

vocabulary_list = pickle.load(open('vocabulary_list.pkl','rb'))

intent_categories = pickle.load(open('intent_categories.pkl','rb'))


def clean_up_sentence(sentence):

    # tokenize the pattern - splitting vocabulary_list into array

    sentence_vocabulary_list = nltk.word_tokenize(sentence)

    # stemming every word - reducing to base form

    sentence_vocabulary_list = [word_normalizer.lemmatize(word.lower()) for word in sentence_vocabulary_list]

    return sentence_vocabulary_list

# return word_presence_bag of vocabulary_list array: 0 or 1 for vocabulary_list that exist in sentence

def word_presence_bag_of_vocabulary_list(sentence, vocabulary_list, show_details=True):

    # tokenizing patterns

    sentence_vocabulary_list = clean_up_sentence(sentence)

    # word_presence_bag of vocabulary_list - vocabulary matrix

    word_presence_bag = [0]*len(vocabulary_list)  

    for s in sentence_vocabulary_list:

        for i,word in enumerate(vocabulary_list):

            if word == s: 

                # assign 1 if current word is in the vocabulary position

                word_presence_bag[i] = 1

                if show_details:

                    print ("found in word_presence_bag: %s" % word)

    return(np.array(word_presence_bag))


def predict_class(sentence):

    # filter below  threshold predictions

    p = word_presence_bag_of_vocabulary_list(sentence, vocabulary_list,show_details=False)

    res = model.predict(np.array([p]))[0]

    ERROR_THRESHOLD = 0.25

    results = [[i,r] for i,r in enumerate(res) if r>ERROR_THRESHOLD]

    # sorting strength probability

    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []

    for r in results:

        return_list.append({"intent": intent_categories[r[0]], "probability": str(r[1])})

    return return_list


def getResponse(ints, parsed_intents_json):

    tag = ints[0]['intent']

    list_of_parsed_intents = parsed_intents_json['parsed_intents']

    for i in list_of_parsed_intents:

        if(i['tag']== tag):

            result = random.choice(i['responses'])

            break

    return result


#Creating tkinter GUI

import tkinter

from tkinter import *


def process_and_send_message():

    msg = MessageEntryBox.get("1.0",'end-1c').strip()

    MessageEntryBox.delete("0.0",END)

    if msg != '':

        ChatDisplay.config(state=NORMAL)

        ChatDisplay.insert(END, "You: " + msg + '\n\n')

        ChatDisplay.config(foreground="#446665", font=("Verdana", 12 )) 


        ints = predict_class(msg)

        res = getResponse(ints, parsed_intents)

        

        ChatDisplay.insert(END, "Bot: " + res + '\n\n')           


        ChatDisplay.config(state=DISABLED)

        ChatDisplay.yview(END)


app_root = Tk()

app_root.title("Chatbot")

app_root.geometry("400x500")

app_root.resizable(width=FALSE, height=FALSE)


# Build the chat window area for displaying messages

ChatDisplay = Text(app_root, bd=0, bg="white", height="8", width="50", font="Arial",)


ChatDisplay.config(state=DISABLED)

# Attach a scrollbar to the chat display window

scrollbar = Scrollbar(app_root, command=ChatDisplay.yview, cursor="heart")

ChatDisplay['yscrollcommand'] = scrollbar.set


#Create Button to process_and_send_message message

SubmitButton = Button(app_root, font=("Verdana",12,'bold'), text="Send", width="12", height=5,

                    bd=0, bg="#f9a602", activebackground="#3c9d9b",fg='#000000',

                    command= process_and_send_message )


# Create a text entry box for typing messages

MessageEntryBox = Text(app_root, bd=0, bg="white",width="29", height="5", font="Arial")

#MessageEntryBox.bind("<Return>", process_and_send_message)


# Arrange the GUI elements on the screen

scrollbar.place(x=376,y=6, height=386)

ChatDisplay.place(x=6,y=6, height=386, width=370)

MessageEntryBox.place(x=128, y=401, height=90, width=265)

SubmitButton.place(x=6, y=401, height=90)


app_root.mainloop()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
