This project aims to educate people on COVID-19. The chatbot aims to save labor by using artificial intelligence to answer questions COVID-19 related.

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd 

import nltk
nltk.download('punkt')


import random
import json

[nltk_data] Downloading package punkt to /home/kevin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
nltk.download('punkt')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to /home/kevin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/kevin/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

**Import and load the data file**

The data file is in JSON format (covid.json) so we used the json package to parse the JSON file into Python.

In [3]:
with open('covid.json') as json_data:
  intents=json.load(json_data)

In [4]:
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi there',
    'How are you',
    'Is anyone there?',
    'Hey',
    'Hola',
    'Hello',
    'Good day'],
   'responses': ['Hello, thanks for asking',
    'Good to see you again',
    'Hi there, how can I help?'],
   'context': ['']},
  {'tag': 'goodbye',
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Nice chatting to you, bye',
    'Till next time'],
   'responses': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'],
   'context': ['']},
  {'tag': 'thanks',
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    'Awesome, thanks',
    'Thanks for helping me'],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure'],
   'context': ['']},
  {'tag': 'noanswer',
   'patterns': [],
   'responses': ["Sorry, can't understand you",
    'Please give me more info',
    'Not sure I understand'],
   'context': ['']},
  {'tag': 'options',
   'patterns': ['How you could help me?',
    'What you can do

# Data Pre-processing


data pre-processing will be done by tokenizing our data.
Tokenizing is the process of breaking the whole text into small parts like words.
Here we iterate through the patterns and tokenize the sentence using nltk.word_tokenize() function and append each word in the words list. We also create a list of classes for our tags.

In [5]:
words=[]
classes=[]
documents=[]
ignore=['?','!','.', ',']
#Loop through 
for intent in intents['intents']:
    for pattern in intent['patterns']:
        #Tokenize
        w= nltk.word_tokenize(pattern)
        
        #Add the word to words list
        words.extend(w)
        
        #Add words to the document 
        documents.append((w, intent['tag']))
        
        #Add tags to classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
        

In [6]:
#Lemmatize the words and remove any duplictes from the list
from nltk.stem import WordNetLemmatizer
lemmatizer=WordNetLemmatizer()
words=[lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore]

#Sort words
words=sorted(list(set(words)))

#Sort classes
classes=sorted(list(set(classes)))

In [7]:
#Classes=intents
print(len(classes), 'classes', classes)

61 classes ['COVID-19_ATM', 'COVID-19_BP_medication', 'COVID-19_Pets', 'COVID-19_affect_children', 'COVID-19_antibiotics', 'COVID-19_child_contracts', 'COVID-19_child_risk', 'COVID-19_coronavirus_differences', 'COVID-19_coronavirus_vaccine', 'COVID-19_cure', 'COVID-19_definition', 'COVID-19_diabetes', 'COVID-19_family_member', 'COVID-19_first_occurrence', 'COVID-19_grocery_stores', 'COVID-19_hand_sanitizers_alternatives', 'COVID-19_handwashing_steps', 'COVID-19_heart disease_diabetes_hypertension', 'COVID-19_heat', 'COVID-19_hospitals', 'COVID-19_hot_water_and_alcohol', 'COVID-19_incubation_period', 'COVID-19_isolation', 'COVID-19_lifts', 'COVID-19_likelihood', 'COVID-19_lockdown_penalties', 'COVID-19_mask_use', 'COVID-19_medical_policies', 'COVID-19_money', 'COVID-19_movement', 'COVID-19_novel_coronavirus', 'COVID-19_origin', 'COVID-19_packages', 'COVID-19_painkillers', 'COVID-19_pharmacies', 'COVID-19_pregnant_women', 'COVID-19_price_test', 'COVID-19_protection', 'COVID-19_recovered'

In [8]:
print(len(words), 'unique lemmatized words', words)

315 unique lemmatized words ["''", "'s", '-19', '100', '``', 'a', 'able', 'about', 'ace', 'affect', 'after', 'again', 'against', 'air', 'airborne', 'alcohol', 'alternative', 'am', 'an', 'and', 'animal', 'another', 'antibiotic', 'any', 'anyone', 'anything', 'are', 'area', 'at', 'atm', 'available', 'awesome', 'bank', 'be', 'become', 'becoming', 'been', 'being', 'best', 'better', 'between', 'bio-engineered', 'bp', 'break', 'breath', 'building', 'bulk', 'buy', 'bye', 'called', 'can', 'case', 'catch', 'cause', 'causing', 'cent', 'chatting', 'child', 'city', 'city/state', 'common', 'connection', 'contract', 'contracting', 'corona', 'coronavirus', 'coronaviruses', 'could', 'cover', 'covid', 'covid-19', 'cure', 'curve', 'curve�', 'day', 'definition', 'developing', 'diabetes', 'did', 'difference', 'differently', 'disease', 'display', 'dispose', 'do', 'doe', 'drug', 'easier', 'easy', 'effective', 'environment', 'essential', 'ethyl', 'family', 'fever', 'first', 'flattening', 'flu', 'for', 'free',

In [9]:
#Create a pickle file to store the python objects
import pickle
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes,open('classes.pkl', 'wb'))

# Creating Training data

In [10]:
#Create training data
training=[]

#Create an empty array for the output
output_empty=[0] * len(classes)

In [11]:
#Create training set, bag of words for each sentence
for doc in documents:
    #Initialize the bow
    bag=[]
    #list of tokenized words for the pattern
    pattern_words=doc[0]
    #Lemmatize each word
    pattern_words=[lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    #Create our bow array with 1 if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    # output is '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    
    training.append([bag, output_row])

In [12]:
#Shuffle our features and turn into np.array
random.shuffle(training)
training=np.array(training)

  training=np.array(training)


In [13]:
#Create train and test lists where x is patterns and y is intents
train_x= list(training[:,0])
train_y=list(training[:,1])

# Building a neural network

In [14]:
from keras.models import Sequential

In [15]:
from keras.layers import Dense, Activation,Dropout

In [16]:
from keras.optimizers import SGD

In [17]:
#Create 3 layers, 1st layer with 128 neurons, 2nd layer with 64 neurons
#3rd layer neurons=intent

model=Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),),
                activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

In [18]:
#Use Stochastic gradient descent with nesterov acceleraed to compile model
sgd=SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)


In [19]:
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [20]:
#Fitting the model
hist=model.fit(np.array(train_x), np.array(train_y), epochs=50,
              batch_size=5, verbose=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [21]:
#Saving the model
model.save('Kevins_covid19_chatbot.h5', hist)

In [22]:
from keras.models import load_model

In [23]:
model=load_model('Kevins_covid19_chatbot.h5')

In [24]:
intents=json.loads(open('covid.json').read())

In [25]:
words=pickle.load(open('words.pkl', 'rb'))

In [26]:
classes=pickle.load(open('classes.pkl', 'rb'))

In [27]:
def clean_up_sentence(sentence):
    #Tokenize 
    sentence_words = nltk.word_tokenize(sentence)
    # stemming each word
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words

In [28]:
#Return bow of array: 0 or 1 for each word in the sentence
def bow(sentence, words,show_details=True):
    #Tokenize the pattern
    sentence_words=clean_up_sentence(sentence)
    #bow where matrix of N words, vocabulary matrix
    bag= [0]*len(words)
    for s in sentence_words:
        for i,w in enumerate(words):
            if w == s:
                bag[i]=1
                if show_details:
                    print('found in bag: %s' %w)
    return(np.array(bag))

In [29]:
ERROR_THRESHOLD=0.25
def classify(sentence, model):
    #filter out predictions below a threshold
    p=bow(sentence, words,show_details=False)
    res=model.predict(np.array([p]))[0]
    results= [[i,r] for i,r in enumerate(res) if r> ERROR_THRESHOLD]
    #Sort by strength of probability
    results.sort(key=lambda x: x[1], reverse=True)
    return_list=[]
    
    for r in results:
        return_list.append({'intent': classes[r[0]], 'probability': str(r[1])})
    
    #return tuple of intent and probability
    return return_list

In [30]:
def get_response(ints, intents_json):
    tag = ints[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if(i['tag']== tag):
            result = random.choice(i['responses'])
            break
    return result

In [31]:
def chatbot_response(text):
    ints=classify(text, model)
    res=get_response(ints, intents)
    return res

In [32]:
#chatbot_response('is covid-19 airborne')

In [33]:
#chatbot_response('can i get coronavirus fro an dog')

In [34]:
#chatbot_response('what are the symptoms of covid-19')

In [35]:
#chatbot_response('can my symptoms be worse if i have diabetes')

In [36]:
#chatbot_response('can children get covid-19')

# Create GUI with tkinter

In [37]:
import tkinter

In [38]:
from tkinter import *

In [39]:
def send():
    msg = EntryBox.get("1.0",'end-1c').strip()
    EntryBox.delete("0.0",END)

    if msg != '':
        ChatLog.config(state=NORMAL)
        ChatLog.insert(END, "You: " + msg + '\n\n')
        ChatLog.config(foreground="#442265", font=("Verdana", 12 ))

        res = chatbot_response(msg)
        ChatLog.insert(END, "Bot: " + res + '\n\n')

        ChatLog.config(state=DISABLED)
        ChatLog.yview(END)

In [40]:
base = Tk()
base.title("Covid chatbot")
base.geometry("800x600")
base.resizable(width=FALSE, height=FALSE)

''

In [41]:
#Create Chat window
ChatLog = Text(base, bd=0, bg="white", height="8", width="50", font="Arial",)


In [42]:
ChatLog.config(state=DISABLED)

In [43]:
#Bind scrollbar to Chat window
scrollbar = Scrollbar(base, command=ChatLog.yview, cursor="heart")
ChatLog['yscrollcommand'] = scrollbar.set

In [44]:
#Create Button to send message
SendButton = Button(base, font=("Verdana",12,'bold'), text="Send", width="12", height=5,
                    bd=0, bg="#32de97", activebackground="#3c9d9b",fg='#ffffff',
                    command= send )

In [45]:
#Create the box to enter message
EntryBox = Text(base, bd=0, bg="white",width="800", height="500", font="Arial")
EntryBox.bind("<Return>", send)

'139836223111360send'

In [46]:
#Place all components on the screen
scrollbar.place(x=676,y=6, height=786)
ChatLog.place(x=6,y=6, height=386, width=670)
EntryBox.place(x=128, y=401, height=300, width=565)
SendButton.place(x=6, y=401, height=300)

In [47]:
base.mainloop()