In [1]:
# Importing all the packages
import nltk
from nltk.stem import WordNetLemmatizer
# Instantiating WordNetLemmatizer to lemma
lemma = WordNetLemmatizer()
import json
import pickle
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
import random
import webbrowser


In [2]:
# Creating empty variable for storing each part from dataset
words=[]
classes = []
documents = []
ignore_words = ['?', '!' , '$' , '&' ,'#' ,'%' ,'^' ,'*']
dataset = open('intents_final.json').read()
intents = json.loads(dataset)

In [3]:
for i in intents['intents']:
    for j in i['patterns']:

        #tokenize each word
        w = nltk.word_tokenize(j)
        words.extend(w)
        #add documents in the corpus
        documents.append((w, i['tag']))

        # add to our classes list
        if i['tag'] not in classes:
            classes.append(i['tag'])

In [4]:
words

['Hi',
 'there',
 'How',
 'are',
 'you',
 'Is',
 'anyone',
 'there',
 '?',
 'Hey',
 'Hola',
 'Hello',
 'Good',
 'day',
 'Hi',
 'hai',
 'Need',
 'info',
 'need',
 'information',
 'need',
 'help',
 'anyone',
 'there',
 'Bye',
 'See',
 'you',
 'later',
 'Goodbye',
 'Nice',
 'chatting',
 'to',
 'you',
 ',',
 'bye',
 'Till',
 'next',
 'time',
 'tata',
 'Thanks',
 'Thank',
 'you',
 'That',
 "'s",
 'helpful',
 'Awesome',
 ',',
 'thanks',
 'Thanks',
 'for',
 'helping',
 'me',
 'How',
 'you',
 'could',
 'help',
 'me',
 '?',
 'What',
 'you',
 'can',
 'do',
 '?',
 'What',
 'help',
 'you',
 'provide',
 '?',
 'How',
 'you',
 'can',
 'be',
 'helpful',
 '?',
 'What',
 'support',
 'is',
 'offered',
 'Fee',
 'Fees',
 'How',
 'much',
 'should',
 'I',
 'pay',
 'How',
 'much',
 'is',
 'the',
 'fee',
 'How',
 'much',
 'is',
 'the',
 'fees',
 'Fees',
 'structure',
 'How',
 'can',
 'I',
 'pay',
 'my',
 'fees',
 'How',
 'can',
 'I',
 'make',
 'the',
 'payment',
 'Fee',
 'structure',
 'Make',
 'a',
 'payment',

In [5]:
documents

[(['Hi', 'there'], 'greeting'),
 (['How', 'are', 'you'], 'greeting'),
 (['Is', 'anyone', 'there', '?'], 'greeting'),
 (['Hey'], 'greeting'),
 (['Hola'], 'greeting'),
 (['Hello'], 'greeting'),
 (['Good', 'day'], 'greeting'),
 (['Hi'], 'greeting'),
 (['hai'], 'greeting'),
 (['Need', 'info'], 'greeting'),
 (['need', 'information'], 'greeting'),
 (['need', 'help'], 'greeting'),
 (['anyone', 'there'], 'greeting'),
 (['Bye'], 'goodbye'),
 (['See', 'you', 'later'], 'goodbye'),
 (['Goodbye'], 'goodbye'),
 (['Nice', 'chatting', 'to', 'you', ',', 'bye'], 'goodbye'),
 (['Till', 'next', 'time'], 'goodbye'),
 (['tata'], 'goodbye'),
 (['Thanks'], 'thanks'),
 (['Thank', 'you'], 'thanks'),
 (['That', "'s", 'helpful'], 'thanks'),
 (['Awesome', ',', 'thanks'], 'thanks'),
 (['Thanks', 'for', 'helping', 'me'], 'thanks'),
 ([], 'noanswer'),
 ([], 'noanswer'),
 (['How', 'you', 'could', 'help', 'me', '?'], 'options'),
 (['What', 'you', 'can', 'do', '?'], 'options'),
 (['What', 'help', 'you', 'provide', '?'],

In [6]:
classes

['greeting',
 'goodbye',
 'thanks',
 'noanswer',
 'options',
 'Tuition and Fees',
 'Room and Board Related Expenses',
 'General Cost Information',
 'Full time Program',
 'Part time Program',
 'Scholarship',
 'Parking Services',
 'Library',
 'IT service and helpdesk',
 'Social Insurance Number',
 'How to apply Social Insurance Number',
 'Where to get SIN',
 'Documents required for SIN',
 'Nearest Service Canada',
 'Guaranteed Investment Certificate',
 'Mobile Network SIM card',
 'Covid-19 Related Information',
 'Medical Insurance',
 'How to download the Medical insurance card',
 'Contact Information Medical Insurance',
 'Drivers License Details',
 'Data Analytics for Business',
 'ACE',
 'ace-cep',
 'Advanced Medical Esthetics Practitioner',
 'Advertising & Marketing Communications Management',
 'Animation 2D/3D',
 'Architectural Technology',
 'Autism and Behavioural Science',
 'Autism and Behavioural Science-part time',
 'Biomedical',
 'Border Services',
 'Border Services Fast Track',
 

In [7]:
# Text Preprocessing 
# using lemmatizing, Converting each word to lower case and removing duplicates from list
words = [lemma.lemmatize(i.lower()) for i in words if i not in ignore_words]
words = sorted(list(set(words)))
# sorting classes and take the unique tags
classes = sorted(list(set(classes)))
# Printing the total document
print (len(documents), "documents")
# Printing the total classes
print (len(classes), "classes", classes)
# Printing all the unique words from words 
print (len(words), "unique lemmatized words", words)

pickle.dump(words,open('words.pkl','wb'))
pickle.dump(classes,open('classes.pkl','wb'))

446 documents
138 classes ['ACE', 'Advanced Medical Esthetics Practitioner', 'Advertising & Marketing Communications Management', 'Animation 2D/3D', 'Architectural Technology', 'Autism and Behavioural Science', 'Autism and Behavioural Science-part time', 'Biomedical', 'Border Services', 'Border Services Fast Track', 'Business', 'Business Accounting', 'Business Administration Accounting', 'Business Administration Finance', 'Business Administration Human Resources', 'Business Administration Marketing', 'Business Marketing', 'Cardiovascular Technology', 'Carpentry and Renovation Techniques', 'Chemical Laboratory Technology', 'Child and Youth Care', 'Child and Youth Care Accelerated', 'Civil Engineering Technology', 'Community Integration through Cooperative Education', 'Community Justice Services', 'Computing and IT Web Development', 'Construction Engineering Technician', 'Construction Project Management', 'Contact Information Medical Insurance', 'Covid-19 Related Information', 'Culinary 

In [8]:
#Vectorization
# Training data creation
training_data = []
# Empty array creation for output
emp_output = [0] * len(classes)
# Creating bow for each sentence
for i in documents:
    # initializing empty bag
    bag = []
    question_words = i[0]
    question_words = [lemma.lemmatize(word.lower()) for word in question_words]
    for w in words:
        bag.append(1) if w in question_words else bag.append(0)

        classes_category = list(emp_output)
        classes_category[classes.index(i[1])] = 1

        training_data.append([bag, classes_category])
# Using shuffle , randomly suffling the data before passing the model
random.shuffle(training_data)
training = np.array(training_data)
# Creating train_X for words and train_y for classes
train_x = list(training[:,0])
train_y = list(training[:,1])
print("Training data created")

Training data created


In [11]:
len(train_x[0])

401

In [12]:
len(train_y[0])

138

In [13]:

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(train_x), np.array(train_y), epochs=10, batch_size=5, verbose=1)
model.save('chatbot_model.h5', hist)

print("Basic model created for testing ")

Train on 178846 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Basic model created for testing 


In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               51456     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 138)               8970      
Total params: 68,682
Trainable params: 68,682
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Loading the model and pickle files to the respective variables
from tensorflow.keras.models import load_model
model = load_model('chatbot_model.h5')
intents = json.loads(open('intents_final.json').read())
words = pickle.load(open('words.pkl','rb'))
classes = pickle.load(open('classes.pkl','rb'))

In [17]:
# Creating user_query_cleanup function for split each word from user queries and lemmatizing and lowering the case
def user_query_cleanup(user_query):
    sentence_words = nltk.word_tokenize(user_query)
    sentence_words = [lemma.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words

# creating bow function to get the vectorization values from user query
def bow(user_query, words, show_details=True):
    sentence_words = user_query_cleanup(user_query)
    bag = [0]*len(words) 
    for s in sentence_words:
        for i,w in enumerate(words):
            if w == s: 
                bag[i] = 1
                if show_details:
                    print ("Present in the bag: %s" % w)
    return(np.array(bag))

# creating model_predict_class function for highest probability responses retriving from the intent file based on the threshold(error_acceptance) value
def model_predict_class(user_query, model):
    bow_vectors = bow(user_query, words,show_details=False)
    model_prob_stnth = model.predict(np.array([bow_vectors]))[0]
    error_acceptance = 0.30
    results = [[i,r] for i,r in enumerate(model_prob_stnth) if r>error_acceptance]
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
    return return_list

### Sample user query execution through functions

In [18]:
# def clean_up_sentence(sentence): sample
user_query_cleanup('List of Programs available?')

['list', 'of', 'program', 'available', '?']

In [19]:
# def bow(sentence, words, show_details=True): sample
bow('List of Programs available?', words, show_details=True)

Present in the bag: list
Present in the bag: of
Present in the bag: program
Present in the bag: available


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [20]:
#predict_class(sentence, model): sample
bow_vectors = bow('List of Programs', words,show_details=False)
bow_vectors

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [21]:
model_prob_stnth = model.predict(np.array([bow_vectors]))[0]

In [22]:
model_prob_stnth

array([2.53535458e-04, 1.00610909e-04, 2.10465085e-08, 8.82593682e-04,
       1.96178770e-03, 2.89047366e-05, 5.04951931e-05, 8.86674798e-07,
       5.35459503e-06, 2.64609844e-05, 2.71659548e-04, 9.36225842e-05,
       4.52210152e-05, 6.42334126e-05, 7.79245693e-06, 1.27826570e-05,
       3.75554350e-06, 1.27019026e-04, 2.01054645e-04, 3.00382308e-05,
       1.29170090e-04, 9.74513750e-05, 3.64325547e-06, 1.51722611e-06,
       9.19762078e-06, 2.44570089e-07, 8.04035662e-05, 1.68703650e-06,
       2.08685577e-07, 1.12177395e-06, 1.35041832e-04, 1.50869585e-06,
       1.01124977e-04, 2.86134513e-04, 1.36203121e-03, 2.94918692e-07,
       8.03760777e-04, 1.24778162e-04, 2.30342721e-07, 4.03067982e-03,
       2.61903815e-02, 1.11865469e-04, 1.08257795e-04, 1.81967160e-04,
       3.54802651e-05, 4.00009469e-07, 1.87228379e-05, 5.25147538e-04,
       3.46997069e-07, 2.43651262e-03, 1.23281995e-06, 6.69857181e-05,
       5.68477204e-03, 9.36834276e-01, 8.80759399e-05, 1.47746988e-07,
      

In [23]:
model_predict_class('List of Programs', model)


[{'intent': 'Full time Program', 'probability': '0.9368343'}]

In [24]:
# Excel file loading ( tags and links)
df = pd.read_excel("links_final.xlsx")

In [25]:
df

Unnamed: 0,tag,link
0,Tuition and Fees,https://www.stclaircollege.ca/sites/default/fi...
1,Room and Board Related Expenses,https://www.stclaircollege.ca/international/ac...
2,Scholarship,https://www.stclaircollege.ca/foundation/schol...
3,Parking Services,https://www.stclaircollege.ca/parking
4,Library,https://www.stclaircollege.ca/library
...,...,...
125,Full time Program,https://www.stclaircollege.ca/programs
126,Part time Program,https://www.stclaircollege.ca/con-ed
127,Frequently Asked Questions,https://www.stclaircollege.ca/programs/data-an...
128,Live Chat Support,https://www.alphakor.com/support/scc


In [26]:
# Displaying the links
df.link

0      https://www.stclaircollege.ca/sites/default/fi...
1      https://www.stclaircollege.ca/international/ac...
2      https://www.stclaircollege.ca/foundation/schol...
3                  https://www.stclaircollege.ca/parking
4                  https://www.stclaircollege.ca/library
                             ...                        
125               https://www.stclaircollege.ca/programs
126                 https://www.stclaircollege.ca/con-ed
127    https://www.stclaircollege.ca/programs/data-an...
128                 https://www.alphakor.com/support/scc
129      https://www.stclaircollege.ca/registrars-office
Name: link, Length: 130, dtype: object

In [27]:
# Creating the list of tags to retrive the index of the links
tag_list = list(df.tag)

In [28]:
q = 'Bot:'
# For enabling the mouse cursor into hand pointer 
def show_hand_cursor(a):
    ChatLog.config(cursor="hand2")
    
# For disabling the hand cursor to text area
def hide_hand_cursor(a):
    ChatLog.config(cursor='')
    
 # To open the hyperlink in webpage using webbrowser  
def callback(event):
    webbrowser.open_new(df.link[getindex(ints_model, intents)])
    
    # To retrieve the index of the corrresponding tag to dispaly the link
def getindex(ints_model, intents_json):
    user_tag = ints_model[0]['intent']
     
    list_of_intents = intents_json['intents']
    
    for i in list_of_intents:
        if(i['tag']== user_tag ):
            if(i['tag'] in list(df.tag)):               
                ind =tag_list.index(i['tag'])
    return ind   

# Matching the classes from the user query and intent file tag and retrieving the random responses  
    
def find_Response(ints_model, intents_json):
    user_tag = ints_model[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if(i['tag']== user_tag):
            if(i['tag'] in list(df.tag)):               
                #txt=Text(base)
                #txt.pack(expand=False, fill=None, in_=master ,side=RIGHT,ipadx=8, ipady=6)
                ans = random.choice(i['responses'])
                ChatLog.insert(END, q + ans + ' ' )
                ChatLog.insert(END, "here" +'\n\n' , ('link', str(0)))
                ChatLog.tag_bind('link',"<Enter>" , show_hand_cursor)
                ChatLog.tag_bind('link', "<Leave>", hide_hand_cursor)
                ChatLog.tag_bind('link','<Button-1>' , callback)
                ChatLog.tag_config( 'link', foreground="blue" ,underline=1 )
                ChatLog.config(state=DISABLED)
            else:
                global result
                result = random.choice(i['responses'])
                break
    return result
def chatbot_response(text):
    #print(text)
    global ints_model
    ints_model = model_predict_class(text, model)
    print(ints_model)
    final_result = find_Response(ints_model, intents)
    return final_result


In [31]:
#Creating GUI with tkinter
import warnings
warnings.filterwarnings('ignore')

import tkinter
from tkinter import *
from tkhtmlview import HTMLLabel


def send():
    msg = EntryBox.get("1.0",'end-1c').strip()
    
    EntryBox.delete("0.0",END)

    if msg != '':
        ChatLog.config(state=NORMAL)
        ChatLog.insert(END, "You: " + msg + '\n\n')
        ChatLog.config(foreground="black", font=("Verdana", 12 ))
        res = chatbot_response(msg)
        ChatLog.insert(END, "Bot: " + res + '\n\n')
        ChatLog.config(state=DISABLED)
        ChatLog.yview(END)

base = Tk()
base.title("St.Clair Bot")
base.geometry("400x500")
base.resizable(width=FALSE, height=FALSE)

#Create Chat window
ChatLog = Text(base, bd=0, bg="white", height="8", width="50", font="Arial",wrap =WORD)

ChatLog.config(state=DISABLED)

#Bind scrollbar to Chat window
scrollbar = Scrollbar(base, command=ChatLog.yview, cursor="arrow")
ChatLog['yscrollcommand'] = scrollbar.set

#Create Button to send message
SendButton = Button(base, font=("Verdana",12,'bold'), text="Send", width="12", height=5,
                    bd=0, bg="#32de97", cursor="hand2", activebackground="#3c9d9b",fg='#ffffff',
                    command= send )

#Create the box to enter message
EntryBox = Text(base, bd=0, bg="white",width="29", height="5", font="Arial" , wrap =WORD)
#EntryBox.bind("<Return>", send)


#Place all components on the screen
scrollbar.place(x=376,y=6, height=386)
ChatLog.place(x=6,y=6, height=386, width=370)
EntryBox.place(x=128, y=401, height=90, width=265)
SendButton.place(x=6, y=401, height=90)

base.mainloop()

[{'intent': 'greeting', 'probability': '0.99999905'}]
[{'intent': 'options', 'probability': '1.0'}]
[{'intent': 'Data Analytics for Business', 'probability': '1.0'}]
[{'intent': 'Full time Program', 'probability': '0.9504401'}]
[{'intent': 'Data Analytics for Business', 'probability': '1.0'}]
[{'intent': 'greeting', 'probability': '0.99999905'}]
[{'intent': 'greeting', 'probability': '0.99999595'}]
