In [1]:
# things we need for NLP
import nltk
# Make sure the 'punkt' tokenizer data is installed; nltk.word_tokenize is used
# below on both the training patterns and the user input.
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
# One shared stemmer instance, used for the vocabulary and for incoming sentences
# so both are reduced to the same stems.
stemmer = LancasterStemmer()

import numpy as np
import random
# NOTE(review): the next line binds the name `keras` to tensorflow's bundled Keras,
# but `load_model` is then imported from the separately installed `keras` package
# (the "Using TensorFlow backend." message in the output comes from that package).
# The `keras` name itself is not used in the visible cells — confirm which
# implementation saved the model and import from that one only.
from tensorflow import keras
from keras.models import load_model

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# import our chat-bot intents file
import json
# Read the file explicitly as UTF-8. Without `encoding`, open() falls back to the
# platform's locale encoding (cp1252 on a typical Windows setup), which turns
# non-ASCII characters in the responses (e.g. an en dash) into mojibake like 'â€“'.
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

In [3]:
# Build three structures from the intents file:
#   documents - list of (token list, tag) pairs, one per training pattern
#   classes   - sorted list of the distinct intent tags that have patterns
#   words     - sorted list of distinct lower-cased, stemmed tokens (the vocabulary)
# Only the tokens listed in ignore_words are dropped; other punctuation tokens
# stay in the vocabulary.
ignore_words = ['?']
words = []
classes = []
documents = []

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # split the pattern into word tokens
        tokens = nltk.word_tokenize(pattern)
        # every raw token goes into the (not yet normalized) word list
        words += tokens
        # keep the tokenized pattern together with its label
        documents.append((tokens, tag))
        # register the tag the first time one of its patterns is seen
        if tag not in classes:
            classes.append(tag)

# normalize: filter on the raw token, then lower-case and stem; the set removes
# duplicates and sorting gives a stable column order for the bag-of-words vector
words = sorted({stemmer.stem(token.lower()) for token in words if token not in ignore_words})

# stable, duplicate-free label order
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

198 documents
28 classes ['CourseB.Tech', 'accreditation', 'affirmative', 'btechandmbatechadmissions', 'emoticons', 'generalfeestructure', 'goodbye', 'greeting', 'hours', 'integratedprograms', 'noanswer', 'offensivewords', 'opentoday', 'otherprogramsadmissions', 'postgradprograms', 'streamartificialintelligence', 'thanks', 'undergradprograms', 'whyArtificialIntelligence', 'whyCS', 'whyCivil', 'whyDataScience', 'whyEXTC', 'whyElectrical', 'whyIT', 'whyIndustrialAutomation', 'whyMechanical', 'whyMechatronics']
142 unique stemmed words ['!', "'s", '.', 'a', 'about', 'accredit', 'admit', 'ai', 'al', 'alright', 'am', 'and', 'anyon', 'ar', 'art', 'at', 'autom', 'avail', 'b.', 'b.tech', 'bot', 'btech', 'bye', 'camp', 'can', 'cert', 'charg', 'choos', 'civil', 'colleg', 'comput', 'confus', 'cool', 'cours', 'cs', 'dat', 'day', 'degr', 'detail', 'diff', 'do', 'ds', 'dual', 'dumb', 'elect', 'electron', 'engin', 'extc', 'fee', 'fess', 'field', 'fin', 'for', 'get', 'giv', 'good', 'goodby', 'got', 'h

In [4]:
# Load the previously saved Keras model from an HDF5 file (path relative to the
# working directory). classify() feeds it bag-of-words rows of width len(words)
# and uses the output positions as indices into `classes`, so it presumably was
# trained with this same vocabulary and class ordering — TODO confirm.
model = load_model('model_ChatBot.h5')

In [5]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Uses nltk.word_tokenize and the notebook-level `stemmer`. No tokens are
    filtered out here (punctuation is kept).

    Returns:
        list of stemmed token strings, in sentence order.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=False):
    """Encode `sentence` as a binary bag-of-words vector over `words`.

    Args:
        sentence: raw input text; normalized with clean_up_sentence().
        words: vocabulary list; position i of the result corresponds to words[i].
        show_details: when true, print every vocabulary hit.

    Returns:
        numpy array of len(words) entries, 1 where the vocabulary word occurs
        in the sentence and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    # start with an all-zero vector, one slot per vocabulary entry
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            bag[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)

    return np.array(bag)

In [6]:
# create a data structure to hold user context
# (userID -> value taken from an intent's 'context_set'; written by response())
context = {}

# Score cut-off used by classify(): only classes whose predicted score is
# strictly greater than this value are returned.
ERROR_THRESHOLD = 0.65
def classify(sentence):
    """Score `sentence` with the loaded model and return the likely intents.

    Returns:
        list of (class name, score) tuples for every class whose score is
        above ERROR_THRESHOLD, ordered from highest to lowest score. The list
        is empty when no class clears the threshold.
    """
    # bag-of-words encoding of the sentence over the notebook vocabulary
    features = bow(sentence, words)

    # The sentence vector is stacked on top of len(documents) - 2 all-zero rows
    # and only the prediction for the first row is used.
    # NOTE(review): the reason for this padding is not visible in this notebook;
    # a single-row batch may be sufficient — confirm against the model definition
    # before changing it.
    padding = np.zeros([len(documents) - 2, len(features)])
    batch = np.vstack((features, padding))

    scores = model.predict(batch)[0]

    # keep only the classes above the threshold, strongest first
    ranked = sorted(
        ([index, score] for index, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # translate class indices into class names
    return [(classes[index], score) for index, score in ranked]

def response(sentence, userID, show_details=False):
    """Return a reply for `sentence`, taking the user's conversation context into account.

    The sentence is classified, then the candidate tags are tried from most to
    least probable. For each tag the matching intents are looked up; an intent
    is used when it has no 'context_filter', or when its 'context_filter'
    equals the context currently stored for `userID`. The first usable intent
    wins: its 'context_set' (if any) is stored for the user and one of its
    'responses' is picked at random.

    The filter is evaluated BEFORE the intent's own 'context_set' is stored.
    Storing first would compare the filter with the value the intent itself
    just wrote, and would also let intents that end up being rejected change
    the user's context.

    Args:
        sentence: raw user input.
        userID: key under which the conversation context is kept in `context`.
        show_details: when true, print the context being set and the chosen tag.

    Returns:
        A response string, or None when no classified intent is applicable
        (including when nothing scored above the threshold).
    """
    results = classify(sentence)
    print('Result:', results)

    # candidates are already ordered by descending score
    for tag, _score in results:
        for intent in intents['intents']:
            if intent['tag'] != tag:
                continue

            # contextual intents only apply when the user's stored context matches
            applies = 'context_filter' not in intent or (
                userID in context and intent['context_filter'] == context[userID]
            )
            if not applies:
                continue

            # the intent is going to answer: now it may update the context
            if 'context_set' in intent:
                if show_details:
                    print('context:', intent['context_set'])
                context[userID] = intent['context_set']

            if show_details:
                print('tag:', intent['tag'])
            # a random response from the intent
            return random.choice(intent['responses'])

    # no candidate produced an applicable intent
    return None

In [7]:
# Out-of-domain sentence: the recorded run maps it to the 'noanswer' intent.
response('i would like to buy a flower', '123', show_details=True)

Result: [('noanswer', 0.9972168)]
tag: noanswer


"Sorry, can't understand you"

In [8]:
# Inspect the context store: still empty, no 'context:' line was printed above.
context

{}

In [9]:
# Greeting: in the recorded run this intent also sets a (blank) context for the user.
response('hello there', '123', show_details=True)

Result: [('greeting', 1.0)]
context: 
tag: greeting


'Hi there, how can I help?'

In [10]:
# Single unrelated word: classified as 'noanswer' in the recorded run.
response('rose', '123', show_details=True)

Result: [('noanswer', 0.99999976)]
tag: noanswer


'Please give me more info'

In [11]:
# The store now holds an entry for user '123' (an empty string, see output).
context

{'123': ''}

In [12]:
# Single keyword query; classified as 'accreditation' in the recorded run.
response('accreditations','123',show_details=True)

Result: [('accreditation', 0.9999999)]
tag: accreditation


'NBA Accreditation: Computer Engineering & Electronics and Telecommunication Programs have been accredited by National Board of Accreditation under Tier â€“ I for two years. (implies international accreditation)'

In [13]:
# Full-sentence query; classified as 'undergradprograms' in the recorded run.
response('I want to know about the Undergraduate Programs in this college.','123',show_details=True)

Result: [('undergradprograms', 1.0)]
tag: undergradprograms


'We provide the following Undergraduate Programs - B. Tech (4 Years after 12th grade), B. Tech Integrated (6 Years after 10th grade)'

In [14]:
# NOTE(review): in the recorded run this Mechanical query is labelled
# 'whyIndustrialAutomation' with a score of only ~0.71, although a 'whyMechanical'
# class exists — looks like a misclassification; the training patterns may need work.
response('stream Mechanical','123',show_details=True )

Result: [('whyIndustrialAutomation', 0.70990515)]
tag: whyIndustrialAutomation


'I can help you. If you understand Programming and Software Development, PLC, HMI, SCADA Programming, Electronic Basics and you have a Detail-Oriented Personality, then this Industrial Automation course is for you!'

In [15]:
# Fee question; classified as 'generalfeestructure' in the recorded run.
response('What is the college fees at MPSTME?','123',show_details=True)

Result: [('generalfeestructure', 1.0)]
tag: generalfeestructure


'Fee structure for various programs at MPSTME: MBA Tech - 3.3-3.7LPA, BTech - 2.7-3.0LPA, BTech Integrated - 2.7-3.0LPA,, PHD - 0.5-0.7LPA, MTech - 1.2LPA, MCA - 1.1LPA, PGDM - 4.9LPA'

In [16]:
# Course question; classified as 'CourseB.Tech' in the recorded run.
response('Tell me about B.Tech course','123',show_details=True)

Result: [('CourseB.Tech', 1.0)]
tag: CourseB.Tech


'Let me help you! We offer 4 year B. Tech Course for 12th+ Students at our Mumbai and Shirpur campuses. We provide B. Tech Degree in the following streams:\nElectrical (30 Seats) \nData Science (60 Seats) \nInformation Technology (60+60 Seats) \nComputer Engineering (120+60 Seats) \nCivil (60 Seats) \nElectronics and Telecommunication (60+60 Seats) \nMechatronics (60+60 Seats) \nMechanical (60+60 Seats) \n*Seats: Mumbai + Shirpur \nMinimum 70% in 12th grade required.'

In [17]:
# The input is the literal text 'U+1F44D' (a code-point label, not the emoji
# character itself); the recorded run maps it to 'emoticons' and replies in the
# same notation.
response('U+1F44D','123',show_details=True)

Result: [('emoticons', 0.9999999)]
tag: emoticons


'U+1F603'