In [1]:
import io
import random
import string # to process standard python strings
import warnings
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

## Downloading and installing NLTK


In [2]:
import nltk
from nltk.stem import WordNetLemmatizer
# nltk.download('popular', quiet=True)
# nltk.download('punkt') # first-time use only
# nltk.download('wordnet') # first-time use only

## Reading in the corpus

We will be using the Wikipedia page for chatbots as our corpus.

In [3]:
# Read the whole corpus into memory. The context manager closes the file
# handle even if reading fails; errors='ignore' drops bytes that cannot be
# decoded instead of raising.
with open('wikipedia-first.txt', 'r', errors='ignore') as f:
    raw = f.read()
raw = raw.lower()  # converts to lowercase

## Tokenisation

In [4]:
# Sentence-level split of the corpus: these sentences are the candidate
# answers that response() ranks against the user's query.
sent_tokens = nltk.tokenize.sent_tokenize(raw)
# Word-level split of the same corpus text.
word_tokens = nltk.tokenize.word_tokenize(raw)

## Preprocessing

We shall now define a function called LemTokens which will take as input the tokens and return normalized tokens.

In [5]:
# Shared lemmatizer instance. WordNet is a semantically-oriented dictionary
# of English included in NLTK.
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Return a list with the lemmatized form of each token, in input order."""
    return list(map(lemmer.lemmatize, tokens))


# Translation table for str.translate(): maps the code point of every
# character in string.punctuation to None, i.e. deletes it.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemNormalize(text):
    """Normalize raw text into a list of lemmatized word tokens.

    The text is lowercased, stripped of punctuation via ``remove_punct_dict``,
    split into words with NLTK's word tokenizer, and each word is lemmatized
    by ``LemTokens``.
    """
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)

## Keyword matching

Used to generate the chatbot's greeting responses.

In [6]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey")
GREETING_RESPONSES = [ "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]


def greeting(sentence):
    """Return a random greeting if `sentence` contains a known greeting, else None.

    Matching is case-insensitive and works on whole words: punctuation
    attached to the start or end of a word is ignored ("Hello!" matches
    "hello"), and multi-word entries of GREETING_INPUTS such as "what's up"
    are matched as a phrase. Words that merely contain a greeting
    ("this", "support") do not match.
    """
    words = (word.strip(string.punctuation) for word in sentence.lower().split())
    # Pad with spaces so that a phrase only matches on word boundaries.
    padded = " " + " ".join(word for word in words if word) + " "
    if any(" " + phrase + " " in padded for phrase in GREETING_INPUTS):
        return random.choice(GREETING_RESPONSES)
    return None

To generate a response from our bot for input questions, the concept of document similarity will be used. We define a function `response` which converts the user's utterance and every sentence of the corpus into TF-IDF vectors, compares them using cosine similarity, and returns the corpus sentence that is most similar to the utterance. If no sentence in the corpus shares any terms with the input, it returns the response: "I am sorry! I don't understand you".

In [7]:
def response(user_response):
    """Return the corpus sentence most similar to `user_response`.

    The query is appended to the module-level ``sent_tokens`` list, every
    entry is vectorised with TF-IDF (tokenised by ``LemNormalize``, English
    stop words removed), and the query vector is compared with each corpus
    sentence by cosine similarity.

    Side effect: on a normal return the query is left as the last element of
    ``sent_tokens``; the caller is expected to remove it afterwards. If
    vectorising or scoring raises, the append is rolled back before the
    exception propagates, so a failed call does not pollute the corpus.

    Returns:
        The best-matching sentence, or "I am sorry! I don't understand you"
        when the corpus is empty or no sentence has a non-zero similarity
        to the query.
    """
    fallback = "I am sorry! I don't understand you"
    sent_tokens.append(user_response)
    if len(sent_tokens) < 2:
        # Nothing but the query itself to compare against.
        return fallback
    try:
        TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
        tfidf = TfidfVec.fit_transform(sent_tokens)
        # Score the query (last row) against the corpus rows only, so the
        # query can never be selected as its own best match.
        scores = cosine_similarity(tfidf[-1], tfidf[:-1]).ravel()
    except Exception:
        sent_tokens.pop()
        raise
    best = scores.argmax()
    if scores[best] == 0:
        return fallback
    return sent_tokens[best]



### Import Google's cloud language API

In [8]:
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types
from nlpGoogleAPI import language_analysis

Finally, we define the lines that the bot says when starting and ending a conversation, depending on the user's input.

In [None]:
from urllib.parse import urlencode  # only needed for the Maps link built in this cell


def _directions_url(origin, destination):
    """Build a Google Maps directions link with URL-encoded place names."""
    # 'driving' is the documented travelmode value for car directions.
    query = urlencode({
        'api': 1,
        'origin': origin,
        'destination': destination,
        'travelmode': 'driving',
    })
    return 'https://www.google.com/maps/dir/?' + query


def _print_entity_report(entities, sentiment):
    """Print every entity found in the query, the sentiment, and a map link if applicable."""
    print(" ")
    print("<==== Here is some more info based on your query ====>")
    print(" ")
    for e in entities:
        print(u"Representative name identified for the entity: {}".format(e.name))
        print(u"Entity type: {}".format(enums.Entity.Type(e.type).name))
        for metadata_name, metadata_value in e.metadata.items():
            print(u"{}: {}".format(metadata_name, metadata_value))
        print("")
        print("*** *** *** *** *** ***")
        for mention in e.mentions:
            print(u"Mention text: {}".format(mention.text.content))
            # Get the mention type, e.g. PROPER for proper noun
            print(u"Mention type: {}".format(enums.EntityMention.Type(mention.type).name))
        print(" ")
        print(u"Salience score: {}".format(e.salience))
        print(" ")
    print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))
    # When the first two entities are both places, offer directions between them.
    if (len(entities) >= 2
            and enums.Entity.Type(entities[0].type).name == 'LOCATION'
            and enums.Entity.Type(entities[1].type).name == 'LOCATION'):
        print('Map directions : ' + _directions_url(
            entities[0].mentions[0].text.content,
            entities[1].mentions[0].text.content,
        ))


# Conversation loop: runs until the user says bye or thanks the bot.
flag=True
print("")
print("Hi, I'm a WikiBot and i am here to answer your queries. If you want to exit, type Bye!")
while flag:
    user_response = input().lower()
    # Commands are compared without surrounding whitespace or trailing
    # punctuation, so typing "Bye!" exactly as prompted ends the chat.
    command = user_response.strip().rstrip(string.punctuation).strip()
    if command == 'bye':
        flag=False
        print("")
        print("WikiBot: Bye! Have fun..")
    else:
        if command in ('thanks', 'thank you'):
            flag=False
            print("")
            print("WikiBot: You are welcome..")
        else:
            # Call greeting() once: it picks a random reply, so the value that
            # is checked must be the value that is printed.
            bot_greeting = greeting(user_response)
            if bot_greeting is not None:
                print("")
                print("WikiBot: "+bot_greeting)
            else:
                print("")
                print("WikiBot: ",end="")
                # response() appends the query to sent_tokens. Trim the list
                # back to its previous length afterwards, even if response()
                # raises, and without touching a corpus sentence that happens
                # to equal the query.
                corpus_size = len(sent_tokens)
                try:
                    print(response(user_response))
                finally:
                    del sent_tokens[corpus_size:]
                sentiment, entities = language_analysis(user_response)
                if len(entities) > 0:
                    _print_entity_report(entities, sentiment)

        print("=============================================================")


Hi, I'm a WikiBot and i am here to answer your queries. If you want to exit, type Bye!
Hello

WikiBot: hey
Who is king arthur?

WikiBot: king arthur
king arthur is a legendary king in the mythology of great britain.
 
<==== Here is some more info based on your query ====>
 
Representative name identified for the entity: king arthur
Entity type: PERSON

*** *** *** *** *** ***
Mention text: king arthur
Mention type: COMMON
 
Salience score: 1.0
 
Sentiment: 0.0, 0.0
Is covid-19 a deadly disease?

WikiBot: bubonic plague
the bubonic plague is a very deadly disease.
 
<==== Here is some more info based on your query ====>
 
Representative name identified for the entity: disease
Entity type: OTHER

*** *** *** *** *** ***
Mention text: disease
Mention type: COMMON
 
Salience score: 1.0
 
Representative name identified for the entity: 19
Entity type: NUMBER
value: 19

*** *** *** *** *** ***
Mention text: 19
Mention type: TYPE_UNKNOWN
 
Salience score: 0.0
 
Sentiment: -0.10000000149011612