# Building a chatbot using NLP text similarity

##### Importing libraries

In [1]:
import nltk
import random
import string
import re, string, unicodedata
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer

In [2]:
import wikipedia as wk

In [3]:
from collections import defaultdict
import warnings
warnings.filterwarnings("ignore")
# Fetch the NLTK resources this notebook relies on: 'punkt' for
# sent_tokenize/word_tokenize and 'wordnet' for WordNetLemmatizer.
nltk.download('punkt')
nltk.download('wordnet')
# Normalize() calls nltk.pos_tag, which needs the perceptron tagger model;
# without it a fresh environment fails on the first chatbot reply.
# NOTE(review): recent NLTK releases may look for differently named resources
# (e.g. 'punkt_tab', 'averaged_perceptron_tagger_eng') - confirm against the
# installed NLTK version.
nltk.download('averaged_perceptron_tagger')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel

[nltk_data] Downloading package punkt to C:\Users\Reem
[nltk_data]     Telbani\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Reem
[nltk_data]     Telbani\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


##### Load the dataset and convert all text to lowercase.

In [4]:
# Read the whole corpus file; undecodable bytes are skipped (errors='ignore').
# The context manager closes the file handle as soon as the text is in memory.
with open('ML1.txt', 'r', errors='ignore') as data:
    raw = data.read()
# Lowercase once up front so all later matching is case-insensitive.
raw = raw.lower()

##### Data pre-processing

In [6]:
# Split the lowercased corpus into sentences. This list is the candidate pool
# for answers; generateResponse() appends the user's query to it and the chat
# loop removes the query again afterwards.
sent_tokens = nltk.sent_tokenize(raw)

In [7]:
def Normalize(text):
    """Tokenize ``text`` and return a list of cleaned, lemmatized tokens.

    Processing steps, in order:
      1. strip HTML/XML-style tags,
      2. lowercase and delete all ``string.punctuation`` characters,
      3. word-tokenize with NLTK,
      4. drop non-ASCII characters from every token (empty tokens are discarded),
      5. POS-tag the tokens and lemmatize each one with WordNet.

    Passed as the ``tokenizer`` callback to TfidfVectorizer in generateResponse.

    Parameters
    ----------
    text : str
        Raw sentence or query.

    Returns
    -------
    list of str
        Lemmas of the surviving tokens, in their original order.
    """
    # Tags must be removed BEFORE punctuation is stripped: '<', '>' and '/' are
    # punctuation themselves, so afterwards a tag can no longer be recognised
    # and its name would be fused into the neighbouring word.
    text = re.sub(r"</?[A-Za-z][^<>]*>", " ", text)

    # Word tokenization on the lowercased, punctuation-free text.
    remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
    word_token = nltk.word_tokenize(text.lower().translate(remove_punct_dict))

    # Remove non-ASCII characters. NFKD decomposes accented letters first, so
    # the base letter survives the ASCII round trip. Tokens left empty are dropped.
    ascii_words = []
    for word in word_token:
        ascii_word = unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('utf-8', 'ignore')
        if ascii_word:
            ascii_words.append(ascii_word)

    # POS tagging and lemmatization. The first letter of the tag returned by
    # nltk.pos_tag selects the WordNet POS; anything unmapped is treated as a noun.
    tag_map = defaultdict(lambda: wn.NOUN)
    tag_map['J'] = wn.ADJ
    tag_map['V'] = wn.VERB
    tag_map['R'] = wn.ADV
    lmtzr = WordNetLemmatizer()
    return [lmtzr.lemmatize(token, tag_map[tag[0]])
            for token, tag in nltk.pos_tag(ascii_words)]

##### Define greeting keywords and replies: if the user's message contains a greeting, the chatbot responds with a greeting as well, based on keyword matching.

In [8]:
# Greetings the bot recognises (single words or short phrases) and the pool of
# replies it picks from at random.
welcome_input = ("hello", "hi", "greetings", "sup", "what's up","hey",)
welcome_response = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]
def welcome(user_response):
    """Return a random greeting if ``user_response`` contains a known greeting.

    Matching is case-insensitive and works on whole words: punctuation at the
    start or end of a word is ignored ("Hi!" matches "hi"), and multi-word
    entries such as "what's up" are matched as phrases.

    Parameters
    ----------
    user_response : str
        The message typed by the user.

    Returns
    -------
    str or None
        One element of ``welcome_response``, or ``None`` when no greeting is found.
    """
    # Strip punctuation only from word edges so the apostrophe in "what's" survives.
    words = [word.strip(string.punctuation) for word in user_response.lower().split()]
    # Pad with spaces so substring tests match whole words / phrases only
    # (e.g. "hi" must not match inside "this").
    padded = " " + " ".join(word for word in words if word) + " "
    for greeting in welcome_input:
        if " " + greeting + " " in padded:
            return random.choice(welcome_response)
    return None

##### Generate chatbot response

In [9]:
def generateResponse(user_response):
    """Answer ``user_response`` with the most similar corpus sentence.

    The query is appended to the global ``sent_tokens`` list, the whole list is
    TF-IDF vectorized (tokenized by ``Normalize``), and the query row is scored
    against every corpus row with ``linear_kernel``. If no corpus sentence
    shares any term with the query, or the query contains "tell me about",
    the request is forwarded to ``wikipedia_data`` instead.

    Side effect: ``user_response`` stays appended to ``sent_tokens``; the
    chat loop is responsible for removing it after the call.

    Parameters
    ----------
    user_response : str
        The (lowercased) message typed by the user.

    Returns
    -------
    str or None
        The best matching sentence, or whatever ``wikipedia_data`` returns;
        ``None`` when the Wikipedia path is taken with an empty query.
    """
    sent_tokens.append(user_response)
    TfidfVec = TfidfVectorizer(tokenizer=Normalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)

    # Score the query (last row) against the corpus rows only. Excluding the
    # query itself means a tie or an identical corpus sentence can never make
    # the bot echo the user's own input back.
    corpus_rows = tfidf.shape[0] - 1
    if corpus_rows > 0:
        scores = linear_kernel(tfidf[-1], tfidf[:-1]).ravel()
        idx = int(scores.argmax())
        req_tfidf = scores[idx]
    else:
        # Empty corpus: nothing to match, fall through to the Wikipedia path.
        idx = None
        req_tfidf = 0

    if req_tfidf == 0 or "tell me about" in user_response:
        print("Checking Wikipedia")
        if user_response:
            return wikipedia_data(user_response)
        return None
    return sent_tokens[idx]
#wikipedia search
def wikipedia_data(input):
    """Look up the topic of a "tell me about <topic>" request on Wikipedia.

    Parameters
    ----------
    input : str
        The user's message. Only the text after "tell me about " is used
        as the search topic.

    Returns
    -------
    str
        A three-sentence summary from ``wk.summary``, or the message
        "No content has been found" when the input names no topic or the
        lookup fails. Returning the message (instead of printing it and
        returning ``None``) keeps the caller from displaying "None".
    """
    not_found = "No content has been found"
    reg_ex = re.search('tell me about (.*)', input)
    topic = reg_ex.group(1).strip() if reg_ex else ''
    if not topic:
        return not_found
    try:
        return wk.summary(topic, sentences = 3)
    except Exception:
        # Best-effort lookup: any failure raised by the wikipedia client is
        # reported to the user as the fallback message rather than crashing the chat.
        return not_found

# Start Talking to Chatterbot :)

##### Chatterbot is trained on articles that include machine learning definitions, types, and algorithms. It helps you learn the meaning of many terminologies.

##### Type Bye or thank you to exit or quit

In [11]:
# Interactive chat loop. Reads one line per turn and:
#   - exits on bye/shutdown/exit/quit, or after acknowledging thanks/thank you;
#   - answers greetings via welcome();
#   - otherwise answers via generateResponse().
flag = True
print("My name is Chatterbot and I'm a chatbot. If you want to exit, type Bye!")
print("")
while flag:
    # Strip surrounding whitespace so inputs like "bye " are still recognised.
    user_response = input().lower().strip()
    if user_response in ['bye', 'shutdown', 'exit', 'quit']:
        flag = False
        print("Chatterbot : Bye!!! ")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("Chatterbot : You are welcome..")
        print("")
    else:
        # Call welcome() once: it picks a random reply, so calling it twice
        # would test one draw and print another.
        greeting = welcome(user_response)
        if greeting is not None:
            print("Chatterbot : " + greeting)
            print("")
        else:
            print("Chatterbot : ", end="")
            try:
                print(generateResponse(user_response))
            finally:
                # generateResponse appends the query to the END of sent_tokens.
                # Pop exactly that entry (even if the call raised) instead of
                # remove(), which deletes the first equal element and could hit
                # a genuine corpus sentence.
                if sent_tokens and sent_tokens[-1] == user_response:
                    sent_tokens.pop()
            print("")

My name is Chatterbot and I'm a chatbot. If you want to exit, type Bye!

hello
Chatterbot : I am glad! You are talking to me

what is machine learning?
Chatterbot : machine learning basic concepts
there are many different types of machine learning algorithms, with hundreds published each day, and they’re typically grouped by either learning style (i.e.

what is epoch?
Chatterbot : epoch, a full training pass over the entire dataset such that each example has been seen once,  thus, an epoch represents n/batch size training iterations, where n is the total number of examples.

what is outliers
Chatterbot : outliers, values distant from most other values.

what is supervised learning?
Chatterbot : supervised machine learning, training a model from input data and its corresponding labels.

supervised algorithms
Chatterbot : supervised machine learning, training a model from input data and its corresponding labels.

thank you
Chatterbot : You are welcome..

