# Building a Simple Chatbot Based on Semantic Similarity Using spaCy, BERT, WordNet, and Word2Vec



In [223]:
# NOTE(review): this cell carries the highest execution count in the notebook
# (In [223]) although it appears first, i.e. it was run after all other cells.
from nltk.stem import WordNetLemmatizer

# `lemmatizer` is not referenced by any other cell visible in this notebook.
lemmatizer = WordNetLemmatizer()


## Import necessary libraries

In [204]:
import nltk
from nltk.stem import WordNetLemmatizer
import io
import random
import string # to process standard python strings
import warnings
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings

# Suppress every Python warning for the rest of the session. This also hides
# warnings that could point at real problems in the libraries imported above.
warnings.filterwarnings('ignore')

In [205]:
import spacy
# Shared spaCy pipeline; Response_spacy uses it to parse the question and each
# corpus sentence before scoring them with Doc.similarity.
nlp = spacy.load("en_core_web_md")

### Installing NLTK Packages




In [206]:
import nltk
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK 'popular' resource collection without progress output.
# The per-package downloads below are kept as commented-out alternatives.
nltk.download('popular', quiet=True) # for downloading packages
#nltk.download('punkt') # first-time use only
#nltk.download('wordnet') # first-time use only

True

## Reading a corpus



In [207]:

# Disabled experiment: load a CSV corpus with pandas and split it into
# train/test sets. Nothing below depends on it.
#import pandas as pd
#Corpus = pd.read_csv(r"aji-Arabic_corpus.csv")

#X_train, X_test, y_train, y_test = model_selection.train_test_split(Corpus['text'],Corpus['targe'],test_size=0.2)

# Read the whole knowledge-base file (path relative to the working directory)
# and lower-case it in one step. errors='ignore' silently drops bytes that are
# not valid UTF-8.
# NOTE(review): dropped bytes may explain the run-together words
# ("reliablyon", "alonebetween") in the recorded outputs - confirm.
with open('chatbot.txt','r', encoding='utf8', errors ='ignore') as fin:
    raw = fin.read().lower()
#raw = raw.lower()# converts to lowercase

## Tokenisation

In [208]:
# Sentence-level split of the lower-cased corpus. Every Response_* function
# below picks its answer from this list.
sent_tokens = nltk.sent_tokenize(raw)# converts to list of sentences 
# Word-level split; not referenced by any other cell visible in this notebook.
word_tokens = nltk.word_tokenize(raw)# converts to list of words

# Chatbot based on SpaCy (Semantic Similarity)

In [209]:
def Response_spacy(question:str):
    """Return the corpus sentence most similar to ``question`` according to spaCy.

    Each sentence in the module-level ``sent_tokens`` list is parsed with the
    module-level ``nlp`` pipeline and scored against the parsed question with
    ``Doc.similarity``.

    Args:
        question: The user's query text.

    Returns:
        The highest-scoring sentence from ``sent_tokens``. When several
        sentences share the top score, the one that appears last in
        ``sent_tokens`` is returned. An empty string is returned when
        ``sent_tokens`` is empty.
    """
    # The question does not change between sentences, so parse it only once.
    ques = nlp(question)
    best_sent = ""
    best_sim = None
    # nlp.pipe runs the sentences through the pipeline as a stream instead of
    # one nlp(...) call per sentence.
    for sent, sentence_doc in zip(sent_tokens, nlp.pipe(sent_tokens)):
        # Similarity degree between the sentence and the question.
        sim_degree = sentence_doc.similarity(ques)
        # ">=" lets a later sentence replace an earlier one on an exact tie.
        if best_sim is None or sim_degree >= best_sim:
            best_sim, best_sent = sim_degree, sent
    return best_sent


# Chatbot based on Bidirectional Encoder Representations from Transformers (BERT)

In [210]:
from sentence_transformers import SentenceTransformer, util

# Lazily created SentenceTransformer shared by every Response_BERT call, so the
# model weights are loaded once instead of on each question.
_BERT_MODEL = None


def _get_bert_model():
    """Return the shared 'all-MiniLM-L6-v2' model, loading it on first use."""
    global _BERT_MODEL
    if _BERT_MODEL is None:
        _BERT_MODEL = SentenceTransformer('all-MiniLM-L6-v2')
    return _BERT_MODEL


def Response_BERT(question:str):
    """Return the corpus sentence most similar to ``question`` using sentence embeddings.

    The question and every sentence in the module-level ``sent_tokens`` list
    are embedded with the 'all-MiniLM-L6-v2' SentenceTransformer and compared
    by cosine similarity.

    Args:
        question: The user's query text.

    Returns:
        The sentence from ``sent_tokens`` with the highest cosine similarity
        to the question. On an exact score tie the lexicographically greatest
        sentence is returned. An empty string is returned when ``sent_tokens``
        is empty.
    """
    if not sent_tokens:
        return ""
    model = _get_bert_model()
    # Encode the question once and all sentences in a single batch.
    question_embedding = model.encode(question, convert_to_tensor=True)
    sentence_embeddings = model.encode(sent_tokens, convert_to_tensor=True)
    # cos_sim yields one row (the question) with one column per sentence.
    scores = util.cos_sim(question_embedding, sentence_embeddings)[0].tolist()
    # Similarity degree between the question and each sentence, based on BERT.
    best_score, best_sent = max(zip(scores, sent_tokens))
    return best_sent

# Chatbot based on Wordnet

In [211]:
import wordnet_Similarity
def Response_wordnet(question:str):
    """Return the corpus sentence most similar to ``question`` according to WordNet.

    Each sentence in the module-level ``sent_tokens`` list is scored against
    the question with ``wordnet_Similarity.sim3`` (a module not shown in this
    notebook; its score is assumed to be an orderable number).

    Args:
        question: The user's query text.

    Returns:
        The highest-scoring sentence from ``sent_tokens``. When several
        sentences share the top score, the one that appears last in
        ``sent_tokens`` is returned. An empty string is returned when
        ``sent_tokens`` is empty.
    """
    best_sent = ""
    best_sim = None
    for sent in sent_tokens:
        # Similarity degree between the question and the sentence.
        sim_degree = wordnet_Similarity.sim3(question, sent)
        # ">=" lets a later sentence replace an earlier one on an exact tie.
        if best_sim is None or sim_degree >= best_sim:
            best_sim, best_sent = sim_degree, sent
    return best_sent

# Chatbot based on word2vec

In [212]:
from gensim.models.keyedvectors import KeyedVectors
# Path is relative to the notebook's working directory.
model_path = 'GoogleNews-vectors-negative300.bin'
# Load the vectors from the binary word2vec file; `w2v_model` is handed to
# DocSim in the next cell.
w2v_model = KeyedVectors.load_word2vec_format(model_path, binary=True)



In [213]:
from DocSim import DocSim
# Wrap the loaded word2vec vectors in the DocSim helper (module not shown
# here); Response_word2vec calls ds.calculate_similarity on it.
ds = DocSim(w2v_model)

In [214]:
def Response_word2vec(question:str):
    """Return the corpus sentence most similar to ``question`` according to word2vec.

    The question and a copy of the module-level ``sent_tokens`` list are passed
    to ``ds.calculate_similarity``. The result is consumed as an iterable of
    mappings with a ``'score'`` and a ``'doc'`` entry.

    Args:
        question: The user's query text.

    Returns:
        The ``'doc'`` value of the highest-scoring result. When several results
        share the top score, the one that appears last in the returned
        sequence wins. An empty string is returned when no results come back.
    """
    # Pass a copy so the shared sentence list cannot be altered by the callee.
    results = ds.calculate_similarity(question, list(sent_tokens))
    best_doc = ""
    best_score = None
    for result in results:
        score = result['score']
        # ">=" lets a later result replace an earlier one on an exact tie.
        if best_score is None or score >= best_score:
            best_score, best_doc = score, result['doc']
    return best_doc

In [215]:
# Sample query reused by the demo cells below.
# NOTE(review): the misspellings ("aratificial intelliegnce") may be a
# deliberate robustness probe - confirm before correcting them.
q1="the aratificial intelliegnce play crucial role in conversational AI, "



In [216]:
# Demo: answer the sample query with the word2vec back-end.
print('the response based word2vec:  ',Response_word2vec(q1))

the response based word2vec:   this criterion depends on the ability of a computer program to impersonate a human in a real-time written conversation with a human judge, sufficiently well that the judge is unable to distinguish reliablyon the basis of the conversational content alonebetween the program and a real human.


In [217]:
# Demo: answer the sample query with the sentence-embedding (BERT) back-end.
print('the response based BERT: ',Response_BERT(q1))

the response based BERT:  one pertinent field of ai research is natural language processing.


In [218]:
# Demo: answer the sample query with the spaCy back-end.
print('the response based Spacy:  ',Response_spacy(q1))

the response based Spacy:   this criterion depends on the ability of a computer program to impersonate a human in a real-time written conversation with a human judge, sufficiently well that the judge is unable to distinguish reliablyon the basis of the conversational content alonebetween the program and a real human.


In [219]:
# Demo: answer the sample query with the WordNet back-end.
print('the response based wordnet:  ',Response_wordnet(q1))

the response based wordnet:   still, there is currently no general purpose conversational artificial intelligence, and some software developers focus on the practical aspect, information retrieval.


In [220]:
# Re-declares the sample query with the same text as the earlier q1 cell.
q1="the aratificial intelliegnce play crucial role in conversational AI, "
# The sentence that the word2vec and spaCy demo cells printed for q1.
# `s1` is not referenced by any other cell visible in this notebook.
s1='this criterion depends on the ability of a computer program to impersonate a human in a real-time written conversation with a human judge, sufficiently well that the judge is unable to distinguish reliablyon the basis of the conversational content alonebetween the program and a real human.'

In [221]:

# Keyword Matching
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey",)
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]

def greeting(sentence):
    """If the user's input contains a greeting, return a random greeting response.

    Matching is case-insensitive and works on whole words: punctuation
    attached to a word ("hello!", "hi,") is ignored, and multi-word entries of
    ``GREETING_INPUTS`` such as "what's up" are matched as a phrase.

    Args:
        sentence: The user's input text.

    Returns:
        A random entry of ``GREETING_RESPONSES`` when a greeting is found,
        otherwise ``None``.
    """
    # Strip punctuation from the ends of each word; inner apostrophes
    # ("what's") are kept.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    # Surround the normalised text with spaces so that " hi " only matches the
    # whole word and never the inside of another word such as "this".
    padded = " " + " ".join(word for word in words if word) + " "
    if any(" " + phrase + " " in padded for phrase in GREETING_INPUTS):
        return random.choice(GREETING_RESPONSES)
    return None

In [222]:
# Interactive read-reply loop: runs until the user says goodbye or thanks.
flag=True
print("SAMI: My name is Sami. I will answer your queries about Chatbots. If you want to exit, type Bye!")
while flag:
    # Normalise the input so that " Bye " or "Thanks " are still recognised.
    user_response = input().strip().lower()
    if user_response == 'bye':
        flag = False
        print("SAMI: Bye! take care..")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("SAMI: You are welcome..")
    else:
        # Call greeting() once and reuse the result for both the check and
        # the printed reply.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("SAMI: " + greeting_reply)
        else:
            print("SAMI: ", end="")
            print('please wait .....')
            # Swap in one of the alternative back-ends by changing which
            # line is active.
            #print(Response_wordnet(user_response))
            #print(Response_BERT(user_response))
            #print(Response_word2vec(user_response))
            print(Response_spacy(user_response))
        


SAMI: My name is Sami. I will answer your queries about Chatbots. If you want to exit, type Bye!


 the aratificial intelliegnce play crucial role in conversational AI,


SAMI: please wait .....
this criterion depends on the ability of a computer program to impersonate a human in a real-time written conversation with a human judge, sufficiently well that the judge is unable to distinguish reliablyon the basis of the conversational content alonebetween the program and a real human.


 bye


SAMI: Bye! take care..
