In [1]:
# using concept of vectorization and cosine similarity
# dataset amazon electronic q&a data

In [17]:
import ast
import numpy as np
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [15]:
questions=[]
answers=[]
with open('qa_Electronics.json','r') as f:
    for line in f:
        data = ast.literal_eval(line)
        questions.append(data['question'].lower())
        answers.append(data['answer'].lower())
        
       

In [16]:
# tokenize the text and convert data in matrix format
vectorizer = CountVectorizer(stop_words='english')
X_vec = vectorizer.fit_transform(questions)

In [37]:
# X_vec.toarray()

In [18]:
tfidf = TfidfTransformer() #by default applies "l2" normalization
X_tfidf = tfidf.fit_transform(X_vec)

In [38]:
X_tfidf.toarray()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [20]:
# X_tfidf is the repository matrix that will be searched every time a new question is entered in the chatbot for 
# the most similar question. 
# we search the row that has the maximum cosine (or the minimum angle) with the new question vector and return 
# the corresponding answer to that question as the response.

In [32]:

def conversation(im):
    global tfidf, answers, X_tfidf
    Y_vec = vectorizer.transform(im)
    Y_tfidf = tfidf.fit_transform(Y_vec)
    cos_sim = np.rad2deg(np.arccos(max(cosine_similarity(Y_tfidf, X_tfidf)[0])))
    if cos_sim > 60 :
        return "sorry, I did not quite understand that"
    else:
        return answers[np.argmax(cosine_similarity(Y_tfidf, X_tfidf)[0])]


In [33]:
def main():
    usr = input("Please enter your username: ")
    print("support: Hi, welcome to Q&A support. How can I help you?")
    while True:
        im = input("{}: ".format(usr))
        if im.lower() == 'bye':
            print("Q&A support: bye!")
            break
        else:
            print("Q&A support: "+conversation([im]))

In [34]:
main()

Please enter your username: hello
support: Hi, welcome to Q&A support. How can I help you?
hello: i need a phone
Q&A support: you only need one of these devices per phone number or phoneline. one unit will protect all phones and or extensions on a phoneline. regards,
hello: i need an iphone price
Q&A support: no it won't work ... iphone 5 works only with apple tv .
hello: what is the price of iphone
Q&A support: it does not plug into an iphone. i love this plantronic headset, quality wonderful. i bought an adapter that fit the iphone but the sound quality was bad so i now use the white ear bud thing that came with the iphone. i have small ears so those things hurt when i use them for a long time. i don't recommend the plantronics headset with an iphone.
hello: goodbye
Q&A support: sorry, I did not quite understand that
hello: bye
Q&A support: bye!
