# Chatbot for E-commerce (Logistic Regression)

## 1. Data Analysis

In [1]:
import nltk

import json
import numpy as np
import random

# Accumulators filled while scanning the intent patterns.
words = []       # every token of every pattern (duplicates kept at this stage)
classes = []     # intent tags, in order of first appearance
documents = []   # (token list, intent tag) pairs, one per pattern

# Tokens that are dropped when the vocabulary is built.
ignore_words = ['?', '!', ".", ",", "'d", "'s", "@", "#"]

# Read the intent definitions; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open('intents.json') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

In [2]:
# Display the parsed intents structure as the cell output.
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi there',
    'How are you',
    'Is anyone there?',
    'Hey',
    'Hola',
    'Hello',
    'Good day',
    'Hi',
    'Hello there',
    'Hya',
    'Hya there',
    'Is anyone there?',
    'Hello how are you',
    'Hi how do you do'],
   'responses': ['Nice to meet you.',
    'Hello, Good to see you.',
    'Hi! What can I do for you?',
    'Hi! How can I help?',
    'Hello! How can I help?',
    'Hola! How can I help?',
    'Hello, thanks for visiting.'],
   'context': ['']},
  {'tag': 'introduce',
   'patterns': ['What are you?',
    'Who are you?',
    'Can you introduce yourself?',
    "What's your name?"],
   'responses': ['My name is ECOM. I am your Online Shopping Assistant.'],
   'context': ['']},
  {'tag': 'goodbye',
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Till next time',
    'Adios',
    'See you later',
    'Thanks, bye',
    'Thanks for the help, goodbye',
    'Thank you, bye',
    'Thank you, goodb

In [4]:
# Walk every example pattern of every intent, collecting:
#   - its tokens into the global token list,
#   - a (tokens, tag) pair into the corpus,
#   - the tag itself into the class list the first time it is seen.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        tag = intent['tag']
        tokens = nltk.word_tokenize(pattern)

        words += tokens
        documents.append((tokens, tag))

        if tag in classes:
            continue
        classes.append(tag)

In [5]:
# Summarise what the tokenization pass collected: raw tokens, intent tags
# and the (tokens, tag) documents.
separator = "\n----------------- \n"

print("Words: ", words, "\n")
print("Words Size: ", len(words), separator)

print("Classes: ", classes, "\nClasses Size: ", len(classes), separator)

print("Documents: \nDocument Size: ", len(documents))
for doc in documents:
    print(doc, "\n")

Words:  ['Hi', 'there', 'How', 'are', 'you', 'Is', 'anyone', 'there', '?', 'Hey', 'Hola', 'Hello', 'Good', 'day', 'Hi', 'Hello', 'there', 'Hya', 'Hya', 'there', 'Is', 'anyone', 'there', '?', 'Hello', 'how', 'are', 'you', 'Hi', 'how', 'do', 'you', 'do', 'What', 'are', 'you', '?', 'Who', 'are', 'you', '?', 'Can', 'you', 'introduce', 'yourself', '?', 'What', "'s", 'your', 'name', '?', 'Bye', 'See', 'you', 'later', 'Goodbye', 'Till', 'next', 'time', 'Adios', 'See', 'you', 'later', 'Thanks', ',', 'bye', 'Thanks', 'for', 'the', 'help', ',', 'goodbye', 'Thank', 'you', ',', 'bye', 'Thank', 'you', ',', 'goodbye', 'Thanks', 'goodbye', 'Thanks', 'that', "'s", 'all', 'I', 'needed', 'to', 'see', 'Thanks', 'Thank', 'you', 'That', "'s", 'helpful', 'Awesome', ',', 'thanks', 'Thanks', 'for', 'helping', 'me', 'OK', 'thank', 'you', 'OK', 'thanks', 'OK', 'That', "'s", 'helpful', 'Thanks', 'a', 'lot', 'I', 'want', 'to', 'buy', 'a', 'novel', '.', 'I', 'want', 'to', 'buy', 'the', 'newest', 'lipstick', 'you',

## 2. Preprocess data



In [6]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

# Build the vocabulary: skip ignored tokens, lower-case and lemmatize the
# rest, collapse duplicates through a set, and keep the result sorted.
words = sorted({
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in ignore_words
})

print(words)

["'m", 'a', 'about', 'accept', 'adios', 'all', 'an', 'any', 'anyone', 'are', 'awesome', 'be', 'book', 'bracelet', 'buy', 'bye', 'camera', 'can', 'card', 'cash', 'check', 'child', 'cooker', 'cost', 'credit', 'day', 'do', 'doe', 'dress', 'for', 'frying', 'get', 'gloss', 'good', 'goodbye', 'got', 'have', 'headphone', 'heel', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'high', 'hola', 'horror', 'how', 'hya', 'i', 'introduce', 'iphone', 'is', 'kind', 'laptop', 'later', 'lip', 'lipstick', 'looking', 'lot', 'lotion', 'mastercard', 'me', 'moisturizers', 'money', 'much', 'na', 'name', 'need', 'needed', 'newest', 'next', 'nice', 'novel', 'of', 'ok', 'only', 'or', 'order', 'out', 'pair', 'pan', 'pay', 'paypal', 'please', 'pot', 'prom', 'purchase', 'recommend', 'rice', 'see', 'sell', 'shampoo', 'shirt', 'shoe', 'show', 'some', 'suggest', 'sunglass', 'take', 'thank', 'thanks', 'that', 'the', 'there', 'this', 'till', 'time', 'to', 'too', 'toy', 'vest', 'wan', 'want', 'watch', 'what', 'who', 

In [7]:
# Deduplicate the intent tags and keep them in sorted order; a tag's
# position in this list is what the training rows use as its label.
classes = sorted(set(classes))
classes

['buyproduct',
 'goodbye',
 'greeting',
 'introduce',
 'noanswer',
 'payment',
 'thanks']

In [8]:
# words = all words, vocabulary
# Report the vocabulary size followed by the vocabulary itself.
print (len(words), "unique lemmatized words", words)

123 unique lemmatized words ["'m", 'a', 'about', 'accept', 'adios', 'all', 'an', 'any', 'anyone', 'are', 'awesome', 'be', 'book', 'bracelet', 'buy', 'bye', 'camera', 'can', 'card', 'cash', 'check', 'child', 'cooker', 'cost', 'credit', 'day', 'do', 'doe', 'dress', 'for', 'frying', 'get', 'gloss', 'good', 'goodbye', 'got', 'have', 'headphone', 'heel', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'high', 'hola', 'horror', 'how', 'hya', 'i', 'introduce', 'iphone', 'is', 'kind', 'laptop', 'later', 'lip', 'lipstick', 'looking', 'lot', 'lotion', 'mastercard', 'me', 'moisturizers', 'money', 'much', 'na', 'name', 'need', 'needed', 'newest', 'next', 'nice', 'novel', 'of', 'ok', 'only', 'or', 'order', 'out', 'pair', 'pan', 'pay', 'paypal', 'please', 'pot', 'prom', 'purchase', 'recommend', 'rice', 'see', 'sell', 'shampoo', 'shirt', 'shoe', 'show', 'some', 'suggest', 'sunglass', 'take', 'thank', 'thanks', 'that', 'the', 'there', 'this', 'till', 'time', 'to', 'too', 'toy', 'vest', 'wan', 'wan

In [10]:
# with open("words_data.json", "w") as words_data:
#     json.dump(words, words_data)
# with open("classes_data.json", "w") as classes_data:
#     json.dump(classes, classes_data)

## 3. Create training and testing data



In [9]:
# create our training data
# Rows appended to this list are [bag-of-words vector, class index].
training = []

In [10]:
# Encode every (tokens, tag) document as a [bag-of-words vector, label] row.
# The vector is called `bag` rather than `bow` so that this cell never
# rebinds the module-level name of the bow() helper defined further down
# (re-running the cell would otherwise replace the function with a list).
for d in documents:
    # Normalise the pattern's tokens the same way the vocabulary was built,
    # then discard the ignored tokens.
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in d[0]]
    pattern_words = [w for w in pattern_words if w not in ignore_words]

    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere.
    present = set(pattern_words)
    bag = [1 if vocab_word in present else 0 for vocab_word in words]

    # The label is the tag's position in the sorted class list.
    y = classes.index(d[1])

    training.append([bag, y])

In [11]:
# Print every training row to inspect the encoded vectors and labels.
for t in training:
    print(t)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2]
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], 2]
[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [12]:
# Randomise the row order in place, then wrap the rows in an object array
# so the two columns can be sliced apart.
random.shuffle(training)
training = np.array(training, dtype=object)

# Column 0 holds the bag-of-words vectors (X), column 1 the class
# indices (y). Only training lists are produced here.
X_train = training[:, 0].tolist()
y_train = training[:, 1].tolist()

## 4. Build the Model

In [122]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# Candidate classifiers for the bag-of-words features. Exactly one line must
# be active: with all of them commented out, `model` is undefined and the
# fit() call below fails with NameError. The active choice matches the
# estimator shown in this cell's recorded output.
# model = GaussianNB()
# model = LogisticRegression()
# model = SVC(kernel = 'linear')
# model = KNeighborsClassifier(n_neighbors = 1)
# model = DecisionTreeClassifier()
model = RandomForestClassifier(max_depth=10, random_state=1000)

# Train on the bag-of-words vectors and their class indices.
model.fit(X_train, y_train)

RandomForestClassifier(max_depth=10, random_state=1000)

## 5. Predict response

In [95]:
import random
# Keep this list identical to the one used when the vocabulary was built;
# a shorter list here would change the vocabulary if the preprocessing
# cells were re-run after this one.
ignore_words = ['?', '!', ".", ",", "'d", "'s", "@", "#"]

def clean(sentence):
    """Tokenize ``sentence`` and return its normalised tokens.

    Tokens found in ``ignore_words`` are skipped; every remaining token is
    lower-cased and lemmatized (not stemmed) with the shared ``lemmatizer``.
    """
    normalised = []
    for token in nltk.word_tokenize(sentence):
        if token in ignore_words:
            continue
        normalised.append(lemmatizer.lemmatize(token.lower()))
    return normalised

def bow(sentence):
    """Encode ``sentence`` as a bag-of-words vector over the vocabulary.

    Args:
        sentence: raw text; it is tokenized and normalised by ``clean``.

    Returns:
        A NumPy array with one entry per word of the global ``words`` list:
        1 if that word occurs in the cleaned sentence, 0 otherwise.
    """
    # A set gives constant-time membership tests, so the vocabulary is
    # scanned once instead of once per sentence token.
    present = set(clean(sentence))
    return np.array([1 if vocab_word in present else 0 for vocab_word in words])

def predict(sentence):
    """Return the intent tag the trained ``model`` assigns to ``sentence``."""
    # The model expects a 2-D batch, so wrap the single vector in one row.
    batch = bow(sentence)[np.newaxis, :]
    label_index = model.predict(batch)[0]
    return classes[label_index]

In [96]:
def get_response(intent):
    """Return a randomly chosen response for the given intent tag.

    Args:
        intent: tag to look up among the entries of ``intents["intents"]``.

    Returns:
        One element of the matching entry's ``responses`` list.

    Raises:
        ValueError: if no entry carries that tag. Previously this case
            surfaced as an UnboundLocalError on the return statement.
    """
    for entry in intents["intents"]:
        if entry['tag'] == intent:
            return random.choice(entry['responses'])
    raise ValueError("no intent with tag {!r}".format(intent))
def chatbot(word):
    """Answer the user message ``word``: classify its intent, then pick a reply."""
    return get_response(predict(word))

In [97]:
# Smoke-test the classifier on a single-word message.
print(predict("thanks"))

thanks


In [123]:
from sklearn.metrics import accuracy_score, f1_score
# Evaluate the classifier on the held-out samples in test_data.json.
# Each sample is indexed as sample[0] (the text) and sample[1] (the true tag).
with open('test_data.json') as test_file:
    testjson = json.load(test_file)

testtrue = [sample[1] for sample in testjson]
testpred = [predict(sample[0]) for sample in testjson]

print("test_true: {}".format(testtrue))
print('*'*40)
print("test_pred: {}".format(testpred))
print('*'*40)
print("Accuracy: {}".format(accuracy_score(testtrue, testpred)))
# Macro averaging weights every intent equally regardless of its frequency.
print("F1 score: {}".format(f1_score(testtrue, testpred, average = 'macro')))

test_true: ['greeting', 'greeting', 'greeting', 'greeting', 'introduce', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'payment', 'payment', 'payment', 'thanks', 'thanks', 'buyproduct', 'buyproduct', 'introduce', 'introduce', 'greeting', 'thanks', 'greeting', 'payment', 'goodbye', 'thanks', 'thanks', 'payment', 'buyproduct', 'introduce', 'greeting', 'buyproduct', 'introduce', 'payment', 'payment', 'payment', 'thanks', 'buyproduct', 'greeting']
****************************************
test_pred: ['greeting', 'greeting', 'greeting', 'greeting', 'introduce', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'buyproduct', 'payment', 'payment', 'payment', 'greeting', 'greeting', 'buypro

In [60]:
# from sklearn.metrics import accuracy_score, f1_score
# besti = 0
# maxacc = 0
# for i in range(1,21):
#     model = KNeighborsClassifier(n_neighbors = i)
#     model.fit(X_train, y_train)
#     print('*'*100)
#     print(i)
#     testjson = json.loads(open('test_data.json').read())
#     testtrue = []
#     testpred = []
#     for idx in range(len(testjson)):
#         testtrue.append(testjson[idx][1])
#         testpred.append(predict(testjson[idx][0]))
#     acc = accuracy_score(testtrue, testpred)
#     f1 = f1_score(testtrue, testpred, average = 'macro')
#     print("Accuracy: {}".format(acc))
#     print("F1 score: {}".format(f1))
#     if(acc > maxacc):
#         print("aaaa")
#         maxacc = acc
#         besti = i
# print("besti ",besti)

In [None]:
# while(True):
#     w = str(input())
#     print("You: ", w)
#     print("Bot: ",chatbot(w))
#     print("-------------")
    

Hello
You:  Hello
PREDICT:  greeting 

Bot:  Hello, Good to see you.
-------------
?
You:  ?
PREDICT:  greeting 

Bot:  Hello, Good to see you.
-------------
i want to buy something
You:  i want to buy something
PREDICT:  payment 

Bot:   Please fill in your payment form.
-------------
i want to pay
You:  i want to pay
PREDICT:  payment 

Bot:   Please fill in your payment form.
-------------
i want to buy
You:  i want to buy
PREDICT:  payment 

Bot:   Please fill in your payment form.
-------------
buy
You:  buy
PREDICT:  greeting 

Bot:  Nice to meet you.
-------------
how do i buy
You:  how do i buy
PREDICT:  payment 

Bot:   Please fill in your payment form.
-------------
