In [2]:
# Libraries for NLP
import nltk
nltk.download("punkt")
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

# Libraries needed for Tensorflow processing
import tensorflow as tf
import numpy as np
import tflearn
import random
import json

  from collections import Mapping, defaultdict
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/abhinavdubey/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Colocations handled automatically by placer.


In [4]:
# importing intent file for chatbot
with open("intents.json") as json_data:
    intents = json.load(json_data)

In [5]:
intents


{'intents': [{'tag': 'greeting',
   'patterns': ['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day'],
   'responses': ['Hello, thanks for visiting',
    'Good to see you again',
    'Hi there, how can I help?'],
   'context_set': ''},
  {'tag': 'goodbye',
   'patterns': ['Bye', 'See you later', 'Goodbye'],
   'responses': ['See you later, thanks for visiting',
    'Have a nice day',
    'Bye! Come back again soon.']},
  {'tag': 'thanks',
   'patterns': ['Thanks', 'Thank you', "That's helpful"],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure']},
  {'tag': 'hours',
   'patterns': ['What hours are you open?',
    'What are your hours?',
    'When are you open?'],
   'responses': ["We're open every day 9am-9pm",
    'Our hours are 9am-9pm every day']},
  {'tag': 'location',
   'patterns': ['What is your location?',
    'Where are you located?',
    'What is your address?',
    'Where is your restaurant situated?'],
   'responses': ['We are on the intersection of Lon

In [6]:
words=[]
classes = []
documents = []
ignore = ["?"]

# Loop through each intent in intents["patterns"]
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        
        # tokenize each and every word in the sentence
        w = nltk.word_tokenize(pattern)
        
        # Adding words to words list
        words.extend(w) 
        #print(words)
        
        # Adding words to documents
        documents.append((w, intent["tag"]))
        print(documents)
        
        # Adding tag to our class list
        if intent["tag"] not in classes:
            classes.append(intent["tag"])
            #print(classes)
            

[(['Hi'], 'greeting')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye')]
[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hel

In [7]:
# Performing Stemming nd lower each word as well asremoving duplicates
words = [stemmer.stem(w.lower()) for w in words if w not in ignore]
words = sorted(list(set(words)))

# Removing Duplicate Classes
classes = sorted(list(set(classes)))

print(len(documents),"Documents ",documents,"\n")
print(len(classes),"Classes ", classes,"\n")
print(len(words), "Stemmed Words ", words)

31 Documents  [(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['What', 'hours', 'are', 'you', 'open', '?'], 'hours'), (['What', 'are', 'your', 'hours', '?'], 'hours'), (['When', 'are', 'you', 'open', '?'], 'hours'), (['What', 'is', 'your', 'location', '?'], 'location'), (['Where', 'are', 'you', 'located', '?'], 'location'), (['What', 'is', 'your', 'address', '?'], 'location'), (['Where', 'is', 'your', 'restaurant', 'situated', '?'], 'location'), (['Do', 'you', 'take', 'credit', 'cards', '?'], 'payments'), (['Do', 'you', 'accept', 'Mastercard', '?'], 'payments'), (['Are', 'you', 'cash', 'only', '?'], 'payments'), (['What', 'is', 'your', 'menu', 'for', 'today', '?'], 'todaysmenu'), (['What', 'are', 

In [8]:
# Creating Training Data
training = []
output = []

#creating empty array for output
output_empty = [0] * len(classes)

#Creating Training set, bag of words for each sentence
for doc in documents:
    # initialize bag of words
    bag = []
    
    #list of tokenized words for the pattern
    pattern_words = doc[0]
    
    # Stemming each word
    pattern_word = [stemmer.stem(word.lower()) for word in pattern_words]
    
    #Creating bag of words array
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    
    # Output 1 for curent tag and 0 for all other tags
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] =1 
    training.append([bag, output_row])
    

In [9]:
# Suffling features and turning it into numpy array
random.shuffle(training)
training = np.array(training)

# Creating Training Lists
train_x = list(training[:,0])
train_y = list(training[:,1])

In [10]:
# Reseeting Underlying graph data
tf.reset_default_graph()

# Building Neural Network
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 10)
net = tflearn.fully_connected(net, 10)
net = tflearn.fully_connected(net, len(train_y[0]), activation="softmax")
net = tflearn.regression(net)

# Defining Model and setting up tensorboard
model = tflearn.DNN(net, tensorboard_dir="tflearn_logs")

# Start Training
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
model.save("model.tflearn")

Training Step: 3999  | total loss: [1m[32m0.86794[0m[0m | time: 0.022s
| Adam | epoch: 1000 | loss: 0.86794 - acc: 0.6725 -- iter: 24/31
Training Step: 4000  | total loss: [1m[32m0.83248[0m[0m | time: 0.025s
| Adam | epoch: 1000 | loss: 0.83248 - acc: 0.6910 -- iter: 31/31
--
INFO:tensorflow:/Users/abhinavdubey/AI Chatboat/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [11]:
import pickle
pickle.dump({"words":words, "classes":classes, "train_x":train_x, "train_y":train_y}, open("training_data", "wb"))

In [12]:
#restoring all data structure
data = pickle.load(open("training_data","rb"))
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

In [13]:
with open("intents.json") as json_data:
    intents = json.load(json_data)

In [14]:
# Loading the saved model
model.load("./model.tflearn")

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /Users/abhinavdubey/AI Chatboat/model.tflearn


In [17]:
# Cleaning up user input
def clean_up_sentence(sentence):
    
    # Tokenizing the pattern
    sentence_words = nltk.word_tokenize(sentence)
    
    # Stemming each word
    sentence_words= [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words

# returning bag of words array: 0 or 1 or each word in the bag that exists in
def bow(sentence, words, show_details=False):
    
    #Tokenizing the pattern
    sentence_words = clean_up_sentence(sentence)
    
    # Generating bag of words
    bag = [0]*len(words)
    for s in sentence_words:
        for i,w in enumerate(words):
            if w == s:
                bag[i] = 1
                if show_details:
                    print("Found in bag: %s"% w)
    return(np.array(bag))

In [None]:
ERROR THRESHOLD = 0.30
def classify(sentence):
    
    # Generate probability from the model
    results = model.predict([bow(sentence, words)])[0]
    
    # Filter our pridictions below a threshold
    results = [[i,r] for i,r in enumerate(results) if r> ERROR_THRESHOLD]
    
    # Sort by strength of probability 
    results.sort(key=lambda x: x[1], reverse= True)
    return_list= []
    for r in results:
        return_list.append((classes[r[0]],r[1]))
        
    #return touple of intent nd probability
    return return_list