In [5]:
# things we need for NLP
import nltk
from nltk.stem.lancaster import LancasterStemmer
# One Lancaster stemmer instance shared by every cell that normalises tokens:
# vocabulary building, training-bag construction and clean_up_sentence().
stemmer = LancasterStemmer()

# things we need for Tensorflow
import numpy as np
import tensorflow as tf
import random

  from ._conv import register_converters as _register_converters


In [6]:
# import our chat-bot intents file
import json
# Parse dataset.json once. The corpus-building cell iterates intents['intents']
# and reads the 'intent' and 'utterances' keys of each entry.
with open('dataset.json') as json_data:
    intents = json.load(json_data)

In [8]:
# Tokens listed here are dropped from the vocabulary (they are still kept
# inside the tokenized documents).
ignore_words = ['?']

# One (token list, intent name) pair per utterance, in dataset order.
documents = [
    (nltk.word_tokenize(utterance), entry['intent'])
    for entry in intents['intents']
    for utterance in entry['utterances']
]

# Vocabulary: every token that is not ignored, lower-cased and stemmed,
# de-duplicated and sorted so each word has a stable position.
words = sorted({
    stemmer.stem(token.lower())
    for tokens, _ in documents
    for token in tokens
    if token not in ignore_words
})

# Intent labels that own at least one utterance, de-duplicated and sorted.
classes = sorted({label for _, label in documents})

print (len(classes), "classes", classes)
print (len(words), "unique stemmed words")
print (len(documents), "documents")
print (documents[0])
print (documents[1])




(25, 'classes', [u'about_VA', u'capabilities', u'compound_questions', u'decision_replies', u'goodbyes', u'greetings', u'improving_system', u'information_request', u'interface_interactions', u'interface_issues', u'locate_amenity', u'navigation', u'negative_reaction', u'not_specified', u'out_of_scope', u'phone', u'positive_reaction', u'selections', u'system_reliance', u'traffic_update', u'turn_down', u'turn_off', u'turn_on', u'turn_up', u'weather'])
(1320, 'unique stemmed words')
(2520, 'documents')
([u'hey', u',', u'now', u'it', u"'s", u'raining', u',', u'you', u'said', u'you', u'did', u"n't", u'know', u'anything', u'about', u'that', u'.', u'Are', u'you', u'a', u'liar', u'?'], u'system_reliance')
([u'do', u'you', u'tell', u'the', u'truth'], u'system_reliance')


In [15]:
# Build the supervised training set: one [bag-of-words, one-hot label] pair
# per document.
training = []
# Template label row: a 0 for every class; copied and patched per document.
output_empty = [0] * len(classes)

for doc in documents:
    # doc is (token list, intent name). Normalise the tokens the same way the
    # vocabulary was normalised; a set makes each membership test O(1).
    pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}

    # 1 where the vocabulary word occurs in this document, 0 elsewhere
    bag = [1 if w in pattern_words else 0 for w in words]

    # output is a '0' for each intent and '1' for current intent
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Every bag has exactly len(words) entries; reading the length from `words`
# also works when `documents` is empty (no loop variable left behind).
print (len(words), "bags")
print (len(training), "trainings")

# shuffle the samples in place (uses the global `random` state)
random.shuffle(training)

# Split features and labels straight from the Python list. The two halves of
# a sample have different lengths, so stacking them into a regular ndarray
# first is not possible (recent NumPy raises ValueError for such input).
train_x = [features for features, _ in training]
train_y = [label for _, label in training]

# Keep `training` available as an array, as before; dtype=object is required
# because the nested sequences are ragged.
training = np.array(training, dtype=object)

(1320, 'bags')
(2520, 'trainings')


In [20]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


Using TensorFlow backend.


In [25]:
# Small feed-forward classifier: bag-of-words vector in, one softmax score
# per intent class out, with two 8-unit ReLU layers in between.
model = Sequential([
    Dense(8, input_dim=len(train_x[0]), activation='relu'),
    Dense(8, activation='relu'),
    Dense(len(train_y[0]), activation='softmax'),
])
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 8)                 10568     
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 72        
_________________________________________________________________
dense_3 (Dense)              (None, 25)                225       
Total params: 10,865
Trainable params: 10,865
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Configure training: cross-entropy over the one-hot labels, Adam optimizer,
# and accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [29]:
# Train on the full bag-of-words set; left as the cell's last expression so
# the returned History object is still displayed.
model.fit(
    x=np.array(train_x),
    y=np.array(train_y),
    epochs=40,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x125800a50>

In [30]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Uses nltk.word_tokenize for splitting and the notebook-level `stemmer`
    for stemming; the result is a list with one entry per token, in order.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=False):
    """Encode `sentence` as a binary bag-of-words vector over `words`.

    The sentence is tokenized and stemmed with clean_up_sentence(); slot i of
    the result is 1 when words[i] equals one of those stems and 0 otherwise.

    Args:
        sentence: raw text to encode.
        words: sequence of vocabulary entries (hashable, e.g. strings); it
            fixes the length and ordering of the returned vector.
        show_details: when true, print a "found in bag" line for each match.

    Returns:
        np.ndarray with len(words) entries, each 0 or 1.
    """
    sentence_words = clean_up_sentence(sentence)

    # Index the vocabulary once so each stem costs a dict lookup instead of a
    # scan over every vocabulary word. A list of positions per word keeps the
    # behaviour for vocabularies that contain duplicates.
    positions = {}
    for i, w in enumerate(words):
        positions.setdefault(w, []).append(i)

    bag = [0] * len(words)
    for s in sentence_words:
        for i in positions.get(s, ()):
            bag[i] = 1
            if show_details:
                print ("found in bag: %s" % words[i])

    return np.array(bag)

In [39]:
# Encode a sample utterance against the training vocabulary, then show the
# resulting 0/1 vector and the list of class names.
p = bow("Turn on the radio", words)
print (p)
print (classes)

[0 0 0 ... 0 0 0]
[u'about_VA', u'capabilities', u'compound_questions', u'decision_replies', u'goodbyes', u'greetings', u'improving_system', u'information_request', u'interface_interactions', u'interface_issues', u'locate_amenity', u'navigation', u'negative_reaction', u'not_specified', u'out_of_scope', u'phone', u'positive_reaction', u'selections', u'system_reliance', u'traffic_update', u'turn_down', u'turn_off', u'turn_on', u'turn_up', u'weather']


In [40]:
# Wrap the single bag-of-words vector in a list so the model receives a
# batch containing one sample.
y_pred = model.predict(np.array([p]))

In [41]:
# Raw model output for the sample utterance: one row of softmax scores
# (the output layer was built with len(train_y[0]) units).
print(y_pred)

[[1.12378768e-06 1.01364196e-06 2.43094582e-02 2.97890688e-06
  3.92497168e-04 1.44808485e-08 1.42170524e-08 1.87340256e-13
  1.89397076e-06 2.16867482e-10 2.74130400e-07 3.26781446e-07
  9.34756352e-12 8.14700418e-10 3.76845070e-04 9.97871757e-05
  5.39235434e-10 1.13409247e-11 6.16560243e-08 3.85555743e-14
  1.02777397e-02 1.72899876e-04 9.53947425e-01 1.04156444e-02
  2.88402724e-08]]


In [47]:
# Highest score in the prediction row, computed rather than read from a
# position copied by hand out of an earlier printout.
print(np.max(y_pred[0]))

0.9539474


In [48]:
# Name of the top-scoring intent: argmax locates the winning position instead
# of relying on a hard-coded index that changes with the data or a retrain.
print(classes[int(np.argmax(y_pred[0]))])

turn_on
