In [63]:
import numpy as np
import tensorflow as tf
import random

# NLTK bag-of-words / one-hot encoding test

In [64]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
# Stemmer shared by vocabulary building and sentence clean-up.
stemmer = LancasterStemmer()


# Read the intents file and parse its JSON content.
import json
with open('dataset.json') as intents_file:
    intents = json.loads(intents_file.read())

In [51]:
# Tokens listed here are dropped from the vocabulary.
ignore_words = ['?']
# (token list, intent name) pair for every utterance in the dataset.
documents = []
raw_tokens = []
seen_labels = set()

# Walk every utterance of every intent, tokenizing as we go.
for intent in intents['intents']:
    for utterance in intent['utterances']:
        label = intent['intent']
        tokens = nltk.word_tokenize(utterance)
        raw_tokens += tokens
        documents.append((tokens, label))
        # An intent only becomes a class once it contributes an utterance.
        seen_labels.add(label)

# Vocabulary: lower-cased, stemmed, de-duplicated and sorted tokens.
words = sorted({stemmer.stem(tok.lower()) for tok in raw_tokens if tok not in ignore_words})

# Class list: unique intent names in sorted order.
classes = sorted(seen_labels)

print (len(classes), "classes", classes)
print (len(words), "unique stemmed words")
print (len(documents), "documents")
print (documents[0])
print (documents[1])




(25, 'classes', [u'about_VA', u'capabilities', u'compound_questions', u'decision_replies', u'goodbyes', u'greetings', u'improving_system', u'information_request', u'interface_interactions', u'interface_issues', u'locate_amenity', u'navigation', u'negative_reaction', u'not_specified', u'out_of_scope', u'phone', u'positive_reaction', u'selections', u'system_reliance', u'traffic_update', u'turn_down', u'turn_off', u'turn_on', u'turn_up', u'weather'])
(1320, 'unique stemmed words')
(2520, 'documents')
([u'hey', u',', u'now', u'it', u"'s", u'raining', u',', u'you', u'said', u'you', u'did', u"n't", u'know', u'anything', u'about', u'that', u'.', u'Are', u'you', u'a', u'liar', u'?'], u'system_reliance')
([u'do', u'you', u'tell', u'the', u'truth'], u'system_reliance')


In [52]:
# Build the training set: one [bag-of-words, one-hot label] pair per document.
training = []
# Template label row with one zero per class; copied for every document.
output_empty = [0] * len(classes)

for doc in documents:
    # doc is (token list, intent name). Stem the tokens once and keep them in
    # a set so each vocabulary lookup below is a constant-time membership test.
    pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in this document, 0 otherwise.
    bag = [1 if w in pattern_words else 0 for w in words]

    # output is a '0' for each intent and '1' for current intent
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Every bag has one slot per vocabulary word, so report len(words); this also
# avoids relying on the loop variable `bag`, which is undefined when
# `documents` is empty.
print (len(words), "bags")
print (len(training), "trainings")


# Shuffle the samples, then wrap them in an object array. Each row pairs two
# lists of different lengths (vocabulary size vs. class count), so the dtype
# must be given explicitly: recent NumPy refuses to infer a ragged array.
random.shuffle(training)
training = np.array(training, dtype=object)

# Split into feature rows (train_x) and label rows (train_y).
train_x = list(training[:,0])
train_y = list(training[:,1])

(1320, 'bags')
(2520, 'trainings')


# Prepare Keras / TensorFlow model

In [53]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


In [54]:
# Layer sizes are taken from the encoded training data.
n_features = len(train_x[0])
n_classes = len(train_y[0])

# Two small ReLU hidden layers followed by a softmax over the intent classes.
model = Sequential([
    Dense(8, input_dim=n_features, activation='relu'),
    Dense(8, activation='relu'),
    Dense(n_classes, activation='softmax'),
])
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 8)                 10568     
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 72        
_________________________________________________________________
dense_6 (Dense)              (None, 25)                225       
Total params: 10,865
Trainable params: 10,865
Non-trainable params: 0
_________________________________________________________________


In [55]:
# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot labels, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


# Train Model

In [56]:
# Convert the Python lists to arrays, then train for 100 epochs.
x_matrix = np.array(train_x)
y_matrix = np.array(train_y)
model.fit(x_matrix, y_matrix, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1278fd510>

# Test Model

In [57]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` with NLTK and return its tokens lower-cased and stemmed.

    Returns a list with one stemmed string per token, in sentence order.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    """Encode `sentence` as a binary bag-of-words vector over `words`.

    Args:
        sentence: raw text; it is tokenized and stemmed by clean_up_sentence.
        words: vocabulary sequence; position i of the result corresponds to
            words[i].
        show_details: when true, print each vocabulary entry that matched.

    Returns:
        A NumPy array of len(words) entries holding 1 where the vocabulary
        entry occurs in the sentence and 0 elsewhere.
    """
    sentence_words = clean_up_sentence(sentence)

    # Index the vocabulary once so each sentence token is resolved with a
    # single lookup instead of a scan over every vocabulary entry. A list of
    # positions is kept per entry so a repeated entry still sets every slot.
    positions = {}
    for i, w in enumerate(words):
        positions.setdefault(w, []).append(i)

    bag = [0] * len(words)
    for s in sentence_words:
        for i in positions.get(s, ()):
            bag[i] = 1
            if show_details:
                print ("found in bag: %s" % words[i])

    return np.array(bag)

In [58]:
# Encode a sample utterance against the training vocabulary.
p = bow("Turn on the radio", words)
print (p)
# Class names in the same index order as the one-hot label rows.
print (classes)

[0 0 0 ... 0 0 0]
[u'about_VA', u'capabilities', u'compound_questions', u'decision_replies', u'goodbyes', u'greetings', u'improving_system', u'information_request', u'interface_interactions', u'interface_issues', u'locate_amenity', u'navigation', u'negative_reaction', u'not_specified', u'out_of_scope', u'phone', u'positive_reaction', u'selections', u'system_reliance', u'traffic_update', u'turn_down', u'turn_off', u'turn_on', u'turn_up', u'weather']


In [59]:
# Wrap the single encoded sentence in a batch of one before predicting.
y_pred = model.predict(np.array([p]))

In [60]:
# Show the raw prediction: one softmax score per entry in `classes`.
print(y_pred)

[[7.5455051e-26 4.6361151e-10 2.4519872e-04 6.4304201e-11 8.1930615e-05
  2.1279399e-13 7.7437688e-18 4.7291953e-14 5.7925716e-11 1.6730375e-05
  1.3792224e-05 5.9013184e-13 6.9718530e-11 5.2293857e-13 3.8408107e-08
  3.3146169e-13 1.2176230e-12 9.6799624e-10 1.5009475e-16 2.6067844e-24
  4.6020046e-09 3.6008959e-08 9.9962592e-01 1.6307209e-05 1.7988539e-16]]


In [61]:
# Report the highest score in the prediction row. A hard-coded index is only
# right for one particular sorted class list and one particular prediction.
print(np.max(y_pred[0]))

0.9996259


In [62]:
# Look up the class name at the position of the highest score. A hard-coded
# index breaks as soon as the class list or the prediction changes.
print(classes[int(np.argmax(y_pred[0]))])

turn_on


# Export Model using CoreML Tools

In [None]:
# Save the trained Keras model to disk as 'Car.h5'.
model.save('Car.h5')

In [None]:
import coremltools
# Convert the trained Keras model to Core ML, naming its input and output.
coreml_model = coremltools.converters.keras.convert(
    model,
    input_names="utterances",
    output_names="entities",
)
coreml_model

In [None]:
# Write the converted Core ML model to 'Car.mlmodel'.
coreml_model.save('Car.mlmodel')