# Imports

In [1]:
import numpy as np
from nltk.tokenize import RegexpTokenizer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model
from keras.layers import Dense, Input, Bidirectional, LSTM, GRU, TimeDistributed, Activation, Flatten, Embedding, GlobalMaxPool1D, Dropout, Flatten, merge, RepeatVector, Permute, AveragePooling1D
from keras.optimizers import Adam



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Functions

In [2]:
# Prepare Glove File
def readGloveFile(gloveFile):
    with open(gloveFile, 'r') as f:
        wordToGlove = {}  # map from a token (word) to a Glove embedding vector
        wordToIndex = {}  # map from a token to an index
        indexToWord = {}  # map from an index to a token 

        for line in f:
            record = line.strip().split()
            token = record[0] # take the token (word) from the text line
            wordToGlove[token] = np.array(record[1:], dtype=np.float64) # associate the Glove embedding vector to a that token (word)

        tokens = sorted(wordToGlove.keys())
        for idx, tok in enumerate(tokens):
            kerasIdx = idx + 1  # 0 is reserved for masking in Keras (see above)
            wordToIndex[tok] = kerasIdx # associate an index to a token (word)
            indexToWord[kerasIdx] = tok # associate a word to a token (word). Note: inverse of dictionary above

    return wordToIndex, indexToWord, wordToGlove

In [3]:
# Create Pretrained Keras Embedding Weights Matrix
def createPretrainedEmbeddingMatrix(wordToGlove, wordToIndex):
    vocabLen = len(wordToIndex) + 1  # adding 1 to account for masking
    embDim = next(iter(wordToGlove.values())).shape[0]  # works with any glove dimensions (e.g. 50)

    embeddingMatrix = np.zeros((vocabLen, embDim))  # initialize with zeros
    for word, index in wordToIndex.items():
        embeddingMatrix[index, :] = wordToGlove[word] # create embedding: word index to Glove word embedding

    return vocabLen, embDim, embeddingMatrix

In [4]:
# Embedding
def getEncodedDocs(docs):
    encoded_docs = []

    tokenizer = RegexpTokenizer(r'\w+')
    for doc in docs:
        encoded_doc = []
        for word in tokenizer.tokenize(doc.lower()):
            index = wordToIndex[word]
            if index is not None:
                encoded_doc.append(index)
            else:
                encoded_doc.append(0)
        encoded_docs.append(encoded_doc)

    return encoded_docs

# Import GloVe Pretrained dataset and create Embedding Weight Matrix

In [5]:
max_length = 20
wordToIndex, indexToWord, wordToGlove = readGloveFile("glove/glove.6B.100d.txt")
vocabLen, embDim, embeddingMatrix = createPretrainedEmbeddingMatrix(wordToGlove, wordToIndex)

# Import intents file

In [6]:
import json
with open('PharmacyDataset.json') as json_data:
    intents = json.load(json_data)

# Padding, Encoding and Preparing final X and Y data for Training

In [7]:
classes = []
encodedUtterances = []

# loop through each sentence in our intents utterances
for intent in intents['intents']:
    classes.append(intent['intent'])
    encoded_docs = getEncodedDocs(intent['utterances'])
    padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
    encodedUtterances.append(padded_docs)


In [8]:
currentClass = 0
train_x = []
train_y = []

for intent in classes:
    y = [0] * len(classes)
    y[currentClass] = 1

    for vector in encodedUtterances[currentClass]:
        train_x.append(vector)
        train_y.append(y)

    currentClass += 1

vectorSize = len(train_x[0])

train_X = np.array(train_x)
train_Y = np.array(train_y)

print(classes, "classes")
print(vectorSize, "vector size")
print(len(train_x), len(train_x[0]), "x")
print(len(train_y), len(train_y[0]), "y")
print(train_X.shape)
print(train_Y.shape)

([u'greeting', u'goodbye', u'thanks', u'options', u'adverse_drug', u'blood_pressure', u'blood_pressure_search', u'pharmacy_search'], 'classes')
(20, 'vector size')
(26, 20, 'x')
(26, 8, 'y')
(26, 20)
(26, 8)


# Prepare and Compile Keras / TensorFlow model

In [9]:
# model = Sequential([
#                    Embedding(vocabLen, embDim, weights=[embeddingMatrix], input_length=max_length, trainable=False),
#                    Bidirectional(LSTM(embDim, batch_size=1, input_shape=(None, embDim), return_sequences=True, dropout=0.0, recurrent_dropout=0.0)),
#                    GlobalMaxPool1D(),
#                    Dense(50, activation="relu"),
#                    Dense(len(train_y[0]), activation='softmax')
#                   ])

# input_ = Input(shape=(len(train_x[0]),), dtype='float')  #int32
# embedded = Embedding(vocabLen, embDim, weights=[embeddingMatrix], input_length=max_length, trainable=False)(input_)
# bidi = Bidirectional(LSTM(embDim, batch_size=1, input_shape=(None, embDim), return_sequences=True, dropout=0.0, recurrent_dropout=0.0))(embedded)
# gmax = GlobalMaxPool1D()(bidi)
# dense50 = Dense(50, activation="relu")(gmax)
# preds = Dense(len(train_y[0]), activation='softmax')(dense50)
# model = Model(input_, preds)

input_ = Input(shape=(len(train_x[0]),), dtype='float')  #int32
embedded = Embedding(vocabLen, embDim, weights=[embeddingMatrix], input_length=max_length, trainable=False)(input_)
#bidi = LSTM(embDim, batch_size=1, input_shape=(None, embDim), return_sequences=True, dropout=0.0, recurrent_dropout=0.0)(embedded)
bidi = Bidirectional(LSTM(embDim, batch_size=1, input_shape=(None, embDim), return_sequences=True, dropout=0.0, recurrent_dropout=0.0))(embedded)
# compute importance for each step
attention = Dense(1, activation='tanh')(bidi)
attention = Flatten()(attention)
attention = Activation('softmax')(attention)
attention = RepeatVector(embDim*2)(attention)
attention = Permute([2, 1])(attention)
# merge
mer = merge([attention, bidi], "mul")
hid = AveragePooling1D(pool_length=len(train_x[0]))(mer)
hid = Flatten()(hid)
preds = Dense(len(train_y[0]), activation='softmax')(hid)
# Model
model = Model(input_, preds)


print("model - Bidirectional LSTM with Attention")
model.summary()


model - Bidirectional LSTM with Attention
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 20, 100)      40000100    input_1[0][0]                    
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 20, 200)      160800      embedding_1[0][0]                
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 20, 1)        201         bidirectional_1[0][0]            
___________________________________________________________________

  name=name)


In [10]:
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train Model

In [11]:
model.fit(train_X, train_Y, epochs=250)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

Epoch 87/250
Epoch 88/250
Epoch 89/250
Epoch 90/250
Epoch 91/250
Epoch 92/250
Epoch 93/250
Epoch 94/250
Epoch 95/250
Epoch 96/250
Epoch 97/250
Epoch 98/250
Epoch 99/250
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoch 110/250
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
Epoch 136/250
Epoch 137/250
Epoch 138/250
Epoch 139/250
Epoch 140/250
Epoch 141/250
Epoch 142/250
Epoch 143/250
Epoch 144/250
Epoch 145/250
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
Epoch 151/250
Epoch 152/250
Epoch 153/250
Epoch 154/250
Epoch 155/250
Epoch 156/250
Epoch 157/250
Epoch 158/250
Epoch

Epoch 170/250
Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250
Epoch 175/250
Epoch 176/250
Epoch 177/250
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 229/250
Epoch 230/250
Epoch 231/250
Epoch 232/250
Epoch 233/250
Epoch 234/250
Epoch 235/250
Epoch 236/250
Epoch 237/250
Epoch 238/250
Epoch 239/250
Epoch 240/250
Epoch 

<keras.callbacks.History at 0x12155d810>

# Save Model

In [12]:
model.save('SwiftNLCGloveRNN.h5')

# Test Model

In [13]:
# evaluate the model
loss, accuracy = model.evaluate(train_X, train_Y, verbose=0)
print('Loss: %f ' % (loss*100))
print('Accuracy: %f ' % (accuracy*100))

Loss: 62.928510 
Accuracy: 100.000000 


In [14]:
print(intents)

#test = np.array([[178126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
#test_docs = getEncodedDocs(["Display blood values for patient"])
#test_docs = getEncodedDocs(["That was very useful"])
#test_docs = getEncodedDocs(["Show blood pressure results for patient"])
test_docs = getEncodedDocs(["Search for a pharmacy please"])



padded_test_docs = pad_sequences(test_docs, maxlen=max_length, padding='post')

print(padded_test_docs)

y_pred = model.predict(padded_test_docs)
print(y_pred)

max_value = max(y_pred[0])
print(max_value)

max_index = y_pred[0].tolist().index(max_value)
print(max_index)

print(intents['intents'][max_index]['intent'])

{u'intents': [{u'intent': u'greeting', u'utterances': [u'Hi', u'How are you', u'Is anyone there?', u'Hello', u'Good day']}, {u'intent': u'goodbye', u'utterances': [u'Bye', u'See you later', u'Goodbye']}, {u'intent': u'thanks', u'utterances': [u'Thanks', u'Thank you', u"That's helpful"]}, {u'intent': u'options', u'utterances': [u'How you could help me?', u'What you can do?', u'What help you provide?']}, {u'intent': u'adverse_drug', u'utterances': [u'How to check Adverse drug reaction?', u'List all drugs suitable for patient with adverse reaction', u'Which drugs dont have adverse reaction?']}, {u'intent': u'blood_pressure', u'utterances': [u'Open blood pressure module', u'I want to log blood pressure results', u'Blood pressure data management']}, {u'intent': u'blood_pressure_search', u'utterances': [u'I want to search for blood pressure result history', u'Show blood pressure results for patient', u'Find blood pressure results by ID']}, {u'intent': u'pharmacy_search', u'utterances': [u'Fi

# Export Word Embedding Array

In [15]:
for i in range(0,10):
    print(wordToIndex.keys()[i], wordToIndex[wordToIndex.keys()[i]], i)
    
import json
with open('Words.json', 'w') as fp:
    json.dump(wordToIndex, fp)


('biennials', 75684, 0)
('tripolitan', 365444, 1)
('biysk', 77319, 2)
('woode', 389559, 3)
('verplank', 377800, 4)
('mdbo', 239051, 5)
('sowell', 338477, 6)
('mdbu', 239054, 7)
('soestdijk', 336526, 8)
('spiders', 339422, 9)


# Export Model using CoreML Tools

In [16]:
import coremltools
coreml_model = coremltools.converters.keras.convert(model, input_names="vectors", output_names="entities")
coreml_model



ValueError: Keras layer '<class 'keras.legacy.layers.Merge'>' not supported. 

In [None]:
coreml_model.save('SwiftNLCGloveRNN.mlmodel')