In [4]:
from numpy import array
from numpy import asarray
from numpy import zeros
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Embedding

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [5]:
# define documents
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better.']
# define class labels
labels = array([1,1,1,1,1,0,0,0,0,0])

In [6]:
# prepare tokenizer
t = Tokenizer()
t.fit_on_texts(docs)
vocab_size = len(t.word_index) + 1

In [7]:
# integer encode the documents
encoded_docs = t.texts_to_sequences(docs)
print(encoded_docs)

[[6, 2], [3, 1], [7, 4], [8, 1], [9], [10], [5, 4], [11, 3], [5, 1], [12, 13, 2, 14]]


In [8]:
# pad documents to a max length of 4 words
max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
print(padded_docs)

[[ 6  2  0  0]
 [ 3  1  0  0]
 [ 7  4  0  0]
 [ 8  1  0  0]
 [ 9  0  0  0]
 [10  0  0  0]
 [ 5  4  0  0]
 [11  3  0  0]
 [ 5  1  0  0]
 [12 13  2 14]]


In [9]:
# load the whole embedding into memory
embeddings_index = dict()
f = open('glove/glove.6B.100d.txt')
for line in f:
	values = line.split()
	word = values[0]
	coefs = asarray(values[1:], dtype='float32')
	embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 400000 word vectors.


In [10]:
# create a weight matrix for words in training docs
embedding_matrix = zeros((vocab_size, 100))
for word, i in t.word_index.items():
	embedding_vector = embeddings_index.get(word)
	if embedding_vector is not None:
		embedding_matrix[i] = embedding_vector

In [11]:
print(embedding_matrix.shape)
print(embedding_matrix)

(15, 100)
[[ 0.          0.          0.         ...  0.          0.
   0.        ]
 [-0.11619     0.45447001 -0.69216001 ... -0.54737002  0.48822001
   0.32246   ]
 [-0.2978      0.31147    -0.14937    ... -0.22709    -0.029261
   0.4585    ]
 ...
 [ 0.05869     0.40272999  0.38633999 ... -0.35973999  0.43718001
   0.10121   ]
 [ 0.15711001  0.65605998  0.0021149  ... -0.60614997  0.71004999
   0.41468999]
 [-0.047543    0.51914001  0.34283999 ... -0.26859     0.48664999
   0.55609   ]]


In [12]:
# define model
model = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=4, trainable=False)
model.add(e)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

In [13]:
# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# summarize the model
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 4, 100)            1500      
_________________________________________________________________
flatten_1 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 401       
Total params: 1,901
Trainable params: 401
Non-trainable params: 1,500
_________________________________________________________________
None


In [14]:
# fit the model
model.fit(padded_docs, labels, epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x11048a850>

In [15]:
# evaluate the model
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Accuracy: 100.000000


In [16]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 4, 100)            1500      
_________________________________________________________________
flatten_1 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 401       
Total params: 1,901
Trainable params: 401
Non-trainable params: 1,500
_________________________________________________________________


In [17]:
# test prediction
test = array([[ 6,  2,  0,  0],
              [ 3,  1,  0,  0],
              [ 7,  4,  0,  0],
              [ 8,  1,  0,  0],
              [ 9,  0,  0,  0],
              [10,  0,  0,  0],
              [ 5,  4,  0,  0],
              [11,  3,  0,  0],
              [ 5,  1,  0,  0],
              [12, 13,  2, 14]])

y_pred = model.predict(test)
print(y_pred)
print(docs)
print(labels)

[[0.62163186]
 [0.72402626]
 [0.69156706]
 [0.795892  ]
 [0.69939846]
 [0.21752124]
 [0.33696893]
 [0.29467344]
 [0.44551077]
 [0.03579354]]
['Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!', 'Weak', 'Poor effort!', 'not good', 'poor work', 'Could have done better.']
[1 1 1 1 1 0 0 0 0 0]


In [18]:
# save model
model.save('model.h5')

In [19]:
# export coreml
import coremltools
coreml_model = coremltools.converters.keras.convert(model)
coreml_model.save('model.mlmodel')



0 : embedding_1_input, <keras.engine.topology.InputLayer object at 0x124978ed0>
1 : embedding_1, <keras.layers.embeddings.Embedding object at 0x124978d50>
2 : flatten_1, <keras.layers.core.Flatten object at 0x12a3a6dd0>
3 : dense_1, <keras.layers.core.Dense object at 0x12a3a6f50>
4 : dense_1__activation__, <keras.layers.core.Activation object at 0x12ae20450>


In [20]:
# dump coreml
coreml_model

input {
  name: "input1"
  type {
    multiArrayType {
      shape: 1
      dataType: DOUBLE
    }
  }
}
output {
  name: "output1"
  type {
    multiArrayType {
      shape: 1
      dataType: DOUBLE
    }
  }
}

In [30]:
# Make predictions
predictions = coreml_model.predict({'input1': [[6.0,  2.0,  0.0,  0.0]] })

RuntimeError: {
    NSLocalizedDescription = "Input input1 is an array of rank 2, but this model only supports single vector inputs (rank 1) or a sequence of batches of vectors (rank 3).";
}