In [2]:
from numpy import array
from tensorflow.keras.preprocessing import text
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten, Embedding
from keras.layers import Input
from keras.callbacks import LambdaCallback
from tensorflow.keras.models import Model

In [3]:
# Toy sentiment corpus: five positive reviews followed by five negative ones.
positive_docs = [
    'Well done!',
    'Good work',
    'Great effort',
    'nice work',
    'Excellent!',
]
negative_docs = [
    'Weak',
    'Poor effort!',
    'not good',
    'poor work',
    'Could have done better.',
]
docs = positive_docs + negative_docs

# Class labels aligned with `docs`: 1 = positive, 0 = negative.
labels = array([1] * len(positive_docs) + [0] * len(negative_docs))

In [4]:
# tokenize documents
# Fit a word-level tokenizer on the corpus; `word2idx` maps each word to an
# integer index starting at 1 (index 0 is never assigned to a word).
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(docs)
word2idx = tokenizer.word_index

# integer encode the documents
# Use the tokenizer's own encoder rather than a hand-written lookup so the
# encoding always applies the same filtering/lower-casing used during fitting.
encoded_docs = tokenizer.texts_to_sequences(docs)
print(encoded_docs)

# pad documents to a max length of 4 words
max_length = 4
# Derive the vocabulary size from the fitted tokenizer instead of hard-coding
# it: the +1 accounts for index 0, which is used below as the padding value.
vocab_size = len(word2idx) + 1
# padding='post' appends zeros after the words of documents shorter than max_length.
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
print(padded_docs)

[[6, 2], [3, 1], [7, 4], [8, 1], [9], [10], [5, 4], [11, 3], [5, 1], [12, 13, 2, 14]]
[[ 6  2  0  0]
 [ 3  1  0  0]
 [ 7  4  0  0]
 [ 8  1  0  0]
 [ 9  0  0  0]
 [10  0  0  0]
 [ 5  4  0  0]
 [11  3  0  0]
 [ 5  1  0  0]
 [12 13  2 14]]


In [5]:
# define the model
# The input shape is declared with an explicit Input layer rather than the
# deprecated `input_length` argument of Embedding, so the model is built
# (and can be summarized) before it is trained.
model = Sequential([
    Input(shape=(max_length,), dtype='int32'),  # one padded document of word indices
    Embedding(vocab_size, 2),                   # 2-dimensional vector per vocabulary index
    Flatten(),                                  # concatenate the per-word vectors
    Dense(1, activation='sigmoid'),             # single sigmoid unit for the binary label
])

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# summarize the model



In [6]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [9]:
# Train on the full toy corpus for 10 epochs, logging progress per epoch.
model.fit(x=padded_docs, y=labels, epochs=10, verbose=1)

# Score the model on the same documents it was trained on (no held-out split)
# and report the accuracy as a percentage.
loss, accuracy = model.evaluate(x=padded_docs, y=labels, verbose=0)
accuracy_percent = accuracy * 100
print('Accuracy: %f' % accuracy_percent)

Epoch 1/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.6000 - loss: 0.6825
Epoch 2/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.6000 - loss: 0.6817
Epoch 3/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - accuracy: 0.6000 - loss: 0.6809
Epoch 4/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.7000 - loss: 0.6801
Epoch 5/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.7000 - loss: 0.6793
Epoch 6/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.8000 - loss: 0.6785
Epoch 7/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9000 - loss: 0.6777
Epoch 8/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.9000 - loss: 0.6769
Epoch 9/10
1/1 ━━━━━━━━━━━━━━━━━━━━ [output truncated]

In [10]:
# Keep a handle on the first layer of the trained model (its Embedding layer).
embedding_layer = model.layers[0]

# Wrap that same layer object in a model of its own, so that predict()
# returns the embedding vectors instead of the classifier output.
model2 = Sequential([embedding_layer])

In [13]:
# Index of the document whose embeddings are inspected.
i = 4
# Slice (rather than index) so the selection keeps its leading batch dimension.
sample = padded_docs[i:i + 1]
# Display the embedding vectors, the padded word indices, and the source text.
model2.predict(sample), sample, docs[i:i + 1]

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step


(array([[[ 0.00501636,  0.0392581 ],
         [-0.02427742, -0.03882178],
         [-0.00132937,  0.02699922],
         [-0.00132937,  0.02699922]]], dtype=float32),
 array([[7, 4, 0, 0]]),
 ['Great effort'])

In [15]:
# Shape of the embedding layer's first weight tensor; given
# Embedding(vocab_size, 2) this is expected to be (vocab_size, 2).
embedding_layer.weights[0].shape

TensorShape([15, 2])