In [9]:
import numpy as np
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Embedding


In [10]:
# define documents
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better.']

In [11]:
docs

['Well done!',
 'Good work',
 'Great effort',
 'nice work',
 'Excellent!',
 'Weak',
 'Poor effort!',
 'not good',
 'poor work',
 'Could have done better.']

In [12]:
len(docs)

10

In [13]:
#Targets
labels = np.array([1,1,1,1,1,0,0,0,0,0])

In [14]:
# integer encode the documents
vocab_size = 50

# One Hot Representation

In [15]:
encoded_docs = [one_hot(d,vocab_size) for d in docs]

In [16]:
print(encoded_docs)

[[4, 6], [24, 41], [34, 36], [30, 41], [14], [21], [34, 36], [25, 24], [34, 41], [49, 10, 6, 48]]


In [17]:
for i,doc in enumerate(encoded_docs):
    print("The encoded documents ",i+1,doc)

The encoded documents  1 [4, 6]
The encoded documents  2 [24, 41]
The encoded documents  3 [34, 36]
The encoded documents  4 [30, 41]
The encoded documents  5 [14]
The encoded documents  6 [21]
The encoded documents  7 [34, 36]
The encoded documents  8 [25, 24]
The encoded documents  9 [34, 41]
The encoded documents  10 [49, 10, 6, 48]


# PADDING THE DOCS (to make very doc of same length)

In [19]:
max_length = 4
padded_docs = pad_sequences(encoded_docs,maxlen = max_length,padding = 'post' )

In [20]:
print(padded_docs)

[[ 4  6  0  0]
 [24 41  0  0]
 [34 36  0  0]
 [30 41  0  0]
 [14  0  0  0]
 [21  0  0  0]
 [34 36  0  0]
 [25 24  0  0]
 [34 41  0  0]
 [49 10  6 48]]


In [22]:
for i,doc in enumerate(padded_docs):
    print("The padded documents ",i+1,doc)

The padded documents  1 [4 6 0 0]
The padded documents  2 [24 41  0  0]
The padded documents  3 [34 36  0  0]
The padded documents  4 [30 41  0  0]
The padded documents  5 [14  0  0  0]
The padded documents  6 [21  0  0  0]
The padded documents  7 [34 36  0  0]
The padded documents  8 [25 24  0  0]
The padded documents  9 [34 41  0  0]
The padded documents  10 [49 10  6 48]


# Creating model

In [24]:
model = Sequential()
model.add(Embedding(vocab_size,8,input_length = max_length))

# Flattening

In [25]:
model.add(Flatten())

In [26]:
model.add(Dense(1,activation = 'sigmoid'))

# compiling model

In [28]:
model.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics = ['accuracy'])

In [29]:
#summary
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_1 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________


In [31]:
#Fit the model
model.fit(padded_docs,labels,epochs = 50,verbose = 0)

<keras.callbacks.History at 0x2027f481c48>

In [32]:
loss,accuracy = model.evaluate(padded_docs,labels,verbose = 0)

In [35]:
print("Loss : ",loss)
print("Accuracy : ",accuracy)

Loss :  0.5411416888237
Accuracy :  0.800000011920929
