In [47]:
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from numpy import asarray
from numpy import zeros

In [30]:
docs = ['Well done!',
       'Good work.',
       'Great Effort',
       'nice work',
       'excellent',
       'weak',
       'Poor Effort!',
       'Poor Work',
       'not good',
       'Could have been better.']
labels = [0,0,0,0,0,1,1,1,1,1]

In [31]:
vocab_size = 50
encoded_docs = [one_hot(d, vocab_size) for d in docs]

In [32]:
max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen= max_length, padding = 'post')
print(padded_docs)

[[25 33  0  0]
 [25 49  0  0]
 [35 39  0  0]
 [45 49  0  0]
 [34  0  0  0]
 [24  0  0  0]
 [47 39  0  0]
 [47 49  0  0]
 [16 25  0  0]
 [45 22 24 41]]


In [33]:
model = Sequential()
model.add(Embedding(vocab_size, 8, input_length = max_length))
model.add(Flatten())
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer= 'adam', loss = 'binary_crossentropy', metrics = ['acc'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_2 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________


In [34]:
model.fit(padded_docs,labels, epochs = 50, verbose = 0)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


<keras.callbacks.callbacks.History at 0x202c49565f8>

In [35]:
loss, accuracy = model.evaluate(padded_docs, labels, verbose = 0)
print("Accuracy: %f " % (accuracy*100))

Accuracy: 80.000001 


In [36]:
#Using pre-trained word embeddings of glove6B with Keras

In [37]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [38]:
t = Tokenizer()
t.fit_on_texts(docs)
vocab_size = len(t.word_index)+ 1
encoded_docs = t.texts_to_sequences(docs)
print(encoded_docs)
max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen = max_length, padding='post')
print(padded_docs)

[[5, 6], [2, 1], [7, 3], [8, 1], [9], [10], [4, 3], [4, 1], [11, 2], [12, 13, 14, 15]]
[[ 5  6  0  0]
 [ 2  1  0  0]
 [ 7  3  0  0]
 [ 8  1  0  0]
 [ 9  0  0  0]
 [10  0  0  0]
 [ 4  3  0  0]
 [ 4  1  0  0]
 [11  2  0  0]
 [12 13 14 15]]


In [43]:
f = open('datasets/glove6b/glove.6B.100d.txt', mode='rt', encoding='utf-8')
embeddings_index = dict()

In [44]:
for line in f:
    values = line.split()
    word = values[0]
    coefs = asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

In [45]:
print('Loaded Word Vectors. ' % len(embeddings_index))

TypeError: not all arguments converted during string formatting

In [48]:
embeddings_matrix = zeros((vocab_size, 100))

In [52]:
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embeddings_matrix[i] = embedding_vector

In [58]:
#define model
model = Sequential()
model.add(Embedding(vocab_size, 100, weights= [embeddings_matrix], input_length = 4, trainable = False))
model.add(Flatten())
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics=['acc'])
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 4, 100)            1600      
_________________________________________________________________
flatten_7 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 401       
Total params: 2,001
Trainable params: 401
Non-trainable params: 1,600
_________________________________________________________________


In [59]:
model.fit(padded_docs, labels, epochs = 50, verbose = 0)

<keras.callbacks.callbacks.History at 0x202c6695518>

In [60]:
loss, accuracy = model.evaluate(padded_docs, labels, verbose = 0)
print("Accuracy: %f" % (accuracy*100))

Accuracy: 100.000000
