In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding



In [2]:
# Toy sentiment dataset: each review text is kept next to its label
# (1 = positive, 0 = negative) so the two can never drift out of step.
labelled_reviews = [
    ('nice food', 1),
    ('amazing restaurant', 1),
    ('too good', 1),
    ('just loved it!', 1),
    ('will go again', 1),
    ('horrible food', 0),
    ('never go there', 0),
    ('poor service', 0),
    ('poor quality', 0),
    ('needs improvement', 0),
]

# Split the pairs back into the parallel structures used by the later cells.
reviews = [text for text, _ in labelled_reviews]
sentiment = np.array([label for _, label in labelled_reviews])

In [4]:
# Despite its name, one_hot returns one integer index per word rather than a
# one-hot vector. Per the Keras docs the index is a hash of the word reduced
# to a value below n (50 here), so it is not a lookup in a learned vocabulary.
one_hot("just loved it!", n=50)

[4, 29, 12]

In [5]:
# Size of the hashing space: every word is mapped to an integer index below
# this value.
vocab_size = 30

# Encode each review as a list of hashed word indices.
# NOTE: hashing can map different words to the same index. In the recorded
# output below, 'nice' and 'poor' both received index 2.
# NOTE(review): per the Keras docs one_hot hashes with Python's built-in hash,
# which is not stable across interpreter runs, so indices may differ after a
# kernel restart. Confirm before relying on specific index values.
encoded_reviews = list(map(lambda text: one_hot(text, vocab_size), reviews))
print(encoded_reviews)

[[2, 26], [17, 15], [29, 8], [7, 11, 28], [11, 16, 14], [6, 26], [12, 16, 1], [2, 17], [2, 7], [4, 25]]


In [7]:
# Pad every encoded review to one common length so the batch forms a
# rectangular integer array.
#
# The target length is derived from the longest encoded review instead of
# being hard-coded: with a fixed maxlen, pad_sequences silently truncates any
# longer review (per the Keras docs its default truncating mode is 'pre',
# i.e. leading tokens are dropped). For the reviews defined above this
# evaluates to 3, the value that was previously hard-coded.
max_length = max(len(encoded) for encoded in encoded_reviews)

# padding='post' appends the zeros after the real tokens
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[ 2, 26,  0],
       [17, 15,  0],
       [29,  8,  0],
       [ 7, 11, 28],
       [11, 16, 14],
       [ 6, 26,  0],
       [12, 16,  1],
       [ 2, 17,  0],
       [ 2,  7,  0],
       [ 4, 25,  0]])

In [12]:
# Seed TensorFlow's global random generator before any layer is created so
# that the randomly initialised weights are repeatable between runs.
SEED = 42
tf.random.set_seed(SEED)

# Length of the trainable vector learned for each vocabulary index.
embeded_vector_size = 4

model = Sequential()
# Looks up one embeded_vector_size-long vector per token index; the layer is
# named so its weights can be fetched later with model.get_layer('embedding').
model.add(Embedding(vocab_size, embeded_vector_size, input_length=max_length,
                    name='embedding'
                    ))
# Concatenate the per-token vectors of a review into a single flat vector.
model.add(Flatten())
# One sigmoid unit: output in (0, 1), matched against the 0/1 sentiment labels.
model.add(Dense(1, activation='sigmoid'))

In [13]:
# Conventional feature / target aliases for the padded index matrix and the
# 0/1 sentiment labels.
X, y = padded_reviews, sentiment


In [14]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten_1 (Flatten)         (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 13        
                                                                 
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Train for 50 passes over the ten reviews; verbose=0 silences the per-epoch log.
model.fit(x=X, y=y, epochs=50, verbose=0)


<keras.callbacks.History at 0x1bca4927490>

In [16]:
# Evaluated on the same X, y that were passed to fit, so this is a
# training-set score rather than a measure of generalisation.
loss, accuracy = model.evaluate(x=X, y=y)



In [17]:
# Display the accuracy returned by model.evaluate on the training reviews.
accuracy

1.0

In [19]:
# Fetch the learned embedding matrix from the layer named 'embedding'.
# get_weights() returns a list; element 0 is the matrix with one row per
# vocabulary index and embeded_vector_size (4) learned floats per row.
# These are embedding coordinates, not probabilities (values can be negative).
embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]

In [20]:
# Number of rows in the embedding matrix; matches vocab_size, one row per index.
len(weights)

30

In [24]:
# Embedding vector for index 17. In the recorded encoding output, index 17 was
# assigned to both 'amazing' and 'service' (a hash collision).
weights[17]

array([ 0.00779741, -0.09949016,  0.07029911, -0.03296664], dtype=float32)

In [23]:
# Embedding vector for index 2. In the recorded encoding output, index 2 was
# assigned to both 'nice' and 'poor' (a hash collision), so this row is shared
# by a positive and a negative word.
weights[2]

array([ 0.01998142, -0.01540712,  0.05620651,  0.00430774], dtype=float32)

In [25]:
# With a larger dataset, the embedding vectors of words with similar meaning would likely end up closer together.
