In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [3]:
reviews = ['nice food',
        'amazing restaurant',
        'too good',
        'just loved it!',
        'will go again',
        'horrible food',
        'never go there',
        'poor service',
        'poor quality',
        'needs improvement']

sentiment = np.array([1,1,1,1,1,0,0,0,0,0])

In [6]:
one_hot("amazing restaurant", 30)

[11, 24]

In [7]:
vocab_size = 30
encoded_reviews = [one_hot(review, vocab_size) for review in reviews]
encoded_reviews

[[27, 12],
 [11, 24],
 [9, 20],
 [9, 13, 27],
 [21, 24, 15],
 [9, 12],
 [14, 24, 14],
 [19, 16],
 [19, 4],
 [28, 27]]

In [11]:
max_length = 3
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding="post")
padded_reviews

array([[27, 12,  0],
       [11, 24,  0],
       [ 9, 20,  0],
       [ 9, 13, 27],
       [21, 24, 15],
       [ 9, 12,  0],
       [14, 24, 14],
       [19, 16,  0],
       [19,  4,  0],
       [28, 27,  0]])

In [12]:
embeded_vector_size = 4

model = Sequential([
    Embedding(vocab_size, embeded_vector_size, input_length=max_length,name="embedding"),
    Flatten(),
    Dense(1, activation="sigmoid")
])

In [14]:
X = padded_reviews
y = sentiment

In [15]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten (Flatten)           (None, 12)                0         
                                                                 
 dense (Dense)               (None, 1)                 13        
                                                                 
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________
None


In [16]:
model.fit(X,y, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1a05e6b2380>

In [17]:
model.evaluate(X,y)



[0.6282057762145996, 0.800000011920929]

In [32]:
weights = model.get_layer("embedding").get_weights()[0]
weights

array([[-0.07679579,  0.09732715,  0.08619899,  0.05739338],
       [-0.04986708, -0.01229765,  0.0284027 ,  0.0126645 ],
       [-0.01942624,  0.03850574,  0.00598303,  0.03321396],
       [-0.03656382, -0.03151001,  0.02115289, -0.02943042],
       [-0.01108667, -0.01047962, -0.02529276, -0.04410189],
       [ 0.01336385,  0.02275005, -0.04700422,  0.01468054],
       [ 0.00975124, -0.01158715,  0.04300253,  0.01824095],
       [ 0.0122054 ,  0.02648682,  0.03019735,  0.04157075],
       [ 0.02046919, -0.0339105 , -0.02665832, -0.00383834],
       [-0.03774757,  0.03876178, -0.02435127,  0.03959162],
       [-0.01633428,  0.0471046 , -0.01960145,  0.02669289],
       [-0.01645269,  0.05126924, -0.05880726,  0.04251503],
       [-0.04393397, -0.01388018, -0.01676842,  0.0380533 ],
       [ 0.00257516,  0.01259921,  0.02517235,  0.0317516 ],
       [ 0.05120807, -0.02640042,  0.07653388, -0.00314644],
       [ 0.01992092, -0.04150433, -0.08482601, -0.09526844],
       [-0.08273199, -0.

In [34]:
weights[27], weights[11], weights[20] 

(array([-0.06430959, -0.03284765, -0.00723304,  0.06166505], dtype=float32),
 array([-0.01645269,  0.05126924, -0.05880726,  0.04251503], dtype=float32),
 array([0.08370835, 0.02978159, 0.03817386, 0.06928911], dtype=float32))

In [83]:
def predict(word):
    print(model.predict(pad_sequences([np.array(one_hot(word, 30))], maxlen=max_length, padding="post")))

In [103]:
predict("poor meat")

[[0.45119837]]
