# Understand Embedding layer

## Basic example

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding

import numpy as np

In [6]:
model = Sequential()
embedding_layer = Embedding(input_dim=10, output_dim=4, input_length=2)
model.add(embedding_layer)
model.compile('adam', 'mse')

In [7]:
input_data = np.array([[1, 2]])
pred = model.predict(input_data)
print(input_data.shape)
print(pred)

(1, 2)
[[[-0.04788527 -0.04349394 -0.00317895 -0.03623734]
  [-0.04228467  0.00742381  0.04753392 -0.02544016]]]


In [8]:
embedding_layer.get_weights()

[array([[ 0.02455905,  0.01105393, -0.01862649,  0.02651588],
        [-0.04788527, -0.04349394, -0.00317895, -0.03623734],
        [-0.04228467,  0.00742381,  0.04753392, -0.02544016],
        [-0.00387619,  0.03649409, -0.00065585,  0.04683853],
        [-0.02187707,  0.01539865,  0.01836853, -0.00290058],
        [ 0.01581964,  0.01207968,  0.0160506 , -0.00442315],
        [-0.03782524,  0.04370471, -0.01593099,  0.00055779],
        [ 0.02505   ,  0.00263749,  0.02375183,  0.02757181],
        [ 0.0096316 , -0.00630097, -0.00160276,  0.03710118],
        [ 0.0383487 , -0.02766459, -0.01623661, -0.01034636]],
       dtype=float32)]

## Resaturant Review Classification

In [9]:
from numpy import array
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Embedding, Dense

In [10]:
# Define 10 restaurant reviews
reviews = [
    "Never coming back!",
    "horrible service",
    'reude waitress',
    'cold food',
    'horrible food!',
    'awesome',
    'awesome services!',
    'rocks',
    'poor work',
    'couldn\'t have done better'
]

# Define labels
labels = array([1,1,1,1,1,0,0,0,0,0])

In [11]:
vocab_size = 50
encoded_reviews = [one_hot(d, vocab_size) for d in reviews]
print(f'Encoded reviews: {encoded_reviews}')

Encoded reviews: [[40, 10, 9], [43, 47], [30, 9], [46, 38], [43, 38], [10], [10, 17], [28], [8, 6], [36, 49, 42, 21]]


In [12]:
max_length = 4
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[40 10  9  0]
 [43 47  0  0]
 [30  9  0  0]
 [46 38  0  0]
 [43 38  0  0]
 [10  0  0  0]
 [10 17  0  0]
 [28  0  0  0]
 [ 8  6  0  0]
 [36 49 42 21]]


In [13]:
model = Sequential()
embedding_layer = Embedding(input_dim=vocab_size, output_dim=8, input_length=max_length)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss="binary_crossentropy", metrics=['acc'])

print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 4, 8)              400       
                                                                 
 flatten (Flatten)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
model.fit(padded_reviews, labels, epochs=100, verbose=0)

<keras.callbacks.History at 0x202a9072160>

In [15]:
print(embedding_layer.get_weights()[0].shape)

(50, 8)


In [20]:
len(embedding_layer.get_weights()[0])

50

In [21]:
len(embedding_layer.get_weights()[0][0])

8

In [24]:
y_test = padded_reviews[-1]
y_test

array([36, 49, 42, 21])

In [26]:
model.predict(array([y_test]))



array([[0.25025597]], dtype=float32)