# Understand Embedding layer

## Basic example

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding

import numpy as np

In [2]:
model = Sequential()
embedding_layer = Embedding(input_dim=10, output_dim=4, input_length=2)
model.add(embedding_layer)
model.compile('adam', 'mse')

In [3]:
input_data = np.array([[1, 2]])
pred = model.predict(input_data)
print(input_data.shape)
print(pred)

(1, 2)
[[[-0.02928813  0.02421535 -0.01454658 -0.02975478]
  [-0.03927112 -0.04286926 -0.02040139  0.02429599]]]


In [4]:
embedding_layer.get_weights()

[array([[-0.00206206, -0.02268063,  0.00161115,  0.03826547],
        [-0.02928813,  0.02421535, -0.01454658, -0.02975478],
        [-0.03927112, -0.04286926, -0.02040139,  0.02429599],
        [-0.00706007,  0.03350106,  0.00542669,  0.01906283],
        [ 0.02318514,  0.01628311, -0.00812899,  0.03489396],
        [ 0.04975443, -0.00460393, -0.04770814, -0.03671625],
        [ 0.02880499, -0.02322553,  0.04904578,  0.04266823],
        [ 0.03153243,  0.00841354, -0.04467653, -0.0355782 ],
        [-0.00714878, -0.00233785, -0.00956761, -0.00749818],
        [-0.01616727, -0.00838017, -0.03434888, -0.0455641 ]],
       dtype=float32)]

## Resaturant Review Classification

In [5]:
from numpy import array
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Embedding, Dense

In [6]:
# Define 10 restaurant reviews
reviews = [
    "Never coming back!",
    "horrible service",
    'reude waitress',
    'cold food',
    'horrible food!',
    'awesome',
    'awesome services!',
    'rocks',
    'poor work',
    'couldn\'t have done better'
]

# Define labels
labels = array([1,1,1,1,1,0,0,0,0,0])

In [7]:
vocab_size = 50
encoded_reviews = [one_hot(d, vocab_size) for d in reviews]
print(f'Encoded reviews: {encoded_reviews}')

Encoded reviews: [[5, 39, 28], [13, 44], [33, 43], [8, 46], [13, 46], [49], [49, 22], [46], [25, 21], [17, 39, 44, 47]]


In [8]:
max_length = 4
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[ 5 39 28  0]
 [13 44  0  0]
 [33 43  0  0]
 [ 8 46  0  0]
 [13 46  0  0]
 [49  0  0  0]
 [49 22  0  0]
 [46  0  0  0]
 [25 21  0  0]
 [17 39 44 47]]


In [9]:
model = Sequential()
embedding_layer = Embedding(input_dim=vocab_size, output_dim=8, input_length=max_length)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss="binary_crossentropy", metrics=['acc'])

print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 4, 8)              400       
                                                                 
 flatten (Flatten)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [10]:
model.fit(padded_reviews, labels, epochs=100, verbose=0)

<keras.callbacks.History at 0x1967b693190>

In [11]:
print(embedding_layer.get_weights()[0].shape)

(50, 8)


In [12]:
len(embedding_layer.get_weights()[0])

50

In [13]:
len(embedding_layer.get_weights()[0][0])

8

In [14]:
y_test = padded_reviews[-1]
y_test

array([17, 39, 44, 47])

In [15]:
model.predict(array([y_test]))



array([[0.35992137]], dtype=float32)