# In this session we will look into how we can convert words to numbers using Word Embedding in keras embedding layers

In [80]:
# Libraries
import numpy as np
from tensorflow import keras
# from tensorflow.keras.preprocessing.text import one_hot
# from tensorflow.keras.preprocessing.sequence import pad_sequences
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
# from tensorflow.keras.layers import Flatten
# from tensorflow.keras.layers import Embedding

In [81]:
reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement'
]
sentiment = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

# Converting words to One Hot

In [82]:
vocab_size = 40
# Encoding Words to Vectors
encoded_reviews = [keras.preprocessing.text.one_hot(d, vocab_size) for d in reviews]
encoded_reviews

[[36, 31],
 [16, 1],
 [29, 16],
 [11, 34, 3],
 [10, 16, 19],
 [39, 31],
 [37, 16, 22],
 [2, 32],
 [2, 31],
 [36, 23]]

# Padding

In [83]:
max_length = 3
# Padding the reviews towards the end
padded_reviews = keras.preprocessing.sequence.pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[36, 31,  0],
       [16,  1,  0],
       [29, 16,  0],
       [11, 34,  3],
       [10, 16, 19],
       [39, 31,  0],
       [37, 16, 22],
       [ 2, 32,  0],
       [ 2, 31,  0],
       [36, 23,  0]])

# Creating model with Embedding Layers

In [84]:
embeded_vector_size = 4

model = keras.models.Sequential([
    keras.layers.Embedding(vocab_size, embeded_vector_size, input_length=max_length, name="embedding"),
    keras.layers.Flatten(),
    keras.layers.Dense(1, activation='sigmoid')
])    
# # Adding the layers using 'model.add()'
# model.add(keras.layers.Embedding(vocab_size, embeded_vector_size, input_length=max_length, name="embedding"))
# model.add(keras.layers.Flatten())
# model.add(keras.layers.Dense(1, activation='sigmoid'))

x = padded_reviews
y = sentiment

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=[keras.metrics.BinaryAccuracy()]
)

model.summary()

In [85]:
model.fit(x, y, epochs=50, verbose=1)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 986ms/step - binary_accuracy: 0.4000 - loss: 0.6956
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - binary_accuracy: 0.4000 - loss: 0.6947
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - binary_accuracy: 0.4000 - loss: 0.6938
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - binary_accuracy: 0.5000 - loss: 0.6930
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - binary_accuracy: 0.5000 - loss: 0.6921
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - binary_accuracy: 0.5000 - loss: 0.6913
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - binary_accuracy: 0.5000 - loss: 0.6904
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - binary_accuracy: 0.6000 - loss: 0.6895
Epoch 9

<keras.src.callbacks.history.History at 0x2436a516930>

In [86]:
loss, accuracy = model.evaluate(x, y)
accuracy

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step - binary_accuracy: 0.9000 - loss: 0.6492


0.8999999761581421

# Getting the Word Embedding

In [88]:
weights = model.get_layer("embedding").get_weights()[0]
len(weights)

40

In [90]:
weights[11]

array([-0.0571991 ,  0.0984407 ,  0.09533438, -0.09908765], dtype=float32)