In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Toy corpus: seven short sentences that the rest of the notebook encodes,
# pads and feeds through an embedding layer.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'Understand the meaning of words',
    'Your videos are good',
]

In [4]:
# Echo the corpus to confirm the seven sentences were stored as written.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'Understand the meaning of words',
 'Your videos are good']

In [5]:
# Vocabulary size: passed to one_hot() as the size of the word-id space and
# to the Embedding layer as its input dimension.
voc_size = 10_000

In [6]:
# Encode each sentence as a list of integer word ids (one id per word), using
# `voc_size` as the size of the id space. Despite the helper's name, the
# result is a list of indices, not one-hot vectors.
# NOTE(review): per the Keras docs, one_hot() hashes words with Python's
# built-in `hash`, so the ids may differ between kernel sessions and distinct
# words can collide — confirm before relying on the exact numbers shown.
one_hot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
one_hot_repr

[[7218, 2478, 3743, 2866],
 [7218, 2478, 3743, 200],
 [7218, 9420, 3743, 1399],
 [5842, 6565, 2269, 2340, 494],
 [5842, 6565, 2269, 2340, 1972],
 [763, 7218, 1855, 3743, 9378],
 [5096, 319, 6781, 2340]]

In [18]:
# import libraries
import numpy as np
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [8]:
# Bring every encoded sentence to a common length of `sent_length` by padding
# at the front ('pre'), so the batch becomes one rectangular integer array.
sent_length = 8
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 7218 2478 3743 2866]
 [   0    0    0    0 7218 2478 3743  200]
 [   0    0    0    0 7218 9420 3743 1399]
 [   0    0    0 5842 6565 2269 2340  494]
 [   0    0    0 5842 6565 2269 2340 1972]
 [   0    0    0  763 7218 1855 3743 9378]
 [   0    0    0    0 5096  319 6781 2340]]


In [9]:
# Embedding dimension: the number of features in each word vector (used as
# the Embedding layer's output size).
dim: int = 10

In [20]:
# Minimal model: a fixed-length sequence of word ids goes in, and the
# Embedding layer maps each id to a `dim`-length vector.
model = Sequential([
    Input(shape=(sent_length,)),
    Embedding(voc_size, dim),
])
# The model is only used for predict() in this notebook; it is compiled with
# Adam and MSE but never fit in the visible cells.
model.compile(optimizer='adam', loss='mse')

In [21]:
# Print an overview of the model's layers, output shapes and parameter counts.
model.summary()

In [22]:
# Run the whole padded batch through the embedding layer: each word id is
# replaced by a `dim`-length vector. The model is never fit in the visible
# cells, so these are the layer's initial weights. The leading padding zeros
# all map to the same vector, which is why the first rows of the output repeat.
model.predict(embedded_docs)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 334ms/step


array([[[-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [ 0.02090422,  0.03870109, -0.04971898,  0.00126183,
          0.00191801,  0.02120853, -0.02249232, -0.0469758 ,
         -0.03946502, -0.01758037],
        [-0.03664522,  0.00044625, -0.00303239, -0.03876908,
         -0.01665741, -0.01964722, -0.01451094, -0.00168437,
          0.04596397,  0.04579471],
        [ 0.00603824,  0.03994807, -0.03527318,  0.0

In [23]:
# Inspect the padded id sequence for the first sentence ('the glass of milk').
embedded_docs[0]

array([   0,    0,    0,    0, 7218, 2478, 3743, 2866], dtype=int32)

In [25]:
# Embed only the first sentence. The model takes a 2-D (batch, sent_length)
# input, so slice with [:1] to keep the batch axis rather than passing the
# 1-D row directly.
model.predict(embedded_docs[:1])

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 95ms/step


array([[[-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [-0.01511855, -0.02393634, -0.0422629 , -0.00485667,
          0.00697824, -0.04767   ,  0.04787428, -0.03036904,
          0.03097827, -0.02054119],
        [ 0.02090422,  0.03870109, -0.04971898,  0.00126183,
          0.00191801,  0.02120853, -0.02249232, -0.0469758 ,
         -0.03946502, -0.01758037],
        [-0.03664522,  0.00044625, -0.00303239, -0.03876908,
         -0.01665741, -0.01964722, -0.01451094, -0.00168437,
          0.04596397,  0.04579471],
        [ 0.00603824,  0.03994807, -0.03527318,  0.0