In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
## sentences
sent = ['the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',]

In [3]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
## Define the vocabulary size
voc_size = 10000

In [9]:
## One hot representation for every word in the sentence
one_hot_repr = [one_hot(words, voc_size) for words in sent]
one_hot_repr

[[6093, 3700, 7352, 4956],
 [6093, 3700, 7352, 8854],
 [6093, 4965, 7352, 6996],
 [9721, 3171, 4409, 2967, 5163],
 [9721, 3171, 4409, 2967, 7468],
 [2959, 6093, 2712, 7352, 662],
 [9728, 532, 8914, 2967]]

In [11]:
## Now i'll take all these indexes (formed above) sentence by sentence and pass it to my embedding layer and 
# then we'll try to convert it into a word embedding representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [12]:
## make all sentences of same length, otherwise we wont be able to train it in our RNN
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length) # apply padding - pre or post; add zeroes
print(embedded_docs)

[[   0    0    0    0 6093 3700 7352 4956]
 [   0    0    0    0 6093 3700 7352 8854]
 [   0    0    0    0 6093 4965 7352 6996]
 [   0    0    0 9721 3171 4409 2967 5163]
 [   0    0    0 9721 3171 4409 2967 7468]
 [   0    0    0 2959 6093 2712 7352  662]
 [   0    0    0    0 9728  532 8914 2967]]


In [14]:
## feature dimension
dim = 10

In [15]:
model = Sequential()
model.add(Embedding(voc_size, dim, input_length=sent_length))
model.compile('adam', 'mse')




In [16]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [17]:
model.predict(embedded_docs)



array([[[-0.00032295,  0.00771744,  0.03876254, -0.02047509,
          0.04109324, -0.04606049,  0.00745664,  0.03768349,
          0.04825184, -0.00708853],
        [-0.00032295,  0.00771744,  0.03876254, -0.02047509,
          0.04109324, -0.04606049,  0.00745664,  0.03768349,
          0.04825184, -0.00708853],
        [-0.00032295,  0.00771744,  0.03876254, -0.02047509,
          0.04109324, -0.04606049,  0.00745664,  0.03768349,
          0.04825184, -0.00708853],
        [-0.00032295,  0.00771744,  0.03876254, -0.02047509,
          0.04109324, -0.04606049,  0.00745664,  0.03768349,
          0.04825184, -0.00708853],
        [-0.02474154,  0.03170068,  0.02509512,  0.01659829,
         -0.00725254,  0.02208367,  0.00387448,  0.04670409,
          0.04424289, -0.01208498],
        [-0.03039724,  0.03836394,  0.00236946,  0.02050562,
         -0.00067693,  0.02834396,  0.0340381 ,  0.01812753,
          0.04511276,  0.00358588],
        [ 0.02202953, -0.04539287,  0.00770871,  0.0

In [18]:
embedded_docs[0]
#  0 -> [-0.00032295,  0.00771744,  0.03876254, -0.02047509,  0.04109324,
#        -0.04606049,  0.00745664,  0.03768349,  0.04825184, -0.00708853]

array([   0,    0,    0,    0, 6093, 3700, 7352, 4956])

In [21]:
model.predict(embedded_docs[0])



array([[-0.00032295,  0.00771744,  0.03876254, -0.02047509,  0.04109324,
        -0.04606049,  0.00745664,  0.03768349,  0.04825184, -0.00708853],
       [-0.00032295,  0.00771744,  0.03876254, -0.02047509,  0.04109324,
        -0.04606049,  0.00745664,  0.03768349,  0.04825184, -0.00708853],
       [-0.00032295,  0.00771744,  0.03876254, -0.02047509,  0.04109324,
        -0.04606049,  0.00745664,  0.03768349,  0.04825184, -0.00708853],
       [-0.00032295,  0.00771744,  0.03876254, -0.02047509,  0.04109324,
        -0.04606049,  0.00745664,  0.03768349,  0.04825184, -0.00708853],
       [-0.02474154,  0.03170068,  0.02509512,  0.01659829, -0.00725254,
         0.02208367,  0.00387448,  0.04670409,  0.04424289, -0.01208498],
       [-0.03039724,  0.03836394,  0.00236946,  0.02050562, -0.00067693,
         0.02834396,  0.0340381 ,  0.01812753,  0.04511276,  0.00358588],
       [ 0.02202953, -0.04539287,  0.00770871,  0.04256893, -0.04190931,
        -0.04352592,  0.00203403, -0.04935901