In [4]:
from tensorflow.keras.preprocessing.text import one_hot

In [5]:
### Sentences
sent = [ 'the glass of milk',
        'the glass of juice',
        'the cup of tea',
        'I am a good boy',
        'I am a good developer',
        'understand the meaning of words',
        'your videos are good',]

In [6]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [7]:
## Define vocabulary size
voc_size = 10000

In [8]:
### One hot Representation
one_hot_repr = [one_hot(words, voc_size) for words in sent]
one_hot_repr


[[7142, 4784, 3549, 8428],
 [7142, 4784, 3549, 2364],
 [7142, 7613, 3549, 395],
 [4267, 3293, 5201, 3484, 6122],
 [4267, 3293, 5201, 3484, 7951],
 [6280, 7142, 7185, 3549, 9351],
 [9505, 2372, 3170, 3484]]

In [9]:
## word embedding representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [10]:
import numpy as np

In [11]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding = 'pre', maxlen = sent_length)
print(embedded_docs)

[[   0    0    0    0 7142 4784 3549 8428]
 [   0    0    0    0 7142 4784 3549 2364]
 [   0    0    0    0 7142 7613 3549  395]
 [   0    0    0 4267 3293 5201 3484 6122]
 [   0    0    0 4267 3293 5201 3484 7951]
 [   0    0    0 6280 7142 7185 3549 9351]
 [   0    0    0    0 9505 2372 3170 3484]]


In [12]:
## feature representation 
dim = 10

In [13]:
model = Sequential()
model.add(Embedding(voc_size, dim, input_length = sent_length))
model.compile('adam','mse')
          



In [14]:
model.summary()

In [15]:
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


array([[[-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [ 0.03465659,  0.02488242,  0.01882757,  0.02702561,
         -0.0043564 , -0.04798392, -0.01499317, -0.02326448,
         -0.00690408,  0.03454093],
        [-0.02264737,  0.02763828, -0.00285298, -0.0406559 ,
          0.03680868,  0.01323486,  0.0464117 , -0.04980272,
          0.01985571, -0.03430444],
        [-0.01883961, -0.03607448, -0.04346418, -0.0

In [16]:
embedded_docs[0]

array([   0,    0,    0,    0, 7142, 4784, 3549, 8428], dtype=int32)

In [18]:
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


array([[[-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [-0.02788483, -0.01242678,  0.04210098, -0.02437191,
         -0.04098518, -0.01757019,  0.00017288, -0.00713043,
          0.00117327,  0.01571984],
        [ 0.03465659,  0.02488242,  0.01882757,  0.02702561,
         -0.0043564 , -0.04798392, -0.01499317, -0.02326448,
         -0.00690408,  0.03454093],
        [-0.02264737,  0.02763828, -0.00285298, -0.0406559 ,
          0.03680868,  0.01323486,  0.0464117 , -0.04980272,
          0.01985571, -0.03430444],
        [-0.01883961, -0.03607448, -0.04346418, -0.0