In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
## sentences
# Small toy corpus used throughout the notebook: seven short sentences,
# one string per sentence.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus to confirm its contents before encoding it.
sent


['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
## Define the vocabulary size
# Passed to one_hot() as its second argument and to Embedding() as its first.
voc_size = 10_000

In [5]:
## One Hot Representation
# Encode every sentence as a list of integer word indices (one list per
# sentence, one index per word), then display the result.
one_hot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
one_hot_repr


[[540, 4123, 4900, 4348],
 [540, 4123, 4900, 3674],
 [540, 9108, 4900, 3043],
 [5045, 4287, 6468, 2950, 7203],
 [5045, 4287, 6468, 2950, 2847],
 [2080, 540, 7743, 4900, 7497],
 [202, 285, 4720, 2950]]

In [6]:
## word embedding representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [8]:
# Bring every encoded sentence to the same length (8) by padding with zeros
# at the front ('pre'), so the sentences stack into one 2-D integer array.
sent_length = 8
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0  540 4123 4900 4348]
 [   0    0    0    0  540 4123 4900 3674]
 [   0    0    0    0  540 9108 4900 3043]
 [   0    0    0 5045 4287 6468 2950 7203]
 [   0    0    0 5045 4287 6468 2950 2847]
 [   0    0    0 2080  540 7743 4900 7497]
 [   0    0    0    0  202  285 4720 2950]]


In [9]:
## feature representation
# Number of components in the embedding vector produced for each word index;
# passed to Embedding() as its second argument.
dim = 10

In [10]:
# Single-layer model: an Embedding that maps each word index (voc_size
# possible values) to a dim-component vector.
# `input_length` is deliberately not passed: Keras 3 deprecates the argument
# and ignores it with a warning. The sequence length comes from the input
# shape given to model.build() instead.
model = Sequential()
model.add(Embedding(voc_size, dim))
# Compile with the Adam optimizer and mean-squared-error loss so the model is
# fully configured; the visible cells only call predict(), never fit().
model.compile(optimizer='adam', loss='mse')



In [12]:
# Build the model for inputs of shape (batch, sent_length) -- None leaves the
# batch dimension unspecified -- and then print the layer/parameter summary.
model.build((None, sent_length))
model.summary()


In [13]:
# Run the padded index matrix through the embedding layer: every index is
# replaced by its dim-component vector. Positions holding the padding index 0
# all map to the same vector, which is why the first rows of the first
# sentence in the output below are identical.
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step


array([[[-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.03325617,  0.01717975, -0.02245902, -0.02892929,
          0.02744192, -0.03677356, -0.0310715 ,  0.0114972 ,
          0.00654798, -0.00058943],
        [ 0.01392654,  0.04442317,  0.04417342, -0.02630438,
         -0.0427728 , -0.04779253,  0.00230447,  0.01942961,
         -0.01229773, -0.02106922],
        [-0.00497767, -0.04375673, -0.02126399,  0.0

In [14]:
 # Inspect the first padded sentence: four leading zeros of padding followed
 # by the four word indices of 'the glass of milk'.
 embedded_docs[0]

array([   0,    0,    0,    0,  540, 4123, 4900, 4348], dtype=int32)

In [17]:
# Embed only the first sentence. Slicing with [:1] (rather than indexing with
# [0]) keeps the leading batch axis, so the input is still a 2-D array with a
# single row.
model.predict(embedded_docs[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


array([[[-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.0027006 , -0.01225745, -0.01860287,  0.03833285,
          0.01539065, -0.00574521, -0.04147396, -0.03966626,
          0.01435591, -0.00857135],
        [-0.03325617,  0.01717975, -0.02245902, -0.02892929,
          0.02744192, -0.03677356, -0.0310715 ,  0.0114972 ,
          0.00654798, -0.00058943],
        [ 0.01392654,  0.04442317,  0.04417342, -0.02630438,
         -0.0427728 , -0.04779253,  0.00230447,  0.01942961,
         -0.01229773, -0.02106922],
        [-0.00497767, -0.04375673, -0.02126399,  0.0