In [35]:
from tensorflow.keras.preprocessing.text import one_hot

In [36]:
# Toy corpus: seven short sentences used to demonstrate word embeddings.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [37]:
# Vocabulary size: the number of index slots available when words are encoded
# as integers, and the number of rows in the embedding table.
voc_size = 10_000

In [38]:
# Encode each sentence as a list of integer word indices below voc_size.
# NOTE: Keras' one_hot hashes words rather than building a dictionary, so two
# different words can share an index, and (per the Keras docs) the default hash
# is not stable across Python sessions -- the numbers may differ after a restart.
one_hot_rep = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
one_hot_rep

[[2512, 5888, 3264, 933],
 [2512, 5888, 3264, 1709],
 [2512, 124, 3264, 5111],
 [2332, 9755, 272, 866, 958],
 [2332, 9755, 272, 866, 2832],
 [7113, 2512, 6044, 3264, 7126],
 [1370, 7470, 4756, 866]]

In [39]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [40]:
import numpy as np

In [41]:
# The encoded sentences have different lengths (4 or 5 indices here), but the
# model needs rows of equal length. The longest sentence has 5 words, so every
# sequence is brought to length 5.
sent_length = 5

# padding='pre' inserts zeros at the front of any sequence shorter than
# sent_length (padding='post' would append them at the end instead).
embedded_docs = pad_sequences(one_hot_rep, maxlen=sent_length, padding='pre')

In [42]:
# Display the padded index matrix: one row per sentence, sent_length columns.
embedded_docs

array([[   0, 2512, 5888, 3264,  933],
       [   0, 2512, 5888, 3264, 1709],
       [   0, 2512,  124, 3264, 5111],
       [2332, 9755,  272,  866,  958],
       [2332, 9755,  272,  866, 2832],
       [7113, 2512, 6044, 3264, 7126],
       [   0, 1370, 7470, 4756,  866]])

In [43]:
## Feature Representations
# Number of features (vector components) per word; passed to the Embedding
# layer as its output dimension.
dim = 10

In [44]:
# One-layer model: maps every word index in a padded sentence to a trainable
# `dim`-dimensional vector.
model = Sequential()
# `input_length` is deprecated in Keras 3 (it only triggers a warning and is
# otherwise ignored), so it is no longer passed to the layer.
model.add(Embedding(voc_size, dim))
# Build explicitly with the (batch, sequence length) input shape so the weights
# exist right away and model.summary() can report output shapes and parameter
# counts before any data has been passed through the model.
model.build(input_shape=(None, sent_length))
# Optimizer and loss are only needed for training; no fit() is called here.
model.compile('adam','mse')

In [45]:
# Print the model's layer-by-layer summary.
model.summary()

In [46]:
# Run all padded sentences through the embedding layer. The model has not been
# fitted in this section, so each index maps to its initial (untrained) vector.
# Expected result shape: (7 sentences, sent_length, dim).
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step


array([[[ 0.03796433, -0.02105994,  0.00047394, -0.0461508 ,
         -0.01381866, -0.00927627, -0.00800209, -0.03391074,
          0.018596  ,  0.03711988],
        [-0.04187528, -0.02948151,  0.01628942, -0.0353852 ,
          0.03604111,  0.02625746, -0.04531336,  0.03974028,
         -0.02721913, -0.01055152],
        [ 0.02881299, -0.03609649,  0.00358299, -0.02808828,
          0.0326626 , -0.01106644,  0.0219037 , -0.03350867,
         -0.00408806, -0.03924178],
        [ 0.02230309,  0.01595172, -0.04419747, -0.04105239,
         -0.01885412,  0.0359889 ,  0.02800893,  0.01285565,
         -0.0149624 , -0.01744633],
        [ 0.00022529,  0.01794595, -0.04516739, -0.01180711,
         -0.01684733,  0.00111877, -0.0101616 ,  0.03926667,
         -0.03638357,  0.02069675]],

       [[ 0.03796433, -0.02105994,  0.00047394, -0.0461508 ,
         -0.01381866, -0.00927627, -0.00800209, -0.03391074,
          0.018596  ,  0.03711988],
        [-0.04187528, -0.02948151,  0.01628942, -0

In [47]:
# Print the summary again, now that the model has been run on data above.
model.summary()

In [48]:
# Indexing with the list [0] (rather than the scalar 0) keeps the batch axis,
# so only the first sentence is embedded and the result stays 3-D.
model.predict(embedded_docs[[0]])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step


array([[[ 0.03796433, -0.02105994,  0.00047394, -0.0461508 ,
         -0.01381866, -0.00927627, -0.00800209, -0.03391074,
          0.018596  ,  0.03711988],
        [-0.04187528, -0.02948151,  0.01628942, -0.0353852 ,
          0.03604111,  0.02625746, -0.04531336,  0.03974028,
         -0.02721913, -0.01055152],
        [ 0.02881299, -0.03609649,  0.00358299, -0.02808828,
          0.0326626 , -0.01106644,  0.0219037 , -0.03350867,
         -0.00408806, -0.03924178],
        [ 0.02230309,  0.01595172, -0.04419747, -0.04105239,
         -0.01885412,  0.0359889 ,  0.02800893,  0.01285565,
         -0.0149624 , -0.01744633],
        [ 0.00022529,  0.01794595, -0.04516739, -0.01180711,
         -0.01684733,  0.00111877, -0.0101616 ,  0.03926667,
         -0.03638357,  0.02069675]]], dtype=float32)

In [49]:
# Same single-sentence prediction, kept in a variable to inspect its shape:
# (1 sentence, sent_length words, dim features per word).
output = model.predict(embedded_docs[[0]])
output.shape

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


(1, 5, 10)

In [50]:
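# Note: the Embedding layer does not use word2vec internally. It is a trainable
# lookup table with one row of `dim` values per vocabulary index, initialised
# with small random numbers. Because this model has not been trained, the
# vectors shown above are just those random initial values; they only become
# meaningful once the layer is trained as part of a model (or loaded with
# pre-trained weights such as word2vec or GloVe).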