In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
### Sentences

sent = ['the glass of milk',
        'the glass of juice',
        'the cup of tea',
        'I am a good boy',
        'I am a good developer',
        'Understand the meaning of words',
        'Your videos are good'
        ]

In [3]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'Understand the meaning of words',
 'Your videos are good']

In [4]:
## Define the vocablury size
voc_size = 10000

In [5]:
### One Hot Representation
one_hot_repr = [one_hot(words, voc_size) for words in sent]
one_hot_repr


[[102, 6636, 577, 8226],
 [102, 6636, 577, 7954],
 [102, 5620, 577, 4211],
 [9345, 8960, 724, 7830, 8960],
 [9345, 8960, 724, 7830, 2107],
 [3696, 102, 5864, 577, 9874],
 [3471, 3954, 1967, 7830]]

In [7]:
## Word Embedding Representation

from tensorflow.keras.layers import Embedding
#from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [8]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
print(embedded_docs)

[[   0    0    0    0  102 6636  577 8226]
 [   0    0    0    0  102 6636  577 7954]
 [   0    0    0    0  102 5620  577 4211]
 [   0    0    0 9345 8960  724 7830 8960]
 [   0    0    0 9345 8960  724 7830 2107]
 [   0    0    0 3696  102 5864  577 9874]
 [   0    0    0    0 3471 3954 1967 7830]]


In [9]:
## Feature Representation
dim = 10

In [10]:
model = Sequential()
model.add(Embedding(voc_size, dim, input_length = sent_length))
model.compile('adam', 'mse')





In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [12]:
model.predict(embedded_docs)



array([[[ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
          3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
         -3.6789753e-02, -1.6376305e-02],
        [ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
          3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
         -3.6789753e-02, -1.6376305e-02],
        [ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
          3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
         -3.6789753e-02, -1.6376305e-02],
        [ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
          3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
         -3.6789753e-02, -1.6376305e-02],
        [-3.8349725e-02,  5.5480711e-03,  5.1595271e-05,  4.4626746e-02,
         -2.6102472e-02, -4.3241501e-02, -1.5237473e-02,  7.9674013e-03,
          2.6050236e-02, -2.7807986e-02],
        [-4.4483460e-02, -4.4731844e-02, -2.2736002e-02,  4.

In [13]:
embedded_docs[0]

array([   0,    0,    0,    0,  102, 6636,  577, 8226])

In [14]:
model.predict(embedded_docs[0])



array([[ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
         3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
        -3.6789753e-02, -1.6376305e-02],
       [ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
         3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
        -3.6789753e-02, -1.6376305e-02],
       [ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
         3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
        -3.6789753e-02, -1.6376305e-02],
       [ 3.3111420e-02, -3.3717833e-02,  2.5737371e-02, -1.6905554e-03,
         3.8780067e-02,  2.9451881e-02,  4.8738446e-02, -3.8658012e-02,
        -3.6789753e-02, -1.6376305e-02],
       [-3.8349725e-02,  5.5480711e-03,  5.1595271e-05,  4.4626746e-02,
        -2.6102472e-02, -4.3241501e-02, -1.5237473e-02,  7.9674013e-03,
         2.6050236e-02, -2.7807986e-02],
       [-4.4483460e-02, -4.4731844e-02, -2.2736002e-02,  4.3189514e-02,
   