In [1]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential




In [2]:
### Sentences: the toy corpus that the following cells encode and pad
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Vocabulary size: passed to one_hot() as the index range and to the
# Embedding layer as input_dim.
voc_size = 1000

In [4]:
### One Hot Representation
# Turn every sentence into a list of integer word indices (one index per word).
# NOTE(review): per the Keras docs, one_hot is hash-based, so distinct words
# may map to the same index — confirm this is acceptable for the use case.
one_hot_repr = [one_hot(sentence, voc_size) for sentence in sent]
one_hot_repr

[[383, 549, 916, 341],
 [383, 549, 916, 493],
 [383, 117, 916, 287],
 [146, 259, 608, 183, 628],
 [146, 259, 608, 183, 725],
 [219, 383, 340, 916, 734],
 [964, 996, 659, 183]]

Above, each word of each sentence is represented by an integer index drawn from a vocabulary of size 1000 (the number of unique words).
Below, every input (sentence) must have exactly the same length for model training, so we pad the sequences.

In [5]:
# Common length every encoded sentence is padded to.
sent_length = 8

# 'pre' padding inserts the filler values at the front of each sequence,
# so the word indices end up right-aligned.
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[  0   0   0   0 383 549 916 341]
 [  0   0   0   0 383 549 916 493]
 [  0   0   0   0 383 117 916 287]
 [  0   0   0 146 259 608 183 628]
 [  0   0   0 146 259 608 183 725]
 [  0   0   0 219 383 340 916 734]
 [  0   0   0   0 964 996 659 183]]


Below, we add an embedding layer.
1. Input Dim --> Number of unique words, i.e. the size of the vocabulary.
2. Output Dim --> Number of features used to represent a single word.
3. Input Length --> Length of each (padded) sentence.

In [6]:
# A model consisting of a single Embedding layer: each of the sent_length
# word indices in a sentence is mapped to a 10-dimensional vector.
model = Sequential([
    Embedding(
        input_dim=voc_size,
        output_dim=10,
        input_length=sent_length,
    ),
])
# The model is only used for predict() in this notebook; compile() just
# attaches an optimizer and a loss.
model.compile(optimizer='adam', loss='mse')





In [7]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             10000     
                                                                 
Total params: 10000 (39.06 KB)
Trainable params: 10000 (39.06 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
# Embed the first padded sentence.
# The model was built with input_length=sent_length, so it expects input of
# shape (batch, sent_length). embedded_docs[0] is a 1-D array of shape
# (sent_length,), which Keras would interpret as sent_length separate samples.
# Slicing with [:1] keeps the batch axis -> shape (1, sent_length); indexing the
# prediction with [0] then yields the (sent_length, 10) matrix for that sentence.
model.predict(embedded_docs[:1])[0]



array([[ 0.02882395,  0.00197117, -0.04019519,  0.01561208, -0.03078496,
        -0.01849766,  0.02195524, -0.04731867,  0.04029831, -0.03399174],
       [ 0.02882395,  0.00197117, -0.04019519,  0.01561208, -0.03078496,
        -0.01849766,  0.02195524, -0.04731867,  0.04029831, -0.03399174],
       [ 0.02882395,  0.00197117, -0.04019519,  0.01561208, -0.03078496,
        -0.01849766,  0.02195524, -0.04731867,  0.04029831, -0.03399174],
       [ 0.02882395,  0.00197117, -0.04019519,  0.01561208, -0.03078496,
        -0.01849766,  0.02195524, -0.04731867,  0.04029831, -0.03399174],
       [-0.01600752, -0.04901148,  0.00883685, -0.00808729,  0.01548574,
        -0.00749417,  0.00782134,  0.00844909,  0.03585761,  0.01345693],
       [-0.02372405,  0.01758246,  0.0286244 ,  0.02625806, -0.01910296,
         0.02517306, -0.02423089,  0.00123686, -0.01686454,  0.04375732],
       [ 0.03677466,  0.01583414,  0.03492576,  0.00292488,  0.00552841,
         0.01222633,  0.02666327,  0.04681028