##### Implementing embeddings through One hot encoding 

In [4]:
from tensorflow.keras.preprocessing.text import one_hot

In [5]:
### sentences
sent=[  'the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good']
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [6]:
## Define the vocabulary size
voc_size=10000

In [7]:
### One Hot Representation
one_hot_repr = [one_hot(words,voc_size) for words in sent]
one_hot_repr

[[5659, 2551, 7424, 6666],
 [5659, 2551, 7424, 2791],
 [5659, 6187, 7424, 1583],
 [5140, 9715, 9798, 3116, 1096],
 [5140, 9715, 9798, 3116, 9019],
 [2512, 5659, 2204, 7424, 1962],
 [2810, 8466, 9887, 3116]]

##### Word Embedding Representation

1. Import pad_sequences from TensorFlow to make all sentences the same length.
2. Add zeros (pre-padding or post-padding) to ensure uniform length.

In [8]:
## word Embedding Representation
# Using Index generated by one hot encoding

from tensorflow.keras.layers import Embedding
#from tensorflow.keras.processing.sequence import pad_sequences
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [9]:
sent_length=8
embedded_docs=pad_sequences(one_hot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[   0    0    0    0 5659 2551 7424 6666]
 [   0    0    0    0 5659 2551 7424 2791]
 [   0    0    0    0 5659 6187 7424 1583]
 [   0    0    0 5140 9715 9798 3116 1096]
 [   0    0    0 5140 9715 9798 3116 9019]
 [   0    0    0 2512 5659 2204 7424 1962]
 [   0    0    0    0 2810 8466 9887 3116]]


In [10]:
## feature representation
dim=10

In [18]:
model=Sequential()
model.add(Embedding(voc_size,dim))
model.build(input_shape=(None, sent_length)) 
model.compile('adam','mse')

In [17]:
model.summary()
# Parameters = voc_size × dim

In [None]:
model.predict(embedded_docs)
# Each word in a sentence is converted to a dense vector of size 10

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step


array([[[-0.03390995, -0.0464114 ,  0.02545905, -0.02400194,
         -0.04791424, -0.04567109,  0.04961963,  0.00663756,
         -0.00898156, -0.04912361],
        [-0.03390995, -0.0464114 ,  0.02545905, -0.02400194,
         -0.04791424, -0.04567109,  0.04961963,  0.00663756,
         -0.00898156, -0.04912361],
        [-0.03390995, -0.0464114 ,  0.02545905, -0.02400194,
         -0.04791424, -0.04567109,  0.04961963,  0.00663756,
         -0.00898156, -0.04912361],
        [-0.03390995, -0.0464114 ,  0.02545905, -0.02400194,
         -0.04791424, -0.04567109,  0.04961963,  0.00663756,
         -0.00898156, -0.04912361],
        [ 0.04785898,  0.04705432, -0.0013222 , -0.02714945,
         -0.00512254, -0.01770171,  0.03854021,  0.00611635,
         -0.03620814,  0.02093078],
        [-0.02953725,  0.03260431,  0.03392141,  0.02488101,
         -0.03587455,  0.04227409, -0.00353897,  0.0312073 ,
          0.01876629, -0.02959335],
        [ 0.03753291, -0.04977387,  0.02010253, -0.0

In [19]:
embedded_docs[0] # single sentece 

array([   0,    0,    0,    0, 6186, 6775,  637, 4895])

In [22]:
model.predict(embedded_docs[0]) # single sentence embedding



array([[ 0.01453609,  0.03697893,  0.02267558,  0.01149287, -0.03695335,
         0.01416664,  0.03655917,  0.00734384,  0.03028754,  0.00339943],
       [ 0.01453609,  0.03697893,  0.02267558,  0.01149287, -0.03695335,
         0.01416664,  0.03655917,  0.00734384,  0.03028754,  0.00339943],
       [ 0.01453609,  0.03697893,  0.02267558,  0.01149287, -0.03695335,
         0.01416664,  0.03655917,  0.00734384,  0.03028754,  0.00339943],
       [ 0.01453609,  0.03697893,  0.02267558,  0.01149287, -0.03695335,
         0.01416664,  0.03655917,  0.00734384,  0.03028754,  0.00339943],
       [-0.03792738,  0.01958679, -0.04232483, -0.03475742,  0.02182527,
         0.01143194, -0.03125288,  0.02584182,  0.0050171 ,  0.04725457],
       [-0.02213118,  0.00730393,  0.02797868, -0.02386508, -0.0024281 ,
         0.04419583, -0.02011771, -0.00502002, -0.03373672, -0.04126013],
       [-0.02629154,  0.02487988, -0.02824695,  0.0302802 , -0.01835672,
        -0.00683415,  0.01606056, -0.04426531