In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
sentences = ["i am a good boy",
             "i am a bad gril",
             "boy and girl are good",
             "i am bad"]

In [3]:
# defining Vocabulary Size
voc_size = 10000

In [4]:
# One Hot representation
one_hot_repr = [ one_hot(words,voc_size) for words in sentences ]
one_hot_repr

[[6736, 6982, 5844, 282, 6900],
 [6736, 6982, 5844, 9223, 957],
 [6900, 605, 9115, 2449, 282],
 [6736, 6982, 9223]]

In [5]:
# Word Embedding Representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [6]:
import numpy as np

#### Every Sentences have different dimensions in OneHot representation
##### so we have to make the dimensions equal

In [7]:
# PrePadding is used
sent_length = 10
embedded_docs_pre = pad_sequences(one_hot_repr, padding = 'pre',maxlen = sent_length)
embedded_docs_pre

array([[   0,    0,    0,    0,    0, 6736, 6982, 5844,  282, 6900],
       [   0,    0,    0,    0,    0, 6736, 6982, 5844, 9223,  957],
       [   0,    0,    0,    0,    0, 6900,  605, 9115, 2449,  282],
       [   0,    0,    0,    0,    0,    0,    0, 6736, 6982, 9223]])

In [8]:
# PostPadding is used
sent_length = 10
embedded_docs_post = pad_sequences(one_hot_repr, padding = 'post',maxlen = sent_length)
embedded_docs_post

array([[6736, 6982, 5844,  282, 6900,    0,    0,    0,    0,    0],
       [6736, 6982, 5844, 9223,  957,    0,    0,    0,    0,    0],
       [6900,  605, 9115, 2449,  282,    0,    0,    0,    0,    0],
       [6736, 6982, 9223,    0,    0,    0,    0,    0,    0,    0]])

In [9]:
# Feature Representation 
dimensions = 10
voc_size = 10000
sent_length = 10

In [10]:
model = Sequential()
model.add(Embedding(voc_size, dimensions, input_length = sent_length))
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()



In [15]:
model.predict(embedded_docs_pre[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


array([[-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819, -0.0424302 ,  0.00642556],
       [-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819, -0.0424302 ,  0.00642556],
       [-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819, -0.0424302 ,  0.00642556],
       [-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819, -0.0424302 ,  0.00642556],
       [-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819, -0.0424302 ,  0.00642556],
       [ 0.01535361,  0.01623745,  0.02720502,  0.04406718, -0.04479914,
        -0.02856064,  0.01290437,  0.0391459 , -0.00339937, -0.00965848],
       [ 0.01533986,  0.0172087 , -0.01661281, -0.01262754, -0.01411374,
         0.00609992,  0.00407622, -0.04435425

In [16]:
model.predict(embedded_docs_post[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


array([[ 0.01535361,  0.01623745,  0.02720502,  0.04406718, -0.04479914,
        -0.02856064,  0.01290437,  0.0391459 , -0.00339937, -0.00965848],
       [ 0.01533986,  0.0172087 , -0.01661281, -0.01262754, -0.01411374,
         0.00609992,  0.00407622, -0.04435425,  0.02829008, -0.04433703],
       [ 0.04395438,  0.01581069,  0.0386696 , -0.04425701,  0.0119583 ,
         0.03315267,  0.03139723, -0.03984659,  0.00937038,  0.03142503],
       [ 0.01617812,  0.04440341, -0.01337274,  0.03227563, -0.03976886,
        -0.01964544,  0.00351082, -0.01542874,  0.00690074, -0.0105018 ],
       [-0.03840343,  0.04952505, -0.04974193,  0.02047337, -0.00752147,
        -0.0288195 ,  0.03140004, -0.00033976,  0.03064394,  0.02638545],
       [-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819, -0.0424302 ,  0.00642556],
       [-0.0261089 , -0.01423099,  0.01434607,  0.02565758,  0.0372723 ,
         0.00691609, -0.03516054,  0.04381819