### Word Embedding Techniques using Embedding Layer in Keras

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
### sentences
sent=['the glass of milk',
     'the glass of juice',
     'the cup of tea',
     'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good',]

In [3]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
voc_size = 10000

### One Hot Representation

In [5]:
onehot_repr = [one_hot(words,voc_size) for words in sent]
print(onehot_repr)

[[3257, 2201, 3296, 7399], [3257, 2201, 3296, 9039], [3257, 2132, 3296, 1006], [3783, 8677, 5878, 6630, 9018], [3783, 8677, 5878, 6630, 4113], [1303, 3257, 749, 3296, 7759], [7603, 742, 7464, 6630]]


### Word Embedding Representation

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [8]:
sent_length = 8 
embedded_docs = pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[   0    0    0    0 3257 2201 3296 7399]
 [   0    0    0    0 3257 2201 3296 9039]
 [   0    0    0    0 3257 2132 3296 1006]
 [   0    0    0 3783 8677 5878 6630 9018]
 [   0    0    0 3783 8677 5878 6630 4113]
 [   0    0    0 1303 3257  749 3296 7759]
 [   0    0    0    0 7603  742 7464 6630]]


In [9]:
dim =10 

In [10]:
model = Sequential()
model.add(Embedding(voc_size,10,input_length=sent_length))
model.compile('adam','mse')

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
print(model.predict(embedded_docs))

[[[-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829
    0.03129667  0.04862449  0.02144975 -0.03420287  0.03227457]
  [-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829
    0.03129667  0.04862449  0.02144975 -0.03420287  0.03227457]
  [-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829
    0.03129667  0.04862449  0.02144975 -0.03420287  0.03227457]
  [-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829
    0.03129667  0.04862449  0.02144975 -0.03420287  0.03227457]
  [ 0.02176994  0.03011174 -0.04631669  0.03293971  0.03082944
    0.02441655  0.04258305  0.04981368 -0.04545472 -0.03900479]
  [-0.04135974  0.03848523 -0.02844489 -0.005165   -0.02984157
   -0.01698893  0.03222806  0.00662055 -0.04883775 -0.02285662]
  [ 0.02005495  0.01034435 -0.03273024 -0.04940462 -0.04341356
   -0.04771434  0.03110305 -0.0230414   0.0092085  -0.0437515 ]
  [-0.00590122  0.00493505  0.00076836 -0.03636929 -0.02896433
   -0.01419281  0.00205413 -0.02702466  0.042748

In [13]:
embedded_docs[0]

array([   0,    0,    0,    0, 3257, 2201, 3296, 7399])

In [14]:
print(model.predict(embedded_docs)[0])

[[-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829  0.03129667
   0.04862449  0.02144975 -0.03420287  0.03227457]
 [-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829  0.03129667
   0.04862449  0.02144975 -0.03420287  0.03227457]
 [-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829  0.03129667
   0.04862449  0.02144975 -0.03420287  0.03227457]
 [-0.01614728 -0.0253458   0.00129456 -0.00078694 -0.03175829  0.03129667
   0.04862449  0.02144975 -0.03420287  0.03227457]
 [ 0.02176994  0.03011174 -0.04631669  0.03293971  0.03082944  0.02441655
   0.04258305  0.04981368 -0.04545472 -0.03900479]
 [-0.04135974  0.03848523 -0.02844489 -0.005165   -0.02984157 -0.01698893
   0.03222806  0.00662055 -0.04883775 -0.02285662]
 [ 0.02005495  0.01034435 -0.03273024 -0.04940462 -0.04341356 -0.04771434
   0.03110305 -0.0230414   0.0092085  -0.0437515 ]
 [-0.00590122  0.00493505  0.00076836 -0.03636929 -0.02896433 -0.01419281
   0.00205413 -0.02702466  0.04274837 -0.04431204]]