### Word Embedding Techniques using Embedding Layer in Keras

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences that the rest of the notebook encodes,
# pads and feeds through the Embedding layer.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus so its contents can be checked before encoding.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
# Vocabulary size: passed to one_hot() as the size of the hashing space and
# to the Embedding layer as its input dimension.
voc_size = 10_000

#### One Hot Representation

In [5]:
# Encode each sentence as a list of integer word indices (one index per word),
# using voc_size as the size of the index space.
# NOTE(review): one_hot() appears to rely on Python's built-in hash() by
# default, so the exact indices may change between kernel sessions -- confirm
# against the Keras documentation before depending on specific values.
onehot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
print(onehot_repr)

[[3902, 2507, 2793, 3849], [3902, 2507, 2793, 4695], [3902, 936, 2793, 1395], [9206, 4470, 740, 5661, 9524], [9206, 4470, 740, 5661, 8624], [2705, 3902, 8707, 2793, 616], [5553, 830, 8717, 5661]]


### Word Embedding Representation

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [8]:
# Bring every encoded sentence to the same length (sent_length) by adding
# zeros at the front ('pre' padding), so the batch forms a rectangular array.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 3902 2507 2793 3849]
 [   0    0    0    0 3902 2507 2793 4695]
 [   0    0    0    0 3902  936 2793 1395]
 [   0    0    0 9206 4470  740 5661 9524]
 [   0    0    0 9206 4470  740 5661 8624]
 [   0    0    0 2705 3902 8707 2793  616]
 [   0    0    0    0 5553  830 8717 5661]]


In [9]:
# Size of each word's embedding vector (matches the 10 output features of the
# Embedding layer in the model).
dim = 10


In [10]:
# Build a one-layer model that turns each of the sent_length word indices of a
# sentence into a dim-dimensional vector.
#   input_dim    -> voc_size    (number of rows in the embedding table)
#   output_dim   -> dim         (features per word; previously hard-coded as 10,
#                                which left the `dim` setting without effect)
#   input_length -> sent_length (tokens per padded sentence)
# No training happens in the cells shown here, so the optimizer and loss passed
# to compile() only configure the model; predictions come from the layer's
# initial weights.
model = Sequential()
model.add(
    Embedding(
        input_dim=voc_size,
        output_dim=dim,
        input_length=sent_length,
    )
)
model.compile(optimizer='adam', loss='mse')

In [11]:
# Print the layer list with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Run every padded sentence through the Embedding layer; the result holds one
# embedding vector per token position of each sentence.
all_doc_vectors = model.predict(embedded_docs)
print(all_doc_vectors)

[[[-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594
    0.01366159  0.04002566  0.02233701  0.03478552 -0.00447224]
  [-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594
    0.01366159  0.04002566  0.02233701  0.03478552 -0.00447224]
  [-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594
    0.01366159  0.04002566  0.02233701  0.03478552 -0.00447224]
  [-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594
    0.01366159  0.04002566  0.02233701  0.03478552 -0.00447224]
  [ 0.04539077 -0.02246778  0.04464264 -0.02949488 -0.03070169
    0.04934646  0.01215344 -0.01840043 -0.02366161  0.01484529]
  [-0.02318504  0.02750767  0.00586643  0.03557223  0.01574583
   -0.02218266  0.02720712  0.01330462  0.01784703  0.02839286]
  [-0.01616794 -0.03254596  0.04093169 -0.03156298  0.01099473
   -0.03667103  0.02746623  0.04758375  0.04417062 -0.03666779]
  [-0.03486198 -0.00627007 -0.04498976 -0.01965009 -0.02221961
    0.0357261   0.01787833 -0.01015256 -0.041486

In [13]:
# Padded index sequence of the first sentence (leading zeros are padding).
embedded_docs[0]

array([   0,    0,    0,    0, 3902, 2507, 2793, 3849])

In [14]:
# Embedding output for the first sentence only.
# Rows: one per token position of the padded sentence (eight in total; the
# leading rows are identical because they all correspond to padding index 0).
# Columns: the ten embedding features of the token at that position.
first_doc_vectors = model.predict(embedded_docs)[0]
print(first_doc_vectors)

[[-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594  0.01366159
   0.04002566  0.02233701  0.03478552 -0.00447224]
 [-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594  0.01366159
   0.04002566  0.02233701  0.03478552 -0.00447224]
 [-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594  0.01366159
   0.04002566  0.02233701  0.03478552 -0.00447224]
 [-0.00942621 -0.04406041  0.00834959 -0.03023748  0.04921594  0.01366159
   0.04002566  0.02233701  0.03478552 -0.00447224]
 [ 0.04539077 -0.02246778  0.04464264 -0.02949488 -0.03070169  0.04934646
   0.01215344 -0.01840043 -0.02366161  0.01484529]
 [-0.02318504  0.02750767  0.00586643  0.03557223  0.01574583 -0.02218266
   0.02720712  0.01330462  0.01784703  0.02839286]
 [-0.01616794 -0.03254596  0.04093169 -0.03156298  0.01099473 -0.03667103
   0.02746623  0.04758375  0.04417062 -0.03666779]
 [-0.03486198 -0.00627007 -0.04498976 -0.01965009 -0.02221961  0.0357261
   0.01787833 -0.01015256 -0.04148688  0.00051738]]
