## Word Embedding Techniques using Embedding Layer in Keras

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
### Sentences
# Toy corpus: six short sentences that are turned into word indices and
# then into embedding vectors in the cells that follow.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good Boy',
    'I am a good Developer',
    'understand the meaning of words',
]

In [3]:
# Echo the list so the six sentences are visible in the notebook output.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good Boy',
 'I am a good Developer',
 'understand the meaning of words']

In [4]:
# Dictionary (vocabulary) size. It is passed to one_hot() as the index range
# and to the Embedding layer as the number of rows in its lookup table.
voc_size = 10_000

## One Hot Representation (an integer index per word)

In [6]:
# Encode each sentence as a list of integer word indices (one index per word,
# each drawn from the voc_size range). The same word gets the same index in
# every sentence, e.g. 'the' -> 4905 in the output below.
onehot_rep = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_rep)

[[4905, 4808, 1090, 2606], [4905, 4808, 1090, 2415], [4905, 8031, 1090, 3435], [9323, 8096, 555, 4005, 1819], [9323, 8096, 555, 4005, 1445], [4218, 4905, 785, 1090, 2632]]


## Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences ## to have the same size of the sentences which is very imp
from tensorflow.keras.models import Sequential

In [8]:
import numpy as np

In [9]:
# The Embedding layer below is declared with a fixed input length, so every
# sentence is brought to the same number of tokens first. The longest sentence
# here has 5 words, so all of them receive leading zeros.
sent_length = 8  # number of tokens per sentence after padding
embedded_docs = pad_sequences(
    onehot_rep,
    maxlen=sent_length,
    padding='pre',  # zeros are inserted in front of the real word indices
)
print(embedded_docs)  # the leading zeros in each row are the padding

[[   0    0    0    0 4905 4808 1090 2606]
 [   0    0    0    0 4905 4808 1090 2415]
 [   0    0    0    0 4905 8031 1090 3435]
 [   0    0    0 9323 8096  555 4005 1819]
 [   0    0    0 9323 8096  555 4005 1445]
 [   0    0    0 4218 4905  785 1090 2632]]


In [13]:
# Embedding dimensionality for this demo: the length of each word vector.
dim = 10

In [14]:
# Build a single-layer model that maps word indices to dense vectors.
#   input_dim    = voc_size     one embedding row per possible word index
#   output_dim   = dim          length of each word vector
#   input_length = sent_length  tokens per padded sentence
# `dim` is used instead of repeating the literal 10 so the layer always
# follows the value configured in the cell above.
# NOTE(review): newer Keras releases deprecate `input_length`; confirm
# against the installed version before upgrading.
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length))
# Configures Adam + MSE. No fit() call appears in the cells shown here, so the
# embedding weights stay at their initial values.
model.compile('adam', 'mse')

In [15]:
# The only weights are the embedding table: 10000 indices * 10 values each
# = 100,000 parameters, with an output of (batch, 8 tokens, 10 values).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Look up the vector for every token of every padded sentence. Each sentence
# yields 8 rows of 10 values; the padding index 0 always maps to the same
# vector, which is why the leading rows of a sentence are identical.
print(model.predict(embedded_docs))

[[[-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758
   -0.04162244  0.03690245  0.03293054  0.02904023 -0.02412407]
  [-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758
   -0.04162244  0.03690245  0.03293054  0.02904023 -0.02412407]
  [-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758
   -0.04162244  0.03690245  0.03293054  0.02904023 -0.02412407]
  [-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758
   -0.04162244  0.03690245  0.03293054  0.02904023 -0.02412407]
  [ 0.01519437 -0.03926327 -0.03205682  0.03141098 -0.03552306
    0.01130614  0.04225428  0.03330571 -0.02302227 -0.03904236]
  [-0.00551723  0.01562634  0.04043944 -0.03986258 -0.00583009
    0.02873779  0.01823794  0.03918209  0.0062502   0.026445  ]
  [-0.04398396  0.03451565  0.01310423 -0.03416965  0.03361872
    0.03043454  0.04363732 -0.01048141 -0.00874365 -0.02051386]
  [-0.01392709  0.02684015 -0.03861182  0.00311375 -0.0368335
   -0.00773407  0.01962788 -0.0493413  -0.0414451

In [17]:
## First statement: the padded word indices of 'the glass of milk'
embedded_docs[0]

array([   0,    0,    0,    0, 4905, 4808, 1090, 2606])

In [19]:
## Vector representation of the first statement: one 10-value row per token
## (8 rows in total; the first 4 rows are the padding index 0).
print(model.predict(embedded_docs)[0])

[[-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758 -0.04162244
   0.03690245  0.03293054  0.02904023 -0.02412407]
 [-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758 -0.04162244
   0.03690245  0.03293054  0.02904023 -0.02412407]
 [-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758 -0.04162244
   0.03690245  0.03293054  0.02904023 -0.02412407]
 [-0.01430857  0.04949788 -0.04456719 -0.03717247 -0.00343758 -0.04162244
   0.03690245  0.03293054  0.02904023 -0.02412407]
 [ 0.01519437 -0.03926327 -0.03205682  0.03141098 -0.03552306  0.01130614
   0.04225428  0.03330571 -0.02302227 -0.03904236]
 [-0.00551723  0.01562634  0.04043944 -0.03986258 -0.00583009  0.02873779
   0.01823794  0.03918209  0.0062502   0.026445  ]
 [-0.04398396  0.03451565  0.01310423 -0.03416965  0.03361872  0.03043454
   0.04363732 -0.01048141 -0.00874365 -0.02051386]
 [-0.01392709  0.02684015 -0.03861182  0.00311375 -0.0368335  -0.00773407
   0.01962788 -0.0493413  -0.0414451   0.03222022]]