## Word Embedding Techniques using Embedding Layer in Keras

### Importing TensorFlow 2.0 and Keras

In [6]:
from tensorflow.keras.preprocessing.text import one_hot

In [8]:
# Toy corpus: seven short sentences that the rest of the notebook encodes.
sentences = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [9]:
# Echo the corpus so the reader can see the raw input sentences.
sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [10]:
# Vocabulary size: passed to one_hot() as its size argument and to the
# Embedding layer as its input dimension.
voc_size = 10000

### One-Hot Representation (integer word indices)

In [11]:
# Encode every sentence as a list of integers, one per word, using
# voc_size as the size argument of one_hot().
onehot_rep = [
    one_hot(sentence, voc_size)
    for sentence in sentences
]
print(onehot_rep)

[[993, 7202, 5567, 863], [993, 7202, 5567, 3650], [993, 2258, 5567, 540], [1705, 97, 8701, 3108, 4708], [1705, 97, 8701, 3108, 4284], [3188, 993, 9027, 5567, 6498], [6258, 9942, 8633, 3108]]


### Word Embedding Representation

In [12]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [13]:
import numpy as np

In [15]:
# Bring every encoded sentence to the same length by left-padding with zeros
# (padding='pre'), so the batch forms a rectangular array.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_rep,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0  993 7202 5567  863]
 [   0    0    0    0  993 7202 5567 3650]
 [   0    0    0    0  993 2258 5567  540]
 [   0    0    0 1705   97 8701 3108 4708]
 [   0    0    0 1705   97 8701 3108 4284]
 [   0    0    0 3188  993 9027 5567 6498]
 [   0    0    0    0 6258 9942 8633 3108]]


In [17]:
# Size of each embedding vector; the Embedding layer in the next cell is
# configured with this same value (10).
dim = 10

In [18]:
# Build a model made of a single Embedding layer that maps each of the
# `sent_length` integer indices in a padded sentence to a `dim`-sized vector.
model = Sequential()
# The embedding width comes from `dim` (defined in the previous cell) rather
# than a repeated literal, so the constant and the layer cannot drift apart.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Optimizer 'adam' and loss 'mse' are supplied to compile the model; no
# training step is shown in this part of the notebook.
model.compile('adam', 'mse')

In [20]:
# Print the layer-by-layer summary; the single embedding layer reports an
# output shape of (None, 8, 10) and 100,000 parameters.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Run the padded index sequences through the model to obtain one embedding
# vector per token position for every sentence, then show the result.
# NOTE(review): no fit() call is visible above, so these appear to be the
# layer's initial weights - confirm.
all_embeddings = model.predict(embedded_docs)
print(all_embeddings)

[[[-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793
    0.01413275  0.02468728  0.02478636  0.02080567  0.04688337]
  [-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793
    0.01413275  0.02468728  0.02478636  0.02080567  0.04688337]
  [-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793
    0.01413275  0.02468728  0.02478636  0.02080567  0.04688337]
  [-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793
    0.01413275  0.02468728  0.02478636  0.02080567  0.04688337]
  [-0.0323526   0.01805052 -0.04953244 -0.00985793  0.03497951
   -0.02000805 -0.00138368 -0.03965854  0.00917099 -0.00762725]
  [ 0.02498222  0.02167604  0.03912831  0.01065516 -0.04264345
   -0.04813756 -0.02976544 -0.00978677  0.00080154 -0.00627498]
  [-0.04967245 -0.01964748 -0.01310959  0.047027    0.03613646
    0.02049594  0.01491186 -0.01273557  0.01629576  0.01379821]
  [-0.03424722  0.00466108 -0.0020447  -0.03272136  0.02644435
   -0.01025734  0.00448327 -0.03133271 -0.024428

In [23]:
# Padded index sequence of the first sentence, for comparison with its
# embedding vectors shown in the next cell.
embedded_docs[0]

array([   0,    0,    0,    0,  993, 7202, 5567,  863])

In [25]:
# Embedding vectors for the first sentence only: one row per token position.
# The leading padding positions (index 0) all map to the same vector.
first_sentence_embeddings = model.predict(embedded_docs)[0]
print(first_sentence_embeddings)

[[-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793  0.01413275
   0.02468728  0.02478636  0.02080567  0.04688337]
 [-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793  0.01413275
   0.02468728  0.02478636  0.02080567  0.04688337]
 [-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793  0.01413275
   0.02468728  0.02478636  0.02080567  0.04688337]
 [-0.01578617  0.04461962  0.04802236 -0.00704158  0.03963793  0.01413275
   0.02468728  0.02478636  0.02080567  0.04688337]
 [-0.0323526   0.01805052 -0.04953244 -0.00985793  0.03497951 -0.02000805
  -0.00138368 -0.03965854  0.00917099 -0.00762725]
 [ 0.02498222  0.02167604  0.03912831  0.01065516 -0.04264345 -0.04813756
  -0.02976544 -0.00978677  0.00080154 -0.00627498]
 [-0.04967245 -0.01964748 -0.01310959  0.047027    0.03613646  0.02049594
   0.01491186 -0.01273557  0.01629576  0.01379821]
 [-0.03424722  0.00466108 -0.0020447  -0.03272136  0.02644435 -0.01025734
   0.00448327 -0.03133271 -0.02442837  0.04633893]]