# Word Embedding Techniques using the Embedding Layer in Keras

In [7]:
# Requires TensorFlow 2.x (tf > 2.0)
from tensorflow.keras.preprocessing.text import one_hot 

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [3]:
# Toy corpus: eight short sentences that the rest of the notebook encodes,
# pads and feeds through an Embedding layer.
sent = [
    'the glass of milky bar',
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [4]:
# Echo the corpus so the cell output lists the eight sentences.
sent

['the glass of milky bar',
 'the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Vocabulary size. Used below as the size argument of `one_hot` and as the
# input dimension of the Embedding layer.
voc_size = 10_000

## One-Hot Representation

In [6]:
# Encode every sentence with Keras' `one_hot`, one list of integers per
# sentence. Despite the name, the result is a list of integer word indices
# (see the output), not binary vectors; a repeated word such as "the" gets
# the same index in each sentence of this run.
# NOTE(review): per the Keras docs, `one_hot` hashes words with Python's
# `hash` by default, so the indices may differ between runs — confirm if
# reproducibility matters.
onehot_rep = [one_hot(sentence, voc_size) for sentence in sent]
onehot_rep

[[8096, 9840, 2422, 7424, 2023],
 [8096, 9840, 2422, 555],
 [8096, 9840, 2422, 6967],
 [8096, 9434, 2422, 2984],
 [1947, 2289, 5950, 2725, 7200],
 [1947, 2289, 5950, 2725, 1994],
 [6036, 8096, 3083, 2422, 6317],
 [6903, 6573, 8156, 2725]]

## Word Embedding Representation

In [10]:
# Bring every encoded sentence to the same length so they can be stacked
# into one 2-D integer array: shorter sentences are padded with zeros on the
# left ('pre'), as the output shows.
sent_length = 8
embedded_docs = pad_sequences(onehot_rep, maxlen=sent_length, padding='pre')
embedded_docs

array([[   0,    0,    0, 8096, 9840, 2422, 7424, 2023],
       [   0,    0,    0,    0, 8096, 9840, 2422,  555],
       [   0,    0,    0,    0, 8096, 9840, 2422, 6967],
       [   0,    0,    0,    0, 8096, 9434, 2422, 2984],
       [   0,    0,    0, 1947, 2289, 5950, 2725, 7200],
       [   0,    0,    0, 1947, 2289, 5950, 2725, 1994],
       [   0,    0,    0, 6036, 8096, 3083, 2422, 6317],
       [   0,    0,    0,    0, 6903, 6573, 8156, 2725]], dtype=int32)

In [11]:
# Embedding vector size. Set to 10 so it agrees with the output dimension
# the Embedding layer is built with in the next cell and with the
# (None, 8, 10) shape reported by the model summary.
dim = 10

In [13]:
# Minimal model: a single Embedding layer that maps each of the `voc_size`
# possible word indices to a 10-dimensional vector, for input sequences of
# `sent_length` tokens.
# NOTE(review): the output dimension is hard-coded to 10 here; the `dim`
# constant defined in the previous cell is not used.
model = Sequential()
model.add(
    Embedding(input_dim=voc_size, output_dim=10, input_length=sent_length)
)
# The model is never fit in the cells shown, so later predictions come from
# the layer's initial weights.
model.compile(optimizer='adam', loss='mse')

In [15]:
# Print the layer table: one Embedding layer with output shape (None, 8, 10)
# and voc_size * 10 = 100,000 trainable weights (see output).
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Look up the embedding vector for every token position of every padded
# sentence. Padding positions (index 0) all map to the same vector, which is
# why the first rows of the output repeat.
model.predict(embedded_docs)

array([[[-1.62999630e-02, -1.10927112e-02, -2.10504606e-03,
         -2.19029784e-02,  2.18643658e-02, -1.19810104e-02,
          1.80439465e-02, -4.06552777e-02, -4.85915318e-02,
         -8.98659229e-03],
        [-1.62999630e-02, -1.10927112e-02, -2.10504606e-03,
         -2.19029784e-02,  2.18643658e-02, -1.19810104e-02,
          1.80439465e-02, -4.06552777e-02, -4.85915318e-02,
         -8.98659229e-03],
        [-1.62999630e-02, -1.10927112e-02, -2.10504606e-03,
         -2.19029784e-02,  2.18643658e-02, -1.19810104e-02,
          1.80439465e-02, -4.06552777e-02, -4.85915318e-02,
         -8.98659229e-03],
        [-1.15729459e-02, -2.88550146e-02, -1.39546283e-02,
          1.26013868e-02, -1.41350515e-02,  3.84845249e-02,
         -4.24353965e-02, -2.20077764e-02,  4.90409471e-02,
          4.29835953e-02],
        [ 4.95642461e-02, -3.91426571e-02, -3.97079475e-02,
         -3.09710391e-02, -1.84136629e-03,  2.66902708e-02,
         -1.70029290e-02,  2.60855593e-02,  3.608867

# Embedding Matrix

In [21]:
# Sanity check: the first padded sentence has as many token positions as the
# model returns embedding vectors for it (both are 8 in the output).
len(embedded_docs[0]), len(model.predict(embedded_docs)[0])

(8, 8)

In [17]:
# Integer indices of the first padded sentence (three leading zeros are padding).
embedded_docs[0]

array([   0,    0,    0, 8096, 9840, 2422, 7424, 2023], dtype=int32)

In [18]:
# Embedding vectors for the first padded sentence: one 10-value row per token
# position; the rows for the zero-padding positions are identical.
model.predict(embedded_docs)[0]

array([[-0.01629996, -0.01109271, -0.00210505, -0.02190298,  0.02186437,
        -0.01198101,  0.01804395, -0.04065528, -0.04859153, -0.00898659],
       [-0.01629996, -0.01109271, -0.00210505, -0.02190298,  0.02186437,
        -0.01198101,  0.01804395, -0.04065528, -0.04859153, -0.00898659],
       [-0.01629996, -0.01109271, -0.00210505, -0.02190298,  0.02186437,
        -0.01198101,  0.01804395, -0.04065528, -0.04859153, -0.00898659],
       [-0.01157295, -0.02885501, -0.01395463,  0.01260139, -0.01413505,
         0.03848452, -0.0424354 , -0.02200778,  0.04904095,  0.0429836 ],
       [ 0.04956425, -0.03914266, -0.03970795, -0.03097104, -0.00184137,
         0.02669027, -0.01700293,  0.02608556,  0.03608867,  0.04661092],
       [-0.00452213,  0.02706112, -0.00343639, -0.03432515, -0.01385891,
        -0.02343267,  0.02147913, -0.02317892,  0.04735391, -0.03396606],
       [ 0.00093521,  0.04736722, -0.03085929, -0.0496689 ,  0.00251191,
        -0.04921473,  0.02101027, -0.01181071

(8, 8)