## Word Embedding Techniques using Embedding Layer in Keras

In [1]:
# Libraries used  in tensorflow 2.x

In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Sentences
sent = ['hi this is Shubham jha',
       'I am 23 year old guy',
       'I am from Bihar madhubani',
       'I live in kota rajathan']

In [4]:
sent

['hi this is Shubham jha',
 'I am 23 year old guy',
 'I am from Bihar madhubani',
 'I live in kota rajathan']

In [5]:
## Set voc_size 
voc_size = 10000

### One Hot representation

In [6]:
onehot_repr = [one_hot(words,voc_size)for words in sent]
print(onehot_repr)

[[103, 6268, 8146, 2536, 1213], [4510, 5202, 9700, 4246, 430, 8362], [4510, 5202, 9883, 6421, 6050], [4510, 1642, 9733, 2505, 6093]]


### Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [8]:
import numpy as np

In [9]:
# reshape in same length to all characters
sent_length = 8
embedded_values = pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_values)

[[   0    0    0  103 6268 8146 2536 1213]
 [   0    0 4510 5202 9700 4246  430 8362]
 [   0    0    0 4510 5202 9883 6421 6050]
 [   0    0    0 4510 1642 9733 2505 6093]]


In [10]:
dim = 10

In [11]:
model = Sequential()
model.add(Embedding(voc_size,dim,input_length=sent_length))

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.compile('adam','mse')

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [15]:
model.predict(embedded_values)

array([[[-0.03491072, -0.03902986, -0.01752711,  0.04510926,
          0.02649695, -0.04145315, -0.00436689, -0.04637349,
          0.03674043, -0.02549165],
        [-0.03491072, -0.03902986, -0.01752711,  0.04510926,
          0.02649695, -0.04145315, -0.00436689, -0.04637349,
          0.03674043, -0.02549165],
        [-0.03491072, -0.03902986, -0.01752711,  0.04510926,
          0.02649695, -0.04145315, -0.00436689, -0.04637349,
          0.03674043, -0.02549165],
        [ 0.03554502,  0.00405333,  0.04036378, -0.04457144,
         -0.01359695, -0.00619979, -0.02678922,  0.00802074,
         -0.0024282 ,  0.04488332],
        [-0.00296807, -0.01828475, -0.01821804, -0.01454581,
          0.00625868, -0.02196404,  0.01091263,  0.04485265,
          0.04419329,  0.0426516 ],
        [ 0.02838298,  0.03548601, -0.01416143,  0.01287091,
         -0.01987321, -0.02983977, -0.0052244 ,  0.00914057,
          0.00621306, -0.01302276],
        [ 0.0279713 , -0.00348071, -0.02627986, -0.0

In [16]:
embedded_values[0]

array([   0,    0,    0,  103, 6268, 8146, 2536, 1213])

### Reference Link:
https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/