# **Word Embedding Using Keras**

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: five short English sentences used throughout the notebook
# to demonstrate integer encoding, padding and embedding lookup.
sentences=[
    'There is a dog',
    'Here is my cute puppy',
    'Some childrens are playing in the park',
    'I am a student',
    'I used to play football everyday',
]

In [3]:
# Echo the corpus as the cell's output to confirm all five sentences were stored.
sentences

['There is a dog',
 'Here is my cute puppy',
 'Some childrens are playing in the park',
 'I am a student',
 'I used to play football everyday']

In [4]:
# Vocabulary size: passed to one_hot() as the index range when encoding the
# sentences, and to the Embedding layer as its number of rows.
voc_size=15_000

# **One_hot representation**

In [5]:
# Encode each *sentence* as a list of integer word indices (one index per word).
# The same word always maps to the same index: in the output below, 'is' is
# 10968 in both of the first two sentences.
one_hot_repr=[
    one_hot(sentence,voc_size)
    for sentence in sentences
]
print(one_hot_repr)

[[2493, 10968, 5401, 13622], [6503, 10968, 3545, 13540, 4824], [12497, 13767, 12632, 11425, 12172, 4746, 11691], [3224, 6095, 5401, 8751], [3224, 5058, 12665, 3978, 13971, 6976]]


# **Word Embedding Representation**

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [9]:
# Every sequence is brought to the same length so the batch forms a
# rectangular array; padding='pre' puts the filler zeros in front of the words.
# NOTE: despite its name, `word_embedding` holds padded index sequences,
# not embedding vectors.
set_length=9
word_embedding=pad_sequences(
    one_hot_repr,
    maxlen=set_length,
    padding='pre',
)
print(word_embedding)

[[    0     0     0     0     0  2493 10968  5401 13622]
 [    0     0     0     0  6503 10968  3545 13540  4824]
 [    0     0 12497 13767 12632 11425 12172  4746 11691]
 [    0     0     0     0     0  3224  6095  5401  8751]
 [    0     0     0  3224  5058 12665  3978 13971  6976]]


In [20]:
# Build a model that consists of a single Embedding layer.
#   voc_size   -> number of rows in the embedding table (one per word index)
#   dim        -> length of the dense vector stored for each index
#   set_length -> number of indices in every padded input sequence
dim=10
model=Sequential()
# Pass `dim` instead of repeating the literal 10, so that changing `dim`
# above actually changes the embedding width.
model.add(Embedding(voc_size,dim,input_length=set_length))
# Optimizer and loss are supplied to compile(); no fit() call appears in the
# visible cells, so the vectors predicted later are the initial weights.
model.compile('adam','mse')

In [21]:
# Print the layer table; the embedding has voc_size * 10 = 150,000 parameters
# and emits one 10-value vector per position: output shape (None, 9, 10).
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 9, 10)             150000    
Total params: 150,000
Trainable params: 150,000
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Look up the embedding vector for every index of every padded sentence.
# Padding positions all hold index 0, so their rows in the output are identical.
all_sentence_vectors=model.predict(word_embedding)
print(all_sentence_vectors)

[[[-4.1010570e-02  2.1181133e-02 -3.0836439e-02 -3.9284505e-02
    5.4427981e-03  1.8496837e-02 -3.7180297e-03 -3.9139427e-02
    2.8651182e-02  1.9710336e-02]
  [-4.1010570e-02  2.1181133e-02 -3.0836439e-02 -3.9284505e-02
    5.4427981e-03  1.8496837e-02 -3.7180297e-03 -3.9139427e-02
    2.8651182e-02  1.9710336e-02]
  [-4.1010570e-02  2.1181133e-02 -3.0836439e-02 -3.9284505e-02
    5.4427981e-03  1.8496837e-02 -3.7180297e-03 -3.9139427e-02
    2.8651182e-02  1.9710336e-02]
  [-4.1010570e-02  2.1181133e-02 -3.0836439e-02 -3.9284505e-02
    5.4427981e-03  1.8496837e-02 -3.7180297e-03 -3.9139427e-02
    2.8651182e-02  1.9710336e-02]
  [-4.1010570e-02  2.1181133e-02 -3.0836439e-02 -3.9284505e-02
    5.4427981e-03  1.8496837e-02 -3.7180297e-03 -3.9139427e-02
    2.8651182e-02  1.9710336e-02]
  [-4.0877175e-02  2.8463427e-02  4.9743284e-02 -1.4010537e-02
    2.0367727e-03  3.5348583e-02  1.9631956e-02  4.5044534e-03
    2.0932350e-02 -2.8740391e-03]
  [-4.4651330e-02  3.5501245e-02  1.4056

In [24]:
# Padded index sequence of the first sentence: five leading zeros, then four word indices.
word_embedding[0]

array([    0,     0,     0,     0,     0,  2493, 10968,  5401, 13622],
      dtype=int32)

In [23]:
# Embedding vectors for the first sentence only: one row per padded position.
first_sentence_vectors=model.predict(word_embedding)[0]
print(first_sentence_vectors)

[[-0.04101057  0.02118113 -0.03083644 -0.0392845   0.0054428   0.01849684
  -0.00371803 -0.03913943  0.02865118  0.01971034]
 [-0.04101057  0.02118113 -0.03083644 -0.0392845   0.0054428   0.01849684
  -0.00371803 -0.03913943  0.02865118  0.01971034]
 [-0.04101057  0.02118113 -0.03083644 -0.0392845   0.0054428   0.01849684
  -0.00371803 -0.03913943  0.02865118  0.01971034]
 [-0.04101057  0.02118113 -0.03083644 -0.0392845   0.0054428   0.01849684
  -0.00371803 -0.03913943  0.02865118  0.01971034]
 [-0.04101057  0.02118113 -0.03083644 -0.0392845   0.0054428   0.01849684
  -0.00371803 -0.03913943  0.02865118  0.01971034]
 [-0.04087717  0.02846343  0.04974328 -0.01401054  0.00203677  0.03534858
   0.01963196  0.00450445  0.02093235 -0.00287404]
 [-0.04465133  0.03550125  0.01405649  0.01600998  0.01273224  0.03968881
   0.01060154 -0.02153678  0.01948085  0.03296271]
 [-0.01179219 -0.01215688  0.00893767 -0.01783282  0.04800056 -0.02347744
  -0.03136964  0.04533205  0.0489498   0.04313442]
