## Tutorial Link
## https://towardsdatascience.com/nlp-word-embedding-made-easy-30b46b261d6b

In [1]:
from tensorflow.keras.preprocessing.text import hashing_trick, one_hot

In [2]:
### Toy corpus: seven short sentences used as input for the embedding demo
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus as the cell's output to confirm its contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
### Vocabulary size: range of the hashed word indices and row count of the Embedding table
voc_size = 10000

In [8]:
# Map every word of every sentence to an integer index in [1, voc_size) via
# the hashing trick.
# one_hot() always hashes with Python's built-in hash(), which is salted per
# process for strings, so its indices change on every kernel restart.
# hashing_trick() with hash_function='md5' applies the same filtering,
# lower-casing and splitting defaults but yields identical indices on every run.
# Distinct words may still collide on one index (uniqueness is not guaranteed).
onehot_repr = [
    hashing_trick(words, voc_size, hash_function='md5')
    for words in sent
]
print(onehot_repr)

[[9731, 5576, 9773, 7296], [9731, 5576, 9773, 9360], [9731, 5505, 9773, 1797], [1947, 323, 1272, 3009, 9255], [1947, 323, 1272, 3009, 9918], [9674, 9731, 7962, 9773, 209], [9287, 343, 4711, 3009]]


In [9]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [10]:
import numpy as np

In [11]:
# Fixed sequence length fed to the model. Every sentence in `sent` has at most
# five words, so each row is left-padded with zeros rather than cut.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 9731 5576 9773 7296]
 [   0    0    0    0 9731 5576 9773 9360]
 [   0    0    0    0 9731 5505 9773 1797]
 [   0    0    0 1947  323 1272 3009 9255]
 [   0    0    0 1947  323 1272 3009 9918]
 [   0    0    0 9674 9731 7962 9773  209]
 [   0    0    0    0 9287  343 4711 3009]]


In [12]:
# Number of components in each word's embedding vector
dim = 10

In [13]:
# Single-layer model: an Embedding lookup that turns each row of `sent_length`
# word indices into `sent_length` vectors of `dim` floats.
# input_length is passed so the model is built before summary() is called.
model = Sequential([
    Embedding(voc_size, dim, input_length=sent_length),
])
# Optimizer and loss are supplied to compile the model; no fit() call appears
# in the cells shown here.
model.compile(optimizer='adam', loss='mse')

In [15]:
# Print the layer table; the parameter count equals voc_size * dim
# (10000 * 10 = 100,000 embedding weights).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Look up the embedding vectors for all padded sentences: one (sent_length, dim)
# matrix per sentence. No training step appears in the cells above, so these
# are the layer's initial weights. Rows for the padding index 0 are identical.
print(model.predict(embedded_docs))

[[[ 1.42321102e-02 -4.50952426e-02 -1.60229206e-02  3.29800732e-02
    1.27527378e-02 -1.11314878e-02  2.42735781e-02  2.77336575e-02
   -2.48240959e-02  1.27353184e-02]
  [ 1.42321102e-02 -4.50952426e-02 -1.60229206e-02  3.29800732e-02
    1.27527378e-02 -1.11314878e-02  2.42735781e-02  2.77336575e-02
   -2.48240959e-02  1.27353184e-02]
  [ 1.42321102e-02 -4.50952426e-02 -1.60229206e-02  3.29800732e-02
    1.27527378e-02 -1.11314878e-02  2.42735781e-02  2.77336575e-02
   -2.48240959e-02  1.27353184e-02]
  [ 1.42321102e-02 -4.50952426e-02 -1.60229206e-02  3.29800732e-02
    1.27527378e-02 -1.11314878e-02  2.42735781e-02  2.77336575e-02
   -2.48240959e-02  1.27353184e-02]
  [ 4.50136401e-02  3.38586904e-02  1.71069764e-02 -1.92039609e-02
   -2.55879164e-02  1.86433680e-02  5.65264374e-03 -1.10994689e-02
    4.39140089e-02 -1.35707147e-02]
  [-4.70095873e-03 -2.17345599e-02 -3.74098197e-02  2.46417187e-02
    3.22736986e-02  4.76365797e-02  2.85202153e-02  3.62368710e-02
   -4.97239232e-

In [17]:
# Padded index row of the first sentence ('the glass of milk'):
# four leading zeros followed by its four word indices.
embedded_docs[0]

array([   0,    0,    0,    0, 9731, 5576, 9773, 7296])

In [18]:
# Embedding matrix of the first sentence only (sent_length rows, dim columns).
# The four identical leading rows are the vector assigned to padding index 0.
print(model.predict(embedded_docs)[0])

[[ 0.01423211 -0.04509524 -0.01602292  0.03298007  0.01275274 -0.01113149
   0.02427358  0.02773366 -0.0248241   0.01273532]
 [ 0.01423211 -0.04509524 -0.01602292  0.03298007  0.01275274 -0.01113149
   0.02427358  0.02773366 -0.0248241   0.01273532]
 [ 0.01423211 -0.04509524 -0.01602292  0.03298007  0.01275274 -0.01113149
   0.02427358  0.02773366 -0.0248241   0.01273532]
 [ 0.01423211 -0.04509524 -0.01602292  0.03298007  0.01275274 -0.01113149
   0.02427358  0.02773366 -0.0248241   0.01273532]
 [ 0.04501364  0.03385869  0.01710698 -0.01920396 -0.02558792  0.01864337
   0.00565264 -0.01109947  0.04391401 -0.01357071]
 [-0.00470096 -0.02173456 -0.03740982  0.02464172  0.0322737   0.04763658
   0.02852022  0.03623687 -0.04972392 -0.0278414 ]
 [-0.01006527  0.0485202  -0.00690852 -0.02973951  0.02614218  0.04963417
   0.01371709 -0.0271311  -0.02879826  0.0092113 ]
 [ 0.00634987 -0.04832709 -0.01894755 -0.00828067  0.03398755  0.03094165
  -0.00920495 -0.03253962  0.03009878 -0.01540221]]