In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
# Toy corpus: seven short sentences used to demonstrate word indexing,
# padding and embedding lookup in the cells below.
sentences = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [5]:
# Size of the word-index space: passed to one_hot as its range argument and
# to the Embedding layer as its first argument (number of table rows).
vocab_size = 10_000

In [6]:
# Integer-index representation of each sentence.
# Despite its name, `one_hot` does not build one-hot vectors: it returns one
# integer index per word (see the printed output below), bounded by vocab_size.
# NOTE(review): per the Keras docs, one_hot hashes words with Python's built-in
# `hash`, so two different words may collide and the indices may differ between
# interpreter runs -- confirm before relying on the exact numbers shown.
onehot_rep = [
    one_hot(sentence, vocab_size)
    for sentence in sentences
]
print(onehot_rep)

[[1060, 6660, 836, 4407], [1060, 6660, 836, 2014], [1060, 580, 836, 1926], [8355, 3648, 1018, 8803, 836], [8355, 3648, 1018, 8803, 1116], [5040, 1060, 5966, 836, 2210], [1833, 6472, 7851, 8803]]


### Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [8]:
# Left-pad ('pre') every index sequence with zeros up to a fixed length of 8
# so that all sentences stack into a single 2-D integer array.
# Note: despite the variable name, this holds padded word indices, not
# embedding vectors; the embedding lookup happens in the model further down.
embedded = pad_sequences(onehot_rep, maxlen=8, padding='pre')
print(embedded)

[[   0    0    0    0 1060 6660  836 4407]
 [   0    0    0    0 1060 6660  836 2014]
 [   0    0    0    0 1060  580  836 1926]
 [   0    0    0 8355 3648 1018 8803  836]
 [   0    0    0 8355 3648 1018 8803 1116]
 [   0    0    0 5040 1060 5966  836 2210]
 [   0    0    0    0 1833 6472 7851 8803]]


In [9]:
# Length of the dense vector each word index is mapped to by the Embedding
# layer (its second argument in the model cell below).
dimension = 10

In [10]:
# Single-layer model: an Embedding lookup table with `vocab_size` rows and
# `dimension` columns, applied to index sequences of length 8 (the padded
# length used for `embedded`).
# NOTE(review): no fit() call appears in the visible cells, so the predict()
# calls below presumably return the layer's initial, untrained weights.
model = Sequential([
    Embedding(vocab_size, dimension, input_length=8),
])
model.compile(optimizer='adam', loss='mse')

In [11]:
# Print the layer table. The single Embedding layer reports
# vocab_size * dimension = 10000 * 10 = 100,000 trainable parameters and an
# output shape of (batch, 8, 10).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Look up the embedding vector for every index of every padded sentence.
# The result has one 8 x 10 matrix per sentence; rows for the padding index 0
# are identical because they all read the same table entry.
sentence_vectors = model.predict(embedded)
print(sentence_vectors)

[[[ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099
    0.04616873 -0.04471682  0.02431568  0.03996749  0.00923408]
  [ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099
    0.04616873 -0.04471682  0.02431568  0.03996749  0.00923408]
  [ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099
    0.04616873 -0.04471682  0.02431568  0.03996749  0.00923408]
  [ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099
    0.04616873 -0.04471682  0.02431568  0.03996749  0.00923408]
  [ 0.045544   -0.02070954  0.03926838 -0.0154717  -0.00357853
    0.04922569  0.00964344 -0.01280612  0.04085752  0.04647157]
  [-0.01436935  0.01457267  0.00048836 -0.01949531 -0.02812622
   -0.02529275 -0.02702036  0.04742164  0.03548289  0.00952857]
  [ 0.02149722 -0.0230199   0.01408359 -0.03707697 -0.01603352
    0.01237345  0.00807869  0.0195457  -0.04230101  0.04144115]
  [ 0.00400649 -0.01008996  0.02787039  0.02355083 -0.0498836
   -0.01607851  0.00340768  0.0260266  -0.04049311  0

In [16]:
# Padded index row for sentences[4] ('I am a good developer'): the three
# leading zeros are padding, followed by the five word indices.
embedded[4]

array([   0,    0,    0, 8355, 3648, 1018, 8803, 1116], dtype=int32)

In [18]:
# Embedding vectors for the fifth sentence only: an 8 x 10 matrix, one row per
# position of embedded[4]. The first three rows are identical because they
# all correspond to the padding index 0.
fifth_sentence_vectors = model.predict(embedded)[4]
print(fifth_sentence_vectors)

[[ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099   0.04616873
  -0.04471682  0.02431568  0.03996749  0.00923408]
 [ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099   0.04616873
  -0.04471682  0.02431568  0.03996749  0.00923408]
 [ 0.01709919  0.03714674 -0.02420747 -0.01656368  0.0258099   0.04616873
  -0.04471682  0.02431568  0.03996749  0.00923408]
 [ 0.0130134   0.02475092 -0.04653429 -0.02829204  0.02148281 -0.02242895
  -0.02035832  0.048136    0.03153661  0.03855262]
 [-0.00064713 -0.02667314 -0.0285339  -0.02555592  0.02026156  0.04670774
   0.03446886 -0.01641709  0.04906097 -0.01980313]
 [ 0.03423649 -0.01772517 -0.03650464  0.03237328  0.00777652 -0.01468886
   0.03856048 -0.00476459  0.04212097 -0.00546886]
 [-0.02153161  0.04769537 -0.04685248 -0.0244412  -0.02655314  0.00273131
   0.02342851  0.02300498  0.02567173 -0.00558908]
 [ 0.00092576  0.04441604  0.02299753 -0.04124577  0.04635311 -0.03928717
  -0.02732318 -0.00915002 -0.0440268  -0.00294819]]

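Each integer index in the `embedded` matrix has been mapped to a 10-dimensional vector, so every padded sentence of 8 indices becomes an 8 × 10 matrix. Positions holding the padding index 0 all map to the same vector.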