# Word Embedding Using TensorFlow

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences used as input for the rest of the notebook.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Bare expression: the notebook renders the list as this cell's output.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
# Vocabulary size: handed to one_hot as its size argument and to the
# Embedding layer as its first argument.
voc_size = 10000

In [5]:
# Encode each sentence as a list of integer word indices (one index per word).
# NOTE(review): Keras documents one_hot as a hashing-based encoder, so distinct
# words may collide and the exact indices are presumably not stable across
# interpreter runs - confirm before relying on the numbers printed below.
onehot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
print(onehot_repr)

[[4689, 1869, 8952, 1527], [4689, 1869, 8952, 7679], [4689, 6710, 8952, 5036], [607, 9959, 4407, 3727, 8137], [607, 9959, 4407, 3727, 5721], [7144, 4689, 9622, 8952, 2741], [9272, 4839, 9134, 3727]]


In [6]:
# Word Embedding Representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
# Bring every index sequence to a common length of sent_length by
# left-padding ('pre') the shorter ones with zeros.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)

In [8]:
# Show the padded index matrix: one row per sentence, sent_length columns.
print(embedded_docs)

[[   0    0    0    0 4689 1869 8952 1527]
 [   0    0    0    0 4689 1869 8952 7679]
 [   0    0    0    0 4689 6710 8952 5036]
 [   0    0    0  607 9959 4407 3727 8137]
 [   0    0    0  607 9959 4407 3727 5721]
 [   0    0    0 7144 4689 9622 8952 2741]
 [   0    0    0    0 9272 4839 9134 3727]]


In [9]:
# Number of features in the embedding vector produced for each word index.
dim = 10

In [10]:
# Single-layer model: an Embedding lookup that turns each of the sent_length
# word indices into a dim-dimensional vector.
model = Sequential([
    Embedding(
        input_dim=voc_size,
        output_dim=dim,
        input_length=sent_length,
    ),
])
# Compile with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

In [11]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Run the padded index matrix through the model to get one embedding vector
# per position of every sentence, then print the whole result.
all_doc_embeddings = model.predict(embedded_docs)
print(all_doc_embeddings)

[[[ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852
   -0.00973382 -0.03139307  0.04604765  0.01290881 -0.03113697]
  [ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852
   -0.00973382 -0.03139307  0.04604765  0.01290881 -0.03113697]
  [ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852
   -0.00973382 -0.03139307  0.04604765  0.01290881 -0.03113697]
  [ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852
   -0.00973382 -0.03139307  0.04604765  0.01290881 -0.03113697]
  [-0.03633793  0.01762137  0.03656869  0.04199174 -0.00560738
   -0.0197666  -0.04147099  0.01102536  0.00205735  0.01588171]
  [-0.03855052 -0.01517526  0.04531181 -0.02992832 -0.02628078
   -0.04479082 -0.04257697 -0.04141228  0.04665705 -0.03322168]
  [ 0.01327333  0.01241081 -0.04323846  0.02567888  0.04165795
   -0.01158419 -0.02764077  0.00239409  0.02421508  0.00929437]
  [ 0.04497439  0.04634639  0.00037529  0.00869386  0.01985374
   -0.01138335 -0.00994324  0.02518977  0.037773

In [13]:
# Padded integer-index sequence of the first sentence; the leading zeros are
# the 'pre' padding added by pad_sequences.
embedded_docs[0]

array([   0,    0,    0,    0, 4689, 1869, 8952, 1527])

In [14]:
# Embedding matrix of the first sentence: one row per padded position,
# one column per embedding feature.
first_doc_embedding = model.predict(embedded_docs)[0]
print(first_doc_embedding)

[[ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852 -0.00973382
  -0.03139307  0.04604765  0.01290881 -0.03113697]
 [ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852 -0.00973382
  -0.03139307  0.04604765  0.01290881 -0.03113697]
 [ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852 -0.00973382
  -0.03139307  0.04604765  0.01290881 -0.03113697]
 [ 0.02758859  0.03379801  0.00183558  0.04615463  0.02937852 -0.00973382
  -0.03139307  0.04604765  0.01290881 -0.03113697]
 [-0.03633793  0.01762137  0.03656869  0.04199174 -0.00560738 -0.0197666
  -0.04147099  0.01102536  0.00205735  0.01588171]
 [-0.03855052 -0.01517526  0.04531181 -0.02992832 -0.02628078 -0.04479082
  -0.04257697 -0.04141228  0.04665705 -0.03322168]
 [ 0.01327333  0.01241081 -0.04323846  0.02567888  0.04165795 -0.01158419
  -0.02764077  0.00239409  0.02421508  0.00929437]
 [ 0.04497439  0.04634639  0.00037529  0.00869386  0.01985374 -0.01138335
  -0.00994324  0.02518977  0.03777392  0.01919368]]
