# Importing Libraries:

In [12]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot

In [13]:
# Toy corpus: a handful of short sentences to encode and embed.
sentences = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

# Size of the index space the words are mapped into (passed to one_hot and,
# later, used as the Embedding layer's input_dim).
dict_size = 10_000

# Creating One Hot Representation of sentences:

In [14]:
# one_hot does not look words up in a stored dictionary: per the Keras docs it
# hashes every word of a sentence to an integer index smaller than dict_size.
# NOTE(review): distinct words can therefore collide, and the exact indices may
# differ between kernel sessions -- confirm before relying on specific values.
oneHotRepres = [one_hot(sentence, dict_size) for sentence in sentences]
oneHotRepres  # one list of word indices per sentence

[[2004, 6952, 6611, 5140],
 [2004, 6952, 6611, 6705],
 [2004, 8961, 6611, 6704],
 [3625, 547, 5950, 9870, 836],
 [3625, 547, 5950, 9870, 1160],
 [6921, 2004, 9828, 6611, 3698],
 [2533, 1516, 9257, 9870]]

# Importing necessary libraries for word Embedding:

In [15]:
from keras.models import Sequential
from keras.layers import Embedding
from keras.preprocessing.sequence import pad_sequences

## Using `oneHotRepres` to create an embedding matrix of vectors:

- First, satisfy the requirement that every sentence has the same length (an equal number of word indices per sentence); use `pad_sequences` for this.

In [16]:
# Common length (in word indices) that every sentence is brought to.
sentence_len = 8

# 'pre' padding places the filler zeros in front of each shorter sequence.
padded_sentences = pad_sequences(oneHotRepres, maxlen=sentence_len, padding='pre')
padded_sentences

array([[   0,    0,    0,    0, 2004, 6952, 6611, 5140],
       [   0,    0,    0,    0, 2004, 6952, 6611, 6705],
       [   0,    0,    0,    0, 2004, 8961, 6611, 6704],
       [   0,    0,    0, 3625,  547, 5950, 9870,  836],
       [   0,    0,    0, 3625,  547, 5950, 9870, 1160],
       [   0,    0,    0, 6921, 2004, 9828, 6611, 3698],
       [   0,    0,    0,    0, 2533, 1516, 9257, 9870]], dtype=int32)

- Create a model with an embedding layer:

In [17]:
# Number of components in the vector that represents each word index.
dimension_size = 10

# A model made of a single Embedding layer: it maps each of the sentence_len
# indices in a padded sentence (values below dict_size) to a vector of
# dimension_size floats, i.e. one matrix of vectors per sentence.
model = Sequential([
    Embedding(
        input_length=sentence_len,
        output_dim=dimension_size,
        input_dim=dict_size,
    ),
])

# NOTE(review): no fit() call is visible in this section, so the vectors shown
# afterwards are presumably the layer's initial weights -- confirm.
model.compile(loss='mse', optimizer='adam')

In [18]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


# View the matrix of vectors created by the model:

In [19]:
# Run every padded sentence through the model and print the resulting vectors
# (one vector per position of each sentence).
print(model.predict(padded_sentences))


[[[-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709
    0.0107444   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
  [-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709
    0.0107444   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
  [-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709
    0.0107444   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
  [-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709
    0.0107444   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
  [-0.03593638  0.02018959  0.01617993 -0.02772974  0.04679427
   -0.04746431 -0.01610799  0.04304544  0.02472633  0.0055216 ]
  [-0.00520004 -0.01349672 -0.02784869  0.04319289 -0.00509202
   -0.04871743  0.03194995 -0.03213909  0.00278695  0.00949256]
  [ 0.0166446  -0.04654507 -0.02456887  0.00564244  0.02344747
    0.02563907  0.04912988  0.00915558  0.04932548  0.02292186]
  [ 0.02634725 -0.00056863  0.02340334 -0.00568091 -0.02338189
   -0.04797307  0.01423924 -0.02016311 -0.010325

# View the model-created matrix of vectors for a single sentence:

In [20]:
print(padded_sentences[0]) # padded sentence @ 0th index (a row of word indices, not a single word)

[   0    0    0    0 2004 6952 6611 5140]


In [21]:
# Show the matrix of vectors for the sentence @ 0th index (one vector per
# position of the padded sentence, not a single word). Only that sentence is
# sent through the model: the [:1] slice keeps the batch axis that predict
# expects, and [0] then drops it again.
print(model.predict(padded_sentences[:1])[0])

[[-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709  0.0107444
   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
 [-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709  0.0107444
   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
 [-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709  0.0107444
   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
 [-0.00566865 -0.02244756 -0.03879438  0.01663173 -0.00990709  0.0107444
   0.01006147 -0.01069459 -0.04335741 -0.017226  ]
 [-0.03593638  0.02018959  0.01617993 -0.02772974  0.04679427 -0.04746431
  -0.01610799  0.04304544  0.02472633  0.0055216 ]
 [-0.00520004 -0.01349672 -0.02784869  0.04319289 -0.00509202 -0.04871743
   0.03194995 -0.03213909  0.00278695  0.00949256]
 [ 0.0166446  -0.04654507 -0.02456887  0.00564244  0.02344747  0.02563907
   0.04912988  0.00915558  0.04932548  0.02292186]
 [ 0.02634725 -0.00056863  0.02340334 -0.00568091 -0.02338189 -0.04797307
   0.01423924 -0.02016311 -0.01032519  0.03599269]]
