In [1]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

TensorFlow version: 2.18.0
Num GPUs Available: 1


In [2]:
# importing library to perform one hot encoding
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# sentences
sent=[  'the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good']

In [4]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [7]:
# Initialize Vocabulary size
voc_size=500

### One Hot Representation
One hot will capture all index of words present in sentence

In [8]:
onehot_repr=[one_hot(words,voc_size)for words in sent]
print(onehot_repr)

[[184, 333, 426, 335], [184, 333, 426, 156], [184, 248, 426, 52], [249, 217, 365, 68, 348], [249, 217, 365, 68, 219], [6, 184, 22, 426, 46], [55, 176, 257, 68]]


### Word Embedding Represntation

In [9]:
# importing required libraries
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [10]:
# pre padding --> it will make the size of all sentences as 8 ( to train neurat net we need all sentences of fixed size)
# How ? either add zeros at last (post) or at first (pre)
sent_length=8 # assuming max sentence length as 8
embedded_docs=pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[  0   0   0   0 184 333 426 335]
 [  0   0   0   0 184 333 426 156]
 [  0   0   0   0 184 248 426  52]
 [  0   0   0 249 217 365  68 348]
 [  0   0   0 249 217 365  68 219]
 [  0   0   0   6 184  22 426  46]
 [  0   0   0   0  55 176 257  68]]


In [11]:
# 10 feature dimesnions
dim=10

In [18]:
model=Sequential() # imitialize the sequential model
model.add(Embedding(voc_size,10,input_length=sent_length)) # embedding layer --> voc_size , dim , sent_length
model.compile('adam','mse') # adam optimizer



In [14]:
model.summary()

In [15]:
# 'the glass of milk',
embedded_docs[0]

array([  0,   0,   0,   0, 184, 333, 426, 335], dtype=int32)

In [16]:
model.predict(embedded_docs[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 740ms/step


array([[-0.02951518, -0.04821581,  0.01190503,  0.03063432, -0.01647324,
        -0.00326739,  0.0382161 , -0.02596929,  0.00453269, -0.02552432],
       [-0.02951518, -0.04821581,  0.01190503,  0.03063432, -0.01647324,
        -0.00326739,  0.0382161 , -0.02596929,  0.00453269, -0.02552432],
       [-0.02951518, -0.04821581,  0.01190503,  0.03063432, -0.01647324,
        -0.00326739,  0.0382161 , -0.02596929,  0.00453269, -0.02552432],
       [-0.02951518, -0.04821581,  0.01190503,  0.03063432, -0.01647324,
        -0.00326739,  0.0382161 , -0.02596929,  0.00453269, -0.02552432],
       [ 0.02876845, -0.0331925 , -0.03746675,  0.02610873,  0.02092457,
        -0.03205695,  0.00508217,  0.00120362,  0.0436721 ,  0.03283633],
       [ 0.01118007,  0.04341436, -0.04497228,  0.03749922, -0.00399504,
        -0.01113594, -0.00393068, -0.03248816, -0.03626423,  0.0018231 ],
       [-0.00467325,  0.02081173, -0.01120172,  0.01038041, -0.01854578,
        -0.00875723, -0.0080632 ,  0.00645522

In [19]:
print(model.predict(embedded_docs))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
[[[ 2.53690407e-03 -2.23483890e-03 -2.47465018e-02 -2.34511849e-02
    3.76849882e-02  4.46288921e-02  6.41108677e-03 -9.88727808e-03
   -2.97616255e-02  3.77387516e-02]
  [ 2.53690407e-03 -2.23483890e-03 -2.47465018e-02 -2.34511849e-02
    3.76849882e-02  4.46288921e-02  6.41108677e-03 -9.88727808e-03
   -2.97616255e-02  3.77387516e-02]
  [ 2.53690407e-03 -2.23483890e-03 -2.47465018e-02 -2.34511849e-02
    3.76849882e-02  4.46288921e-02  6.41108677e-03 -9.88727808e-03
   -2.97616255e-02  3.77387516e-02]
  [ 2.53690407e-03 -2.23483890e-03 -2.47465018e-02 -2.34511849e-02
    3.76849882e-02  4.46288921e-02  6.41108677e-03 -9.88727808e-03
   -2.97616255e-02  3.77387516e-02]
  [-4.85042334e-02  4.94827367e-02 -1.28576756e-02  4.75092866e-02
   -2.37333775e-03  2.83046812e-03  4.30486314e-02 -1.94484722e-02
   -3.53161916e-02 -2.10878607e-02]
  [ 1.01671368e-03 -3.88364196e-02  1.09237209e-02 -2.81348359e-02
    4.5529