### Word Embeddings with a Keras Embedding Layer (Word2Vec-style dense vectors)

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences that the rest of the notebook encodes,
# pads and feeds through an Embedding layer.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [3]:
# Echo the corpus so its contents can be checked before encoding.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
# Vocabulary size: passed to one_hot() as the size of the index space and to
# the Embedding layer as its number of rows.
vocab_size = 10_000

In [5]:
# Integer-encode each sentence: one_hot() turns every word into an index
# bounded by vocab_size and returns one list of indices per sentence.
# NOTE(review): despite its name, Keras documents one_hot as a hashing-based
# encoder (it does not build one-hot vectors), so distinct words may share an
# index and the indices may differ between Python sessions -- confirm against
# the Keras docs before relying on the exact numbers displayed.
one_hot_representations = [
    one_hot(sentence, vocab_size)
    for sentence in sent
]
one_hot_representations

[[6388, 6014, 2620, 246],
 [6388, 6014, 2620, 9211],
 [6388, 2477, 2620, 2740],
 [1151, 6577, 3038, 9777, 7970],
 [1151, 6577, 3038, 9777, 1205],
 [3694, 6388, 7989, 2620, 7298],
 [4948, 5811, 4456, 9777]]

In [6]:
## Word Embedding Representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [10]:
# Bring every encoded sentence to the same length so they stack into a single
# 2-D array; padding='pre' places the fill values at the front of each row.
sent_len = 8
embedded_docs = pad_sequences(
    one_hot_representations,
    maxlen=sent_len,
    padding='pre',
)
embedded_docs

array([[   0,    0,    0,    0, 6388, 6014, 2620,  246],
       [   0,    0,    0,    0, 6388, 6014, 2620, 9211],
       [   0,    0,    0,    0, 6388, 2477, 2620, 2740],
       [   0,    0,    0, 1151, 6577, 3038, 9777, 7970],
       [   0,    0,    0, 1151, 6577, 3038, 9777, 1205],
       [   0,    0,    0, 3694, 6388, 7989, 2620, 7298],
       [   0,    0,    0,    0, 4948, 5811, 4456, 9777]])

In [11]:
## Feature Representation
# Length of the dense vector the Embedding layer produces for each word index.
dim = 10

In [12]:
# A one-layer model: the Embedding layer looks up a dim-sized vector for each
# of the sent_len integer indices in a padded sentence.
# An optimizer and a loss are configured, but no fit() call appears in the
# visible cells, so later predictions reflect whatever weights the layer
# starts with.
model = Sequential([
    Embedding(vocab_size, dim, input_length=sent_len),
])
model.compile(optimizer='adam', loss='mse')





In [13]:
# Print the architecture: a single Embedding layer whose parameter count is
# vocab_size * dim and whose output is one dim-sized vector per position.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# Look up embeddings for the whole padded corpus; the result has one
# dim-sized vector for each of the sent_len positions in every sentence.
model.predict(embedded_docs)



array([[[ 0.00284271,  0.01342512,  0.03625572,  0.03124808,
         -0.00405245,  0.03966117, -0.03558288, -0.04208861,
         -0.04252188,  0.02581248],
        [ 0.00284271,  0.01342512,  0.03625572,  0.03124808,
         -0.00405245,  0.03966117, -0.03558288, -0.04208861,
         -0.04252188,  0.02581248],
        [ 0.00284271,  0.01342512,  0.03625572,  0.03124808,
         -0.00405245,  0.03966117, -0.03558288, -0.04208861,
         -0.04252188,  0.02581248],
        [ 0.00284271,  0.01342512,  0.03625572,  0.03124808,
         -0.00405245,  0.03966117, -0.03558288, -0.04208861,
         -0.04252188,  0.02581248],
        [ 0.00534222, -0.04960351, -0.03861881, -0.00427437,
          0.03081706, -0.00310978,  0.02975724,  0.04340969,
          0.0046097 , -0.03107804],
        [ 0.01618524, -0.01965902, -0.00494396, -0.04008073,
          0.03500918,  0.02717813,  0.01163474,  0.01224322,
         -0.0361814 ,  0.02706844],
        [ 0.04775765,  0.03937414,  0.03765552, -0.0

In [15]:
# Inspect the padded index row of the first sentence (a 1-D array of sent_len ints).
embedded_docs[0]

array([   0,    0,    0,    0, 6388, 6014, 2620,  246])

In [16]:
# Embed only the first sentence. Keras reads axis 0 of the input as the batch
# axis, so passing the bare 1-D row embedded_docs[0] would be treated as
# sent_len separate scalar samples instead of one sequence matching the
# model's (None, sent_len) input. Slicing with [:1] keeps the batch axis;
# the trailing [0] drops it again so the displayed result stays one
# dim-sized vector per token.
model.predict(embedded_docs[:1])[0]



array([[ 0.00284271,  0.01342512,  0.03625572,  0.03124808, -0.00405245,
         0.03966117, -0.03558288, -0.04208861, -0.04252188,  0.02581248],
       [ 0.00284271,  0.01342512,  0.03625572,  0.03124808, -0.00405245,
         0.03966117, -0.03558288, -0.04208861, -0.04252188,  0.02581248],
       [ 0.00284271,  0.01342512,  0.03625572,  0.03124808, -0.00405245,
         0.03966117, -0.03558288, -0.04208861, -0.04252188,  0.02581248],
       [ 0.00284271,  0.01342512,  0.03625572,  0.03124808, -0.00405245,
         0.03966117, -0.03558288, -0.04208861, -0.04252188,  0.02581248],
       [ 0.00534222, -0.04960351, -0.03861881, -0.00427437,  0.03081706,
        -0.00310978,  0.02975724,  0.04340969,  0.0046097 , -0.03107804],
       [ 0.01618524, -0.01965902, -0.00494396, -0.04008073,  0.03500918,
         0.02717813,  0.01163474,  0.01224322, -0.0361814 ,  0.02706844],
       [ 0.04775765,  0.03937414,  0.03765552, -0.04611336,  0.01678853,
        -0.01114877, -0.01060078,  0.00735381