## Word Embedding Techniques

In [2]:
from keras.preprocessing.text import one_hot

In [4]:
# Toy corpus: five short sentences that the rest of the notebook encodes,
# pads and feeds through an Embedding layer.
sent = [
    "My name is Abhinav",
    "I play Cricket",
    "I play COC",
    "I love coding",
    "Never loose your passion",
]

In [5]:
# Vocabulary size: upper bound for the integer word indices and the number
# of rows in the Embedding layer's lookup table.
dict_size = 10_000

#### One-Hot Encoding (integer word indices)

In [6]:
# Encode each sentence as a list of integer word indices (one per word),
# bounded by dict_size. Keras' one_hot assigns indices by hashing, so two
# different words are not guaranteed to receive different indices.
one_hot_rep = [one_hot(sentence, dict_size) for sentence in sent]

In [7]:
# Show the encoded corpus: one list of integer indices per sentence.
one_hot_rep

[[4014, 1075, 189, 1592],
 [6096, 4617, 5895],
 [6096, 4617, 3204],
 [6096, 9818, 4293],
 [3169, 9001, 6463, 8838]]

#### Word Embedding

In [11]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding
from keras.models import Sequential

In [12]:
import numpy as np

In [14]:
# Fixed sequence length expected by the Embedding layer below.
sent_len = 6

# Bring every encoded sentence to sent_len entries; padding='pre' places the
# filler zeros at the front of the shorter sequences.
embed = pad_sequences(one_hot_rep, maxlen=sent_len, padding='pre')
embed

array([[   0,    0, 4014, 1075,  189, 1592],
       [   0,    0,    0, 6096, 4617, 5895],
       [   0,    0,    0, 6096, 4617, 3204],
       [   0,    0,    0, 6096, 9818, 4293],
       [   0,    0, 3169, 9001, 6463, 8838]], dtype=int32)

In [16]:
# Size of the dense vector learned for each vocabulary index.
dim = 10

# Single-layer model: maps each of the sent_len integer indices in a padded
# sentence to a dim-dimensional vector.
model = Sequential([
    Embedding(dict_size, dim, input_length=sent_len),
])

# No fit() call appears in the visible cells, so the optimizer/loss chosen
# here are not exercised by the predict() calls that follow.
model.compile(optimizer='Adam', loss='mse')

In [18]:
# Print the layer table; the Embedding layer holds dict_size * dim weights.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 6, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Look up the embedding vector for every index of every padded sentence.
# NOTE(review): this displays the full result array; the cell that indexes
# [0] shows a single sentence and is easier to read.
model.predict(embed)



array([[[ 0.02868808, -0.0201443 , -0.03743352,  0.01819273,
         -0.04758736,  0.01621306, -0.02936265, -0.01795735,
          0.03042107,  0.04652282],
        [ 0.02868808, -0.0201443 , -0.03743352,  0.01819273,
         -0.04758736,  0.01621306, -0.02936265, -0.01795735,
          0.03042107,  0.04652282],
        [ 0.04455418, -0.03196137, -0.01688349,  0.0452983 ,
         -0.03164688, -0.01411376,  0.04228655,  0.04899528,
          0.03139291, -0.02944908],
        [-0.0491448 , -0.03743197,  0.04244404,  0.03854531,
         -0.01626402,  0.02947721,  0.03353201,  0.0175595 ,
         -0.01843844,  0.04035402],
        [ 0.02446846, -0.03302245,  0.03236702,  0.03697034,
          0.04170531,  0.04534252,  0.03958776,  0.04662228,
         -0.00085901, -0.04826151],
        [-0.00901182, -0.00805793,  0.02708325,  0.0060608 ,
         -0.03754038,  0.00447209,  0.0352683 ,  0.00424144,
          0.04625836,  0.02940277]],

       [[ 0.02868808, -0.0201443 , -0.03743352,  0

In [21]:
# Padded index sequence of the first sentence (leading zeros are padding).
embed[0]

array([   0,    0, 4014, 1075,  189, 1592], dtype=int32)

In [20]:
# Embedding vectors for the first sentence: one row per position of the
# padded sequence. Positions that share an index (e.g. the leading padding
# zeros) map to the same row of the lookup table, hence identical vectors.
model.predict(embed)[0]



array([[ 0.02868808, -0.0201443 , -0.03743352,  0.01819273, -0.04758736,
         0.01621306, -0.02936265, -0.01795735,  0.03042107,  0.04652282],
       [ 0.02868808, -0.0201443 , -0.03743352,  0.01819273, -0.04758736,
         0.01621306, -0.02936265, -0.01795735,  0.03042107,  0.04652282],
       [ 0.04455418, -0.03196137, -0.01688349,  0.0452983 , -0.03164688,
        -0.01411376,  0.04228655,  0.04899528,  0.03139291, -0.02944908],
       [-0.0491448 , -0.03743197,  0.04244404,  0.03854531, -0.01626402,
         0.02947721,  0.03353201,  0.0175595 , -0.01843844,  0.04035402],
       [ 0.02446846, -0.03302245,  0.03236702,  0.03697034,  0.04170531,
         0.04534252,  0.03958776,  0.04662228, -0.00085901, -0.04826151],
       [-0.00901182, -0.00805793,  0.02708325,  0.0060608 , -0.03754038,
         0.00447209,  0.0352683 ,  0.00424144,  0.04625836,  0.02940277]],
      dtype=float32)