## Word Embedding Techniques using Embedding Layer in Keras

### Import Important Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')
import re
import nltk

In [2]:
import keras
import tensorflow

In [3]:
from keras.preprocessing.text import one_hot

In [4]:
### sentences
sent=['the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good']

In [5]:
print(sent)

['the glass of milk', 'the glass of juice', 'the cup of tea', 'I am a good boy', 'I am a good developer', 'understand the meaning of words', 'your videos are good']


In [6]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [7]:
# Vocabulary size
voc_size=10000

### One Hot Representation

In [8]:
onehot_rep=[one_hot(words,voc_size) for words in sent]

In [9]:
print(onehot_rep)

[[3044, 7608, 2625, 8908], [3044, 7608, 2625, 7110], [3044, 9934, 2625, 271], [2974, 1957, 2881, 3135, 278], [2974, 1957, 2881, 3135, 6650], [3106, 3044, 7462, 2625, 9946], [584, 6799, 3042, 3135]]


### Word Embedding Represntation

In [10]:
from keras.layers import Embedding
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences

In [11]:
sent_length=8

In [12]:
embedded_docs=pad_sequences(onehot_rep,
    maxlen=sent_length,
    padding='pre')

In [13]:
print(embedded_docs)

[[   0    0    0    0 3044 7608 2625 8908]
 [   0    0    0    0 3044 7608 2625 7110]
 [   0    0    0    0 3044 9934 2625  271]
 [   0    0    0 2974 1957 2881 3135  278]
 [   0    0    0 2974 1957 2881 3135 6650]
 [   0    0    0 3106 3044 7462 2625 9946]
 [   0    0    0    0  584 6799 3042 3135]]


In [14]:
dim=10

In [15]:
model=Sequential()

In [16]:
model.add(Embedding(voc_size,10,input_length=sent_length))

In [17]:
model.compile(optimizer='Adam',loss='mse',metrics=['accuracy'])

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [19]:
print(model.predict(embedded_docs))

[[[ 3.60885002e-02 -2.85141822e-02 -3.74285206e-02  3.96836884e-02
   -2.94219255e-02 -3.11623458e-02 -1.75984018e-02 -4.38874476e-02
    1.64636411e-02 -3.83419506e-02]
  [ 3.60885002e-02 -2.85141822e-02 -3.74285206e-02  3.96836884e-02
   -2.94219255e-02 -3.11623458e-02 -1.75984018e-02 -4.38874476e-02
    1.64636411e-02 -3.83419506e-02]
  [ 3.60885002e-02 -2.85141822e-02 -3.74285206e-02  3.96836884e-02
   -2.94219255e-02 -3.11623458e-02 -1.75984018e-02 -4.38874476e-02
    1.64636411e-02 -3.83419506e-02]
  [ 3.60885002e-02 -2.85141822e-02 -3.74285206e-02  3.96836884e-02
   -2.94219255e-02 -3.11623458e-02 -1.75984018e-02 -4.38874476e-02
    1.64636411e-02 -3.83419506e-02]
  [-2.40303520e-02  4.31276448e-02 -2.59369854e-02  3.11488248e-02
    9.96699184e-03 -1.26585737e-02 -1.16947778e-02 -2.38564741e-02
   -4.89860065e-02 -4.23277393e-02]
  [-2.59581953e-03  4.45671715e-02  4.68119495e-02  3.90434898e-02
   -4.45747264e-02 -3.71501669e-02 -3.50582823e-02 -2.25970745e-02
    6.87447935e-

In [20]:
embedded_docs[0]

array([   0,    0,    0,    0, 3044, 7608, 2625, 8908])

In [21]:
model.predict(embedded_docs[0])



array([[[ 0.0360885 , -0.02851418, -0.03742852,  0.03968369,
         -0.02942193, -0.03116235, -0.0175984 , -0.04388745,
          0.01646364, -0.03834195]],

       [[ 0.0360885 , -0.02851418, -0.03742852,  0.03968369,
         -0.02942193, -0.03116235, -0.0175984 , -0.04388745,
          0.01646364, -0.03834195]],

       [[ 0.0360885 , -0.02851418, -0.03742852,  0.03968369,
         -0.02942193, -0.03116235, -0.0175984 , -0.04388745,
          0.01646364, -0.03834195]],

       [[ 0.0360885 , -0.02851418, -0.03742852,  0.03968369,
         -0.02942193, -0.03116235, -0.0175984 , -0.04388745,
          0.01646364, -0.03834195]],

       [[-0.02403035,  0.04312764, -0.02593699,  0.03114882,
          0.00996699, -0.01265857, -0.01169478, -0.02385647,
         -0.04898601, -0.04232774]],

       [[-0.00259582,  0.04456717,  0.04681195,  0.03904349,
         -0.04457473, -0.03715017, -0.03505828, -0.02259707,
          0.00687448, -0.0449139 ]],

       [[-0.03123105, -0.04324057, -0.03