In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
# Toy corpus: seven short sentences used to demonstrate tokenisation,
# padding and embedding lookup further down the notebook.
sent = [
    # drinks
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    # people
    "I am a good boy",
    "I am a good developer",
    # misc
    "understand the meaning of words",
    "your videos are good",
]

In [4]:
# Learn the vocabulary from the corpus. The vocabulary is capped at 10000
# entries, and words unseen at fit time are mapped to the '<OOv>' token
# when texts are later converted to sequences.
tokenizer = Tokenizer(
    oov_token='<OOv>',
    num_words=10000,
)
tokenizer.fit_on_texts(sent)


In [6]:
# Encode every sentence as a list of integer word ids, then show the learned
# vocabulary followed by the encoded corpus.
sequences = tokenizer.texts_to_sequences(sent)
for label, value in (
    ("word index", tokenizer.word_index),
    ("sequences", sequences),
):
    print(label, value)

word index {'<OOv>': 1, 'the': 2, 'of': 3, 'good': 4, 'glass': 5, 'i': 6, 'am': 7, 'a': 8, 'milk': 9, 'juice': 10, 'cup': 11, 'tea': 12, 'boy': 13, 'developer': 14, 'understand': 15, 'meaning': 16, 'words': 17, 'your': 18, 'videos': 19, 'are': 20}
sequences [[2, 5, 3, 9], [2, 5, 3, 10], [2, 11, 3, 12], [6, 7, 8, 4, 13], [6, 7, 8, 4, 14], [15, 2, 16, 3, 17], [18, 19, 20, 4]]


In [8]:
# Padding: bring every encoded sentence to the same fixed length.
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sequences shorter than sent_length get zeros added at the front ('pre').
sent_length = 8
embedded_docs = pad_sequences(
    sequences,
    padding='pre',
    maxlen=sent_length,
)
print("after padding: \n", embedded_docs)

after padding: 
 [[ 0  0  0  0  2  5  3  9]
 [ 0  0  0  0  2  5  3 10]
 [ 0  0  0  0  2 11  3 12]
 [ 0  0  0  6  7  8  4 13]
 [ 0  0  0  6  7  8  4 14]
 [ 0  0  0 15  2 16  3 17]
 [ 0  0  0  0 18 19 20  4]]


In [10]:
# Embedding layer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.utils import set_random_seed

# The embedding matrix is randomly initialised and never trained in this
# notebook, so fix the seed to make the printed vectors reproducible.
SEED = 42
set_random_seed(SEED)

model = Sequential()
# Declare the input shape explicitly instead of passing `input_length` to
# Embedding: that argument is deprecated in Keras 3. An explicit Input also
# builds the model right away, so model.summary() can report output shapes
# and parameter counts.
model.add(Input(shape=(sent_length,), dtype='int32'))
# input_dim matches the tokenizer's num_words cap (10000); every word id is
# mapped to a 10-dimensional vector.
model.add(Embedding(input_dim=10000, output_dim=10))
# compile() is not needed for predict(); it is kept so the model is ready
# for fit() if training is added later.
model.compile(optimizer='adam', loss='mse')



In [11]:
# Print the model's layer-by-layer summary (layers, output shapes, parameter counts).
model.summary()

In [13]:
# Run the padded id matrix through the (untrained) embedding model: every
# integer id in embedded_docs is replaced by its 10-dimensional vector.
embeddings = model.predict(embedded_docs)
# NOTE(review): this prints the full array for all sentences; the output is long.
print("Embedding for all sentences:\n", embeddings)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Embedding for all sentences:
 [[[-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677
    0.0269596  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
  [-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677
    0.0269596  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
  [-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677
    0.0269596  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
  [-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677
    0.0269596  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
  [-0.01557188  0.04567356  0.0296104   0.01037627  0.0493581
   -0.02282393 -0.04433627  0.03915936 -0.0240819   0.01494971]
  [-0.01331793  0.03991386 -0.0313729   0.00733435  0.00830947
    0.01834855 -0.03679634  0.04178255 -0.0379437  -0.02279428]
  [-0.02965988 -0.01455581  0.02145943 -0.02299287  0.00428454
    0.00978596  0.00454102 -0.02001349  0.00653629 -0.00875586]
  [-0.0216

In [None]:
# Show only the first sentence's vectors, one row per token position.
# Its leading rows are identical because they all look up the padding id 0.
print("Embedding for first sentence:\n", embeddings[0])

Embedding for first sentence:
 [[-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677  0.0269596
  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
 [-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677  0.0269596
  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
 [-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677  0.0269596
  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
 [-0.00830802 -0.03738978 -0.00489508 -0.04549494  0.03731677  0.0269596
  -0.03257505 -0.01786872 -0.04782006 -0.01729386]
 [-0.01557188  0.04567356  0.0296104   0.01037627  0.0493581  -0.02282393
  -0.04433627  0.03915936 -0.0240819   0.01494971]
 [-0.01331793  0.03991386 -0.0313729   0.00733435  0.00830947  0.01834855
  -0.03679634  0.04178255 -0.0379437  -0.02279428]
 [-0.02965988 -0.01455581  0.02145943 -0.02299287  0.00428454  0.00978596
   0.00454102 -0.02001349  0.00653629 -0.00875586]
 [-0.02166904  0.04692328 -0.01898044  0.00092759 -0.01637239 -0.01361872
   0.03004623 -0.0375174

: 