In [1]:
#!pip install tensorflow

In [2]:
import tensorflow as tf
print(tf.__version__)

2.18.0


In [3]:
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
sentences = ["The cat slept under the warm sun.",
             "I forgot my keys at home again",
             "She danced gracefully across the wooden floor.",
             "Rain fell softly in the town.",
             "He always drinks coffee before the meeting.",
             "They live near the river."]

In [5]:
sentences

['The cat slept under the warm sun.',
 'I forgot my keys at home again',
 'She danced gracefully across the wooden floor.',
 'Rain fell softly in the town.',
 'He always drinks coffee before the meeting.',
 'They live near the river.']

In [6]:
voc_size = 800

In [7]:
onehot_rep = [one_hot(sentence, voc_size) for sentence in sentences]
print(onehot_rep)

[[487, 226, 768, 552, 487, 667, 597], [565, 406, 380, 799, 9, 345, 425], [717, 192, 707, 97, 487, 35, 506], [760, 136, 108, 379, 487, 156], [793, 358, 131, 595, 64, 487, 450], [238, 333, 186, 487, 353]]


In [8]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [9]:
import numpy as np

In [10]:
# pre-padding
sent_length = 10
embedded_docs = pad_sequences(onehot_rep, padding='pre', maxlen = sent_length)
print(embedded_docs)

[[  0   0   0 487 226 768 552 487 667 597]
 [  0   0   0 565 406 380 799   9 345 425]
 [  0   0   0 717 192 707  97 487  35 506]
 [  0   0   0   0 760 136 108 379 487 156]
 [  0   0   0 793 358 131 595  64 487 450]
 [  0   0   0   0   0 238 333 186 487 353]]


In [11]:
dim = 20

In [12]:
model = Sequential()
model.add(Embedding(voc_size, dim))
model.compile(optimizer='adam', loss='mse')
model.build(input_shape=(None, sent_length))

In [13]:
model.summary()

In [14]:
embedded_docs[0]

array([  0,   0,   0, 487, 226, 768, 552, 487, 667, 597], dtype=int32)

In [15]:
prediction = model.predict(np.expand_dims(embedded_docs[0], axis=0))
print(prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455ms/step
[[[-1.40851736e-02 -8.57321173e-03  1.01863854e-02 -5.99764287e-04
    4.94033806e-02  3.29439528e-02  2.48055570e-02  3.58377807e-02
    3.17856409e-02  2.97233723e-02  3.12947296e-02  3.89843024e-02
    4.57506441e-02 -3.08294781e-02  1.92602016e-02 -1.84207782e-02
   -1.53660886e-02  2.43029110e-02 -9.03522968e-03  2.92396061e-02]
  [-1.40851736e-02 -8.57321173e-03  1.01863854e-02 -5.99764287e-04
    4.94033806e-02  3.29439528e-02  2.48055570e-02  3.58377807e-02
    3.17856409e-02  2.97233723e-02  3.12947296e-02  3.89843024e-02
    4.57506441e-02 -3.08294781e-02  1.92602016e-02 -1.84207782e-02
   -1.53660886e-02  2.43029110e-02 -9.03522968e-03  2.92396061e-02]
  [-1.40851736e-02 -8.57321173e-03  1.01863854e-02 -5.99764287e-04
    4.94033806e-02  3.29439528e-02  2.48055570e-02  3.58377807e-02
    3.17856409e-02  2.97233723e-02  3.12947296e-02  3.89843024e-02
    4.57506441e-02 -3.08294781e-02  1.92602016e-02 -1.

In [16]:
prediction = model.predict(embedded_docs)
print(prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 423ms/step
[[[-1.40851736e-02 -8.57321173e-03  1.01863854e-02 ...  2.43029110e-02
   -9.03522968e-03  2.92396061e-02]
  [-1.40851736e-02 -8.57321173e-03  1.01863854e-02 ...  2.43029110e-02
   -9.03522968e-03  2.92396061e-02]
  [-1.40851736e-02 -8.57321173e-03  1.01863854e-02 ...  2.43029110e-02
   -9.03522968e-03  2.92396061e-02]
  ...
  [-2.98568849e-02 -7.34003633e-03  4.11977880e-02 ...  1.16208084e-02
   -4.36469913e-02  4.12144177e-02]
  [ 6.50960207e-03 -4.11647446e-02  1.88718326e-02 ... -8.03098083e-05
    2.51701586e-02  2.11212747e-02]
  [ 1.05492845e-02 -5.73696941e-03  2.95851715e-02 ... -2.93368101e-02
    4.56637032e-02 -4.31499593e-02]]

 [[-1.40851736e-02 -8.57321173e-03  1.01863854e-02 ...  2.43029110e-02
   -9.03522968e-03  2.92396061e-02]
  [-1.40851736e-02 -8.57321173e-03  1.01863854e-02 ...  2.43029110e-02
   -9.03522968e-03  2.92396061e-02]
  [-1.40851736e-02 -8.57321173e-03  1.01863854e-02 ...  2.43029

In [17]:
sent=["The world is a better place",
      "Marvel series is my favourite movie",
      "I like DC movies",
      "the cat is eating the food",
      "Tom and Jerry is my favourite movie",
      "Python is my favourite programming language"
      ]

In [18]:
sent

['The world is a better place',
 'Marvel series is my favourite movie',
 'I like DC movies',
 'the cat is eating the food',
 'Tom and Jerry is my favourite movie',
 'Python is my favourite programming language']

In [19]:
voc_size2 = 500

In [20]:
onehot_rep = [one_hot(sentence, voc_size2) for sentence in sent]
print(onehot_rep)

[[244, 366, 111, 144, 298, 303], [470, 49, 111, 142, 301, 124], [139, 6, 353, 339], [244, 368, 111, 317, 244, 43], [434, 143, 382, 111, 142, 301, 124], [303, 111, 142, 301, 6, 435]]


In [21]:
#pre-padding
sent_length2 = 8
embedded_docs2 = pad_sequences(onehot_rep, padding='pre', maxlen = sent_length2)
print(embedded_docs2)

[[  0   0 244 366 111 144 298 303]
 [  0   0 470  49 111 142 301 124]
 [  0   0   0   0 139   6 353 339]
 [  0   0 244 368 111 317 244  43]
 [  0 434 143 382 111 142 301 124]
 [  0   0 303 111 142 301   6 435]]


In [22]:
dim = 15

In [23]:
model2 = Sequential()
model2.add(Embedding(voc_size2, dim))
model2.build(input_shape=(None, sent_length2))

In [24]:
model2.summary()

In [25]:
#'The world is a better place'
embedded_docs2[0]

array([  0,   0, 244, 366, 111, 144, 298, 303], dtype=int32)

In [29]:
print(model2.predict(np.expand_dims(embedded_docs2[0], axis=0)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[[[ 0.02391589  0.01001958  0.01729863  0.04229536 -0.01741004
   -0.04127076  0.04046633  0.0221996   0.00871606 -0.00891266
    0.00156021  0.02642641 -0.03368062  0.04161533 -0.01710169]
  [ 0.02391589  0.01001958  0.01729863  0.04229536 -0.01741004
   -0.04127076  0.04046633  0.0221996   0.00871606 -0.00891266
    0.00156021  0.02642641 -0.03368062  0.04161533 -0.01710169]
  [ 0.02011608 -0.0307309  -0.03351319 -0.0205757  -0.02718682
    0.02531549  0.00735302  0.02799315 -0.02684431 -0.02796882
   -0.03155934  0.03334211 -0.01070524  0.0026815  -0.0388129 ]
  [-0.03627858  0.02070883  0.02845222  0.02393677 -0.02504662
    0.00654352 -0.03110939 -0.03442134  0.0348632   0.03069473
    0.03569337 -0.01404748 -0.03546386 -0.02164885  0.01698441]
  [ 0.01156516 -0.02661455 -0.04607034  0.04842607 -0.0208254
    0.04806367 -0.04408463  0.00746564  0.02166747 -0.00553671
   -0.0288537  -0.00480817  0.02296517 -0.

In [30]:
print(model2.predict(embedded_docs2))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[[[ 0.02391589  0.01001958  0.01729863  0.04229536 -0.01741004
   -0.04127076  0.04046633  0.0221996   0.00871606 -0.00891266
    0.00156021  0.02642641 -0.03368062  0.04161533 -0.01710169]
  [ 0.02391589  0.01001958  0.01729863  0.04229536 -0.01741004
   -0.04127076  0.04046633  0.0221996   0.00871606 -0.00891266
    0.00156021  0.02642641 -0.03368062  0.04161533 -0.01710169]
  [ 0.02011608 -0.0307309  -0.03351319 -0.0205757  -0.02718682
    0.02531549  0.00735302  0.02799315 -0.02684431 -0.02796882
   -0.03155934  0.03334211 -0.01070524  0.0026815  -0.0388129 ]
  [-0.03627858  0.02070883  0.02845222  0.02393677 -0.02504662
    0.00654352 -0.03110939 -0.03442134  0.0348632   0.03069473
    0.03569337 -0.01404748 -0.03546386 -0.02164885  0.01698441]
  [ 0.01156516 -0.02661455 -0.04607034  0.04842607 -0.0208254
    0.04806367 -0.04408463  0.00746564  0.02166747 -0.00553671
   -0.0288537  -0.00480817  0.02296517 -0.