### Importing modules

In [20]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

### Demo sentences

In [2]:
sent=[  'the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good',]

### Vocabulary size

In [3]:
voc=10000

### OHE

In [5]:
one_hot_repr=[]
for word in sent:
    one_hot_repr.append(one_hot(word,voc))
one_hot_repr

[[5502, 4948, 8963, 5744],
 [5502, 4948, 8963, 2711],
 [5502, 826, 8963, 4426],
 [1982, 2243, 6522, 4161, 4925],
 [1982, 2243, 6522, 4161, 7296],
 [4553, 5502, 1470, 8963, 153],
 [2647, 9090, 6401, 4161]]

### Sequence padding

In [10]:
sent_length=8
padded_doc=pad_sequences(one_hot_repr,padding="post",maxlen=sent_length)
padded_doc

array([[5502, 4948, 8963, 5744,    0,    0,    0,    0],
       [5502, 4948, 8963, 2711,    0,    0,    0,    0],
       [5502,  826, 8963, 4426,    0,    0,    0,    0],
       [1982, 2243, 6522, 4161, 4925,    0,    0,    0],
       [1982, 2243, 6522, 4161, 7296,    0,    0,    0],
       [4553, 5502, 1470, 8963,  153,    0,    0,    0],
       [2647, 9090, 6401, 4161,    0,    0,    0,    0]], dtype=int32)

### Embedding layer

In [12]:
dim=10

In [13]:
model=Sequential()
model.add(Embedding(voc,dim,input_length=sent_length))

In [14]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [16]:
model.predict(padded_doc)

2024-08-20 13:53:49.463765: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.




array([[[-4.7506511e-02,  2.6345421e-02, -1.9881837e-03, -3.3878494e-02,
          4.3805186e-02, -3.1193383e-03,  8.3130822e-03,  3.3704642e-02,
          3.0024648e-03,  2.3652945e-02],
        [-4.6246421e-02, -4.2559016e-02, -1.0761164e-02, -3.4118608e-02,
          2.2608805e-02,  4.6626691e-02, -2.1323919e-02,  2.0639706e-02,
          5.2236021e-05,  2.0693373e-02],
        [ 2.2160199e-02,  2.0448033e-02, -3.4740996e-02, -4.3388139e-02,
          4.0083017e-02, -2.8900838e-02, -4.3852817e-02,  4.4123020e-02,
          3.9559256e-02, -8.1152916e-03],
        [-1.8852545e-02, -4.4096556e-02, -4.6085991e-02,  3.6819566e-02,
         -4.1535247e-02, -1.5919041e-02,  5.9199445e-03,  4.4077840e-02,
         -2.0632302e-02, -2.7477145e-03],
        [ 3.8922492e-02,  4.7350336e-02,  3.5214033e-02, -2.1944214e-02,
          2.7646150e-02, -3.3704221e-02,  7.1261302e-03,  2.2747207e-02,
          2.4272490e-02, -7.9869032e-03],
        [ 3.8922492e-02,  4.7350336e-02,  3.5214033e-02, -2.

In [18]:
padded_doc[0]

array([5502, 4948, 8963, 5744,    0,    0,    0,    0], dtype=int32)

In [19]:
model.predict(padded_doc[0])



array([[-4.7506511e-02,  2.6345421e-02, -1.9881837e-03, -3.3878494e-02,
         4.3805186e-02, -3.1193383e-03,  8.3130822e-03,  3.3704642e-02,
         3.0024648e-03,  2.3652945e-02],
       [-4.6246421e-02, -4.2559016e-02, -1.0761164e-02, -3.4118608e-02,
         2.2608805e-02,  4.6626691e-02, -2.1323919e-02,  2.0639706e-02,
         5.2236021e-05,  2.0693373e-02],
       [ 2.2160199e-02,  2.0448033e-02, -3.4740996e-02, -4.3388139e-02,
         4.0083017e-02, -2.8900838e-02, -4.3852817e-02,  4.4123020e-02,
         3.9559256e-02, -8.1152916e-03],
       [-1.8852545e-02, -4.4096556e-02, -4.6085991e-02,  3.6819566e-02,
        -4.1535247e-02, -1.5919041e-02,  5.9199445e-03,  4.4077840e-02,
        -2.0632302e-02, -2.7477145e-03],
       [ 3.8922492e-02,  4.7350336e-02,  3.5214033e-02, -2.1944214e-02,
         2.7646150e-02, -3.3704221e-02,  7.1261302e-03,  2.2747207e-02,
         2.4272490e-02, -7.9869032e-03],
       [ 3.8922492e-02,  4.7350336e-02,  3.5214033e-02, -2.1944214e-02,
   