In [1]:
# Toy corpus: seven short viewer comments about a channel, some positive and
# some negative. Each string is one document for the encoding steps below.
sentences = [
    "This YouTube channel is perfect",
    "Your videos are quite good",
    "I enjoy this channel",
    "I did not learn much from your videos",
    "Excellent",
    "poor work",
    "it is not a good channel",
]

In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Upper bound on word ids: passed to one_hot() as the hashing space and to the
# Embedding layer as its input dimension (500 rows x 10 columns accounts for
# the 5,000 parameters reported by model.summary()).
vocab_size = 500

In [4]:
# Turn each sentence into a list of integer word ids, one id per word.
# NOTE(review): despite its name, Keras' one_hot() hashes words into the range
# [1, vocab_size) instead of building one-hot vectors; the Keras docs warn that
# the default hash is not stable across runs and that distinct words may
# collide, so the ids shown below may differ after a kernel restart.
encodded_docs = list(map(lambda text: one_hot(text, vocab_size), sentences))
encodded_docs

[[363, 367, 387, 393, 140],
 [49, 372, 184, 317, 122],
 [312, 10, 363, 387],
 [312, 130, 307, 61, 27, 13, 49, 372],
 [304],
 [395, 391],
 [266, 393, 307, 357, 122, 387]]

In [5]:
# Pad every document to the length of the longest encoded sentence. Derived
# from the data rather than counted by hand, so it stays correct if the corpus
# changes; for the current sentences this is 8
# ('I did not learn much from your videos').
max_len = max(len(doc) for doc in encodded_docs)

In [6]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [7]:
# Left-pad each id list with zeros so every row has exactly max_len entries,
# giving one rectangular integer array for the whole corpus.
padded_docs = pad_sequences(
    encodded_docs,
    maxlen=max_len,
    padding='pre',
)
padded_docs

array([[  0,   0,   0, 363, 367, 387, 393, 140],
       [  0,   0,   0,  49, 372, 184, 317, 122],
       [  0,   0,   0,   0, 312,  10, 363, 387],
       [312, 130, 307,  61,  27,  13,  49, 372],
       [  0,   0,   0,   0,   0,   0,   0, 304],
       [  0,   0,   0,   0,   0,   0, 395, 391],
       [  0,   0, 266, 393, 307, 357, 122, 387]], dtype=int32)

##### **Padding** is a special form of masking where the masked steps are at the start or the end of a sequence. Padding comes from the need to encode sequence data into contiguous batches: in order to make all sequences in a batch fit a given standard length, it is necessary to pad or truncate some sequences.

In [8]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential

In [9]:
dim = 10  # length of each word's embedding vector (the Embedding layer's output dimension)

In [10]:
# One-layer model: an embedding table with vocab_size rows and dim columns,
# applied to sequences of max_len word ids.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=dim, input_length=max_len),
])

In [11]:
# Configure the model with the Adam optimizer and mean-squared-error loss.
# NOTE(review): no fit() call is visible in this part of the notebook, so these
# settings do not affect the predictions shown below.
model.compile(loss='mse', optimizer='adam')

In [12]:
model.summary()   # output shape (None, 8, 10) = (batch, max_len, dim); 500 * 10 = 5,000 embedding weights

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             5000      
                                                                 
Total params: 5,000
Trainable params: 5,000
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Look up the embedding vector of every word id in every padded sentence.
# The padding id 0 always maps to the same vector, which is why the leading
# rows of the output below are identical.
# NOTE(review): no training step is visible above, so these are presumably the
# layer's initial weights - confirm.
model.predict(padded_docs)



array([[[ 1.4937151e-02,  4.5698289e-02,  3.5751116e-02, -4.5654621e-02,
         -2.0502770e-02,  3.4934130e-02,  1.4272843e-02, -5.1577203e-03,
          1.5482951e-02, -3.8478054e-02],
        [ 1.4937151e-02,  4.5698289e-02,  3.5751116e-02, -4.5654621e-02,
         -2.0502770e-02,  3.4934130e-02,  1.4272843e-02, -5.1577203e-03,
          1.5482951e-02, -3.8478054e-02],
        [ 1.4937151e-02,  4.5698289e-02,  3.5751116e-02, -4.5654621e-02,
         -2.0502770e-02,  3.4934130e-02,  1.4272843e-02, -5.1577203e-03,
          1.5482951e-02, -3.8478054e-02],
        [ 1.0377873e-02,  4.8545215e-02, -2.2197081e-02, -3.5139907e-02,
          4.3953110e-02, -6.9891922e-03,  4.7295477e-02,  3.6688160e-02,
          2.0407330e-02, -1.8194843e-02],
        [-2.0008162e-04, -2.0929610e-02, -1.9152761e-03,  9.9848956e-05,
          3.8611460e-02,  3.4551695e-04, -4.9361300e-02,  4.6174217e-02,
          2.3159731e-02,  1.3379741e-02],
        [-3.3457063e-02, -3.3339486e-03,  5.9772134e-03,  1.

In [14]:
# Second sentence ('Your videos are quite good') after padding:
# three leading zeros followed by its five word ids.
padded_docs[1]

array([  0,   0,   0,  49, 372, 184, 317, 122], dtype=int32)

In [15]:
# Embed only the second sentence. predict() treats the first axis of its input
# as the batch axis, so the 1-D row padded_docs[1] would be read as eight
# separate one-token samples rather than one eight-token sentence. Slicing with
# 1:2 keeps the batch axis (shape (1, 8)); the trailing [0] then drops it again
# and leaves the sentence's (8, 10) matrix of embedding vectors.
model.predict(padded_docs[1:2])[0]



array([[ 0.01493715,  0.04569829,  0.03575112, -0.04565462, -0.02050277,
         0.03493413,  0.01427284, -0.00515772,  0.01548295, -0.03847805],
       [ 0.01493715,  0.04569829,  0.03575112, -0.04565462, -0.02050277,
         0.03493413,  0.01427284, -0.00515772,  0.01548295, -0.03847805],
       [ 0.01493715,  0.04569829,  0.03575112, -0.04565462, -0.02050277,
         0.03493413,  0.01427284, -0.00515772,  0.01548295, -0.03847805],
       [ 0.01164789,  0.04886588,  0.00606759, -0.02454543, -0.01544728,
        -0.01726481,  0.03407775,  0.04624688, -0.00264418,  0.03813205],
       [ 0.01541556,  0.0215686 ,  0.01644886, -0.03761393, -0.03297211,
         0.04129836,  0.02086029,  0.04406682, -0.0239777 , -0.00281906],
       [-0.00248141, -0.03269788,  0.04949896,  0.03749346, -0.04019185,
         0.01912839,  0.03749127,  0.03847038,  0.00161525, -0.01523112],
       [-0.0163789 ,  0.02832511, -0.02826318, -0.03895397, -0.03141497,
         0.03316658, -0.01815916, -0.04364952