# Text processing with Keras `one_hot` encoding

In [50]:
from tensorflow.keras.preprocessing.text import one_hot

In [51]:
# Toy corpus: seven short sentences that share most of their words but differ in
# capitalisation ("You"/"you", "My"/"my") and punctuation ("dog!", "amazing?").
sentences = [
    'I love my dog',
    'I love my cat',
    'You love my dog!',
    'Do you think my dog is amazing?',
    'I love my dog so much',
    'You love my dog so much',
    'My dog not eat milk',
]

In [52]:
# Echo the corpus so its contents are visible in the notebook output.
sentences

['I love my dog',
 'I love my cat',
 'You love my dog!',
 'Do you think my dog is amazing?',
 'I love my dog so much',
 'You love my dog so much',
 'My dog not eat milk']

In [53]:
# Size of the integer index space: passed to one_hot() as its second argument and
# to the Embedding layer as its first (input dimension) argument. It is far larger
# than the number of distinct words in the corpus.
# NOTE(review): per the Keras docs one_hot() hashes words, so distinct words may
# share an index; a large space presumably keeps that unlikely - confirm.
vocabulary_size = 10000

In [56]:
# Encode every sentence as a list of integer word indices (one index per word).
# Despite the function's name, the result is not a set of one-hot vectors.
# The recorded output shows the same index for "You"/"you" and for "dog"/"dog!",
# i.e. case and punctuation do not change a word's index.
one_hot_rep = [
    one_hot(sentence, vocabulary_size)
    for sentence in sentences
]
one_hot_rep

[[6092, 6262, 3838, 318],
 [6092, 6262, 3838, 7715],
 [5741, 6262, 3838, 318],
 [2661, 5741, 9125, 3838, 318, 1132, 8342],
 [6092, 6262, 3838, 318, 2217, 6452],
 [5741, 6262, 3838, 318, 2217, 6452],
 [3838, 318, 7145, 7665, 6554]]

# Text processing with an embedding layer

In [57]:
import numpy as np
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [58]:
# Number of whitespace-separated tokens in each sentence; useful for choosing a
# padded length that does not truncate any sentence.
sentence_lengths = [len(tokens) for tokens in map(str.split, sentences)]
sentence_lengths

[4, 4, 4, 7, 6, 6, 5]

In [59]:
# Fixed length every index list is padded to. The longest sentence has 7 tokens,
# so no sentence is truncated. (The misspelled name is kept because a later cell
# reads it.)
sentences_lenth = 10

# Left-pad ('pre') each index list with zeros up to `sentences_lenth`, giving one
# row per sentence in a single 2-D integer array.
embedded_docs = pad_sequences(
    one_hot_rep,
    maxlen=sentences_lenth,
    padding='pre',
)
embedded_docs

array([[   0,    0,    0,    0,    0,    0, 6092, 6262, 3838,  318],
       [   0,    0,    0,    0,    0,    0, 6092, 6262, 3838, 7715],
       [   0,    0,    0,    0,    0,    0, 5741, 6262, 3838,  318],
       [   0,    0,    0, 2661, 5741, 9125, 3838,  318, 1132, 8342],
       [   0,    0,    0,    0, 6092, 6262, 3838,  318, 2217, 6452],
       [   0,    0,    0,    0, 5741, 6262, 3838,  318, 2217, 6452],
       [   0,    0,    0,    0,    0, 3838,  318, 7145, 7665, 6554]],
      dtype=int32)

In [60]:
# Number of features in each word's embedding vector; passed to the Embedding
# layer as its second (output dimension) argument.
dim = 50

In [61]:
# Build a single-layer model that maps every word index in a padded sentence to
# a `dim`-dimensional vector.
model = Sequential()

# `input_length` is deprecated (and ignored, with a warning) by the Embedding
# layer in Keras 3, which would leave the model unbuilt until the first predict()
# call. The layer is therefore created without it and the sequence length is
# supplied through an explicit build() so that model.summary() reports real
# shapes and parameter counts straight away.
model.add(Embedding(vocabulary_size, dim))
model.build(input_shape=(None, sentences_lenth))

# No cell shown in this notebook trains the model; compile() is kept so the model's
# configuration is unchanged should training be added later.
model.compile(optimizer='adam', loss='mse')

In [62]:
# Print the model's layer summary right after it has been defined.
model.summary()

In [63]:
# Look up the embedding vector of every index in every padded sentence.
# The model has not been trained in the cells shown, so the vectors are the
# layer's initial weights. Rows for the padding index 0 are all identical.
model.predict(embedded_docs)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 134ms/step


array([[[-0.00208008,  0.02943983, -0.00325581, ...,  0.00570505,
         -0.03204455,  0.02218181],
        [-0.00208008,  0.02943983, -0.00325581, ...,  0.00570505,
         -0.03204455,  0.02218181],
        [-0.00208008,  0.02943983, -0.00325581, ...,  0.00570505,
         -0.03204455,  0.02218181],
        ...,
        [-0.00596442,  0.01160081,  0.0152604 , ..., -0.03469158,
          0.02481532,  0.04158188],
        [-0.03763022, -0.00050361, -0.03827984, ...,  0.01759607,
          0.01533912,  0.03012153],
        [-0.04316144,  0.04733696, -0.03220451, ..., -0.04939551,
          0.02674231, -0.02059283]],

       [[-0.00208008,  0.02943983, -0.00325581, ...,  0.00570505,
         -0.03204455,  0.02218181],
        [-0.00208008,  0.02943983, -0.00325581, ...,  0.00570505,
         -0.03204455,  0.02218181],
        [-0.00208008,  0.02943983, -0.00325581, ...,  0.00570505,
         -0.03204455,  0.02218181],
        ...,
        [-0.00596442,  0.01160081,  0.0152604 , ..., -

In [64]:
# Copy the first padded sentence into its own batch of size one (shape (1, 10)),
# so that it can be passed to model.predict() by itself.
final_em = np.array(embedded_docs[:1])

In [65]:
# Embed only the first sentence; the result holds one vector of `dim` values for
# each of its padded positions.
model.predict(final_em)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 72ms/step


array([[[-0.00208008,  0.02943983, -0.00325581, -0.02407793,
          0.01277002, -0.02747508,  0.00384021, -0.03540571,
          0.02102438,  0.00848391, -0.04540065, -0.02933977,
          0.02076329,  0.038271  , -0.04615474, -0.03510301,
          0.0309649 , -0.04042078,  0.04862747, -0.00362438,
          0.01788623, -0.04204601,  0.0203975 , -0.0494202 ,
          0.03306413,  0.04364783,  0.00407093,  0.04195272,
         -0.04087664,  0.00698473,  0.03501235,  0.00508351,
          0.00679648,  0.04408343,  0.02109433, -0.02607795,
          0.01619231,  0.02986728,  0.00792662,  0.02568928,
          0.04734674,  0.01360991, -0.01639351,  0.0113615 ,
         -0.04817072, -0.0103906 ,  0.01085452,  0.00570505,
         -0.03204455,  0.02218181],
        [-0.00208008,  0.02943983, -0.00325581, -0.02407793,
          0.01277002, -0.02747508,  0.00384021, -0.03540571,
          0.02102438,  0.00848391, -0.04540065, -0.02933977,
          0.02076329,  0.038271  , -0.04615474, -

In [66]:
# Print the layer summary again, now that the model has been called on data.
model.summary()