**Basic Implementation of Embedding Encoding**

In [1]:
import numpy as np

docs = ['go india',
        'india india',
        'hip hip hurray',
        'jeetega bhai jeetega india jeetega',
        'bharat mata ki jai',
        'kholi kholi',
        'sachin sachin',
        'dhoni dhoni',
        'modi ji ki jai',
        'inquilab zindabad']

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()

In [3]:
tokenizer.fit_on_texts(docs)

In [4]:
len(tokenizer.word_index)

17

In [5]:
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[9, 1],
 [1, 1],
 [3, 3, 10],
 [2, 11, 2, 1, 2],
 [12, 13, 4, 5],
 [6, 6],
 [7, 7],
 [8, 8],
 [14, 15, 4, 5],
 [16, 17]]

In [6]:
from keras.utils import pad_sequences
sequences = pad_sequences(sequences,padding='post')
sequences

array([[ 9,  1,  0,  0,  0],
       [ 1,  1,  0,  0,  0],
       [ 3,  3, 10,  0,  0],
       [ 2, 11,  2,  1,  2],
       [12, 13,  4,  5,  0],
       [ 6,  6,  0,  0,  0],
       [ 7,  7,  0,  0,  0],
       [ 8,  8,  0,  0,  0],
       [14, 15,  4,  5,  0],
       [16, 17,  0,  0,  0]], dtype=int32)

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding

model = Sequential()
model.add(Embedding(len(tokenizer.word_index) +1 , output_dim=2))
model.predict(sequences)
model.summary()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step


In [17]:
model.compile('adam','accuracy')

In [18]:
pred = model.predict(sequences)
print(pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[[[ 0.01885298  0.01275929]
  [-0.02476645 -0.04151171]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]]

 [[-0.02476645 -0.04151171]
  [-0.02476645 -0.04151171]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]]

 [[-0.03130114 -0.00646871]
  [-0.03130114 -0.00646871]
  [ 0.02056481 -0.04544734]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]]

 [[ 0.02789021  0.02188687]
  [-0.03527584  0.01727612]
  [ 0.02789021  0.02188687]
  [-0.02476645 -0.04151171]
  [ 0.02789021  0.02188687]]

 [[ 0.01022566 -0.01330455]
  [ 0.04690676  0.01125572]
  [-0.04679919 -0.00109095]
  [-0.01555126  0.03955   ]
  [-0.00524745 -0.00295002]]

 [[ 0.04366377 -0.03381246]
  [ 0.04366377 -0.03381246]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]
  [-0.00524745 -0.00295002]]

 [[-0.01752266 -0.00782673]
  [-0.01752266 -0.00782673]
  [-0.00524745 -0.00

In [21]:
from keras.datasets import imdb
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten

In [34]:
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=10000)

In [24]:
X_train = pad_sequences(X_train,padding='post',maxlen=50)
X_test = pad_sequences(X_test,padding='post',maxlen=50)

In [25]:
X_train.shape

(25000, 50)

In [43]:
# Re-load the IMDB dataset with num_words
from keras.datasets import imdb
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=10000)

# Re-pad the sequences
from keras.utils import pad_sequences
X_train = pad_sequences(X_train,padding='post',maxlen=50)
X_test = pad_sequences(X_test,padding='post',maxlen=50)

# Re-define the model
from keras.models import Sequential
from keras.layers import Dense,SimpleRNN,Embedding

model = Sequential()
model.add(Embedding(10000 + 1, 2))
model.add(SimpleRNN(32,return_sequences=False))
model.add(Dense(1,activation='sigmoid'))

# Build the model by making a prediction and display the summary
model.predict(X_train)
model.summary()



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [44]:
model.compile('adam',loss='binary_crossentropy',metrics=['acc'])
history = model.fit(X_train,y_train,epochs=5,validation_data=(X_test,y_test))

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - acc: 0.5429 - loss: 0.6777 - val_acc: 0.7870 - val_loss: 0.4522
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 14ms/step - acc: 0.8212 - loss: 0.4061 - val_acc: 0.8104 - val_loss: 0.4196
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - acc: 0.8673 - loss: 0.3223 - val_acc: 0.7952 - val_loss: 0.4664
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 15ms/step - acc: 0.8941 - loss: 0.2670 - val_acc: 0.8016 - val_loss: 0.4697
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 19ms/step - acc: 0.9191 - loss: 0.2227 - val_acc: 0.7692 - val_loss: 0.5140
