## Implementing simple RNN using IMDB dataset

In [17]:
from tensorflow.keras.preprocessing.text import one_hot


In [18]:
sentences = [
    "This movie was absolutely fantastic and I loved every minute of it",
    "The plot was terrible and the acting was even worse",
    "I would highly recommend this film to anyone who enjoys great cinema",
    "What a waste of time this movie turned out to be",
    "The cinematography was stunning and the soundtrack was perfect",
    "I fell asleep halfway through this boring disaster",
    "An incredible masterpiece that will be remembered for years",
    "The worst film I have ever seen in my entire life",
    "A touching story with brilliant performances from the entire cast",
    "Completely disappointed with this overhyped mess"
]

In [19]:
voc_size = 10000

In [20]:
# one hot reqpresentation
one_hot_repr = [one_hot(words, voc_size) for words in sentences]

one_hot_repr


[[756, 2561, 7837, 6880, 8917, 3495, 3722, 8947, 9776, 4674, 6340, 1092],
 [3617, 4529, 7837, 2090, 3495, 3617, 8704, 7837, 6264, 1619],
 [3722, 6809, 9363, 1194, 756, 3705, 7991, 7924, 9101, 2022, 8909, 9095],
 [2275, 3874, 2260, 6340, 3995, 756, 2561, 6152, 4828, 7991, 7881],
 [3617, 3154, 7837, 4528, 3495, 3617, 4085, 7837, 1136],
 [3722, 7040, 7893, 4870, 4069, 756, 4483, 3110],
 [561, 3485, 789, 7588, 5153, 7881, 2918, 8989, 2334],
 [3617, 1581, 3705, 3722, 8012, 9003, 9184, 2036, 1414, 6056, 8272],
 [3874, 3916, 1962, 8026, 1478, 5693, 3603, 3617, 6056, 2495],
 [6475, 8919, 8026, 756, 6458, 1203]]

In [21]:
# word embedding representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [22]:
sent_length = 12

embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
print(embedded_docs)

[[ 756 2561 7837 6880 8917 3495 3722 8947 9776 4674 6340 1092]
 [   0    0 3617 4529 7837 2090 3495 3617 8704 7837 6264 1619]
 [3722 6809 9363 1194  756 3705 7991 7924 9101 2022 8909 9095]
 [   0 2275 3874 2260 6340 3995  756 2561 6152 4828 7991 7881]
 [   0    0    0 3617 3154 7837 4528 3495 3617 4085 7837 1136]
 [   0    0    0    0 3722 7040 7893 4870 4069  756 4483 3110]
 [   0    0    0  561 3485  789 7588 5153 7881 2918 8989 2334]
 [   0 3617 1581 3705 3722 8012 9003 9184 2036 1414 6056 8272]
 [   0    0 3874 3916 1962 8026 1478 5693 3603 3617 6056 2495]
 [   0    0    0    0    0    0 6475 8919 8026  756 6458 1203]]


In [23]:
dim = 10

In [24]:
model = Sequential()
model.add(Embedding(voc_size,dim, input_length=sent_length))
model.compile('adam', 'mse')

In [25]:
model.summary()

In [27]:
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


array([[[ 2.6432049e-02,  2.5287237e-02,  3.5019923e-02, ...,
         -1.5116144e-02,  2.0227168e-02,  2.0289849e-02],
        [ 3.6632791e-03,  1.2026541e-03,  5.9381872e-04, ...,
          3.3703934e-02, -6.2593222e-03,  8.3283186e-03],
        [ 1.0251414e-02,  2.6616249e-02,  4.2907227e-02, ...,
         -1.8903458e-02, -5.0890669e-03, -4.4019725e-02],
        ...,
        [ 4.8533332e-02,  1.4355842e-02,  4.0052023e-02, ...,
         -3.8517930e-02, -3.3502568e-02,  2.2946600e-02],
        [ 3.4026455e-02, -3.4504723e-02, -2.6524890e-02, ...,
         -2.8271377e-02, -4.0648520e-02, -3.7857078e-02],
        [-3.2889031e-02, -8.8576898e-03, -3.4160696e-02, ...,
          2.9709313e-02,  3.3657793e-02, -1.7538369e-02]],

       [[ 4.5679066e-02, -2.4674583e-02,  4.4963431e-02, ...,
         -3.9546870e-02,  1.8252842e-03,  4.6974625e-02],
        [ 4.5679066e-02, -2.4674583e-02,  4.4963431e-02, ...,
         -3.9546870e-02,  1.8252842e-03,  4.6974625e-02],
        [-3.7252329e-02, 