In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.datasets import imdb

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, LSTM, Dropout, GRU, Bidirectional

# Neural Networks for Language Processing


### Live demos

In [2]:
VOCABULARY_SIZE = 10000
OOV = -1
SEQUENCE_LENGTH = 800

In [3]:
(X_train, y_train),(X_test, y_tesg) = imdb.load_data(num_words = VOCABULARY_SIZE, oov_char = OOV)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
X_train.shape

(25000,)

In [5]:
# Tokens
X_train[0][:10]

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]

In [6]:
# Add padding
tf.keras.utils.pad_sequences(X_train[0:5], maxlen = SEQUENCE_LENGTH, padding= 'post')

array([[   1,   14,   22, ...,    0,    0,    0],
       [   1,  194, 1153, ...,    0,    0,    0],
       [   1,   14,   47, ...,    0,    0,    0],
       [   1,    4,   -1, ...,    0,    0,    0],
       [   1,  249, 1323, ...,    0,    0,    0]], dtype=int32)

In [7]:
imdb.get_word_index()['the'], imdb.get_word_index()['in'], imdb.get_word_index()['table']

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


(1, 8, 2699)

In [8]:
indices = imdb.get_word_index()

In [9]:
words = {v : k for k, v in indices.items()}

In [10]:
words[14]

'as'

In [11]:
[words[x] for x in X_train[0][:10]]

['the',
 'as',
 'you',
 'with',
 'out',
 'themselves',
 'powerful',
 'lets',
 'loves',
 'their']

In [32]:
tf.keras.backend.clear_session()

In [35]:
model = Sequential([
    Input((SEQUENCE_LENGTH, 32)),
    LSTM(5, return_sequences = True),
    LSTM(10, return_sequences = True),
    LSTM(5),
    Flatten(),

    Dense(VOCABULARY_SIZE, activation = 'sigmoid')
])

In [36]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 800, 5)            760       
                                                                 
 lstm_3 (LSTM)               (None, 800, 10)           640       
                                                                 
 lstm_4 (LSTM)               (None, 5)                 320       
                                                                 
 flatten_1 (Flatten)         (None, 5)                 0         
                                                                 
 dense_1 (Dense)             (None, 10000)             60000     
                                                                 
Total params: 61720 (241.09 KB)
Trainable params: 61720 (241.09 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
result = model.predict(tf.random.uniform((8, SEQUENCE_LENGTH, 32)))



In [27]:
result[:, -1, :]

array([[-0.14611295,  0.05316133, -0.12258671, -0.251388  ,  0.18120152,
        -0.01242162,  0.12644134,  0.08739218,  0.01126338, -0.11866633],
       [-0.20747215,  0.07647247, -0.17439966, -0.27277955,  0.13246548,
         0.00483407,  0.10315726,  0.09722485, -0.04257786, -0.09272692],
       [-0.13763303,  0.04947582, -0.12502146, -0.22137845,  0.17251582,
        -0.0043946 ,  0.11614415,  0.09656259, -0.02113449, -0.10541159],
       [-0.1866539 ,  0.05802964, -0.1590902 , -0.26024818,  0.15599081,
         0.00179216,  0.12791266,  0.09878547, -0.04061526, -0.1081463 ],
       [-0.20871742,  0.10185578, -0.17021753, -0.297816  ,  0.15523706,
         0.01090729,  0.12743008,  0.11002985, -0.02715253, -0.12974916],
       [-0.17752571,  0.08694539, -0.15014464, -0.2786307 ,  0.16349144,
        -0.00081053,  0.12258483,  0.08657756, -0.01352062, -0.1125991 ],
       [-0.19700857,  0.05817133, -0.18600684, -0.29967293,  0.12924203,
        -0.01454302,  0.09667588,  0.11077305