In [1]:
import numpy as np
import tensorflow as tf

"""Loading the Dataset"""

from tensorflow.keras.datasets import imdb

In [2]:
"""### **Data Preprocessing**"""

# Vocabulary cap handed to the loader as `num_words`, and the fixed review
# length (in tokens) that the padding step later uses as `maxlen`.
words, max_length = 20000, 100

# Load the IMDB train/test splits with the vocabulary capped at `words`.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=words,
)


In [3]:
"""Padding the Text"""

# Force every review to exactly `max_length` token ids so the data forms a
# rectangular (num_reviews, max_length) matrix. With the Keras defaults
# (padding='pre', truncating='pre') short reviews are left-padded with 0 and
# long reviews keep only their last `max_length` tokens.
x_train = tf.keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_length
)
x_test = tf.keras.preprocessing.sequence.pad_sequences(
    x_test, maxlen=max_length
)

# Embedding hyperparameters: the vocabulary size (the same cap that was used
# when loading the data) and the length of each learned word vector.
word_size = words
embed_size = 128

### The first argument of the Embedding layer is the vocabulary size: the number of distinct word indices the layer can look up. It must be large enough that every word index in the corpus has its own row; in this project word_size is 20000. The second argument is the dimensionality of each embedding vector, not the number of vectors: every word in the corpus is represented by a dense vector of length embed_size (128).

In [4]:
"""### Building a Recurrent Neural Network"""

# Embedding -> LSTM -> sigmoid stack for binary classification of reviews.
imdb_model = tf.keras.Sequential([
    # Embedding Layer: looks up a trainable embed_size-long vector for each
    # token id; the sequence length is read off the padded training matrix.
    tf.keras.layers.Embedding(
        word_size,
        embed_size,
        input_shape=(x_train.shape[1],),
    ),
    # LSTM Layer: a single recurrent layer with 128 units.
    tf.keras.layers.LSTM(units=128, activation='tanh'),
    # Output Layer: one sigmoid unit producing a value in (0, 1).
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

imdb_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 128)          2560000   
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 2,691,713
Trainable params: 2,691,713
Non-trainable params: 0
_________________________________________________________________


In [5]:
"""#### Compiling the model"""

# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# the only metric tracked during training and evaluation.
imdb_model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [6]:
"""#### Training the model"""

# Train for 5 passes over the training set in mini-batches of 128 reviews.
imdb_model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=128,
)

# evaluate() yields the loss followed by the single compiled metric (accuracy)
# on the held-out test split.
test_loss, test_acurracy = imdb_model.evaluate(x_test, y_test)
print("Test accuracy: {}".format(test_acurracy))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.8322799801826477


## Source: https://towardsai.net/p/deep-learning/text-classification-with-rnn