#Simple RNN Movie Review Sentiment Analysis

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
#Loading the imdb dataset
vocab_size = 10000
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
print(X_train.shape)
print(X_test.size)

(25000,)
25000


In [5]:
#First sentence, it is one hot encoded
sample_sentence = X_train[0]
sample_label = y_train[0]
print("First Sentence", sample_sentence)
print("First Label", sample_label)

First Sentence [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
First Label 1


In [6]:
#Padding the review
max_len = 500
X_train = sequence.pad_sequences(X_train,maxlen=max_len)
X_test = sequence.pad_sequences(X_test,maxlen=max_len)

In [7]:
print("First Sentence", X_train[0])
print("First Label", X_test[0])

First Sentence [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0  

In [8]:
#Training SIMPLE RNN
model = Sequential()
model.add(Embedding(input_dim=vocab_size,input_length=max_len,output_dim=128))
model.add(SimpleRNN(128,activation='relu'))
model.add(Dense(1,activation='sigmoid'))



In [9]:
model.summary()

In [10]:
#Defining callbacks
earlystopper = EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)

In [11]:
model.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
history = model.fit(
    X_train,y_train,
    epochs=10,
    batch_size=16,
    validation_split=0.2,
    callbacks=[earlystopper]
)

Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 188ms/step - accuracy: 0.6092 - loss: 850514.3125 - val_accuracy: 0.7704 - val_loss: 0.4824
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 184ms/step - accuracy: 0.8342 - loss: 0.3758 - val_accuracy: 0.7944 - val_loss: 0.4567
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 194ms/step - accuracy: 0.8858 - loss: 0.2785 - val_accuracy: 0.7966 - val_loss: 0.4761
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 187ms/step - accuracy: 0.9137 - loss: 0.2209 - val_accuracy: 0.7768 - val_loss: 0.5274
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 186ms/step - accuracy: 0.9318 - loss: 0.1838 - val_accuracy: 0.7946 - val_loss: 0.5600
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 185ms/step - accuracy: 0.9449 - loss: 0.1523 - val_accuracy: 0.7848 - val_

In [13]:
model.save('simple_rnn_model.h5')

