## Data

In [1]:

from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Bidirectional, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence
import numpy as np

In [2]:
vocab_size = 4000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
print(x_train[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 2, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]


In [3]:
# Getting all the words from word_index dictionary
word_idx = imdb.get_word_index()

In [4]:
# Converting the index as key and the words as values
word_idx = {i: word for word, i in word_idx.items()}

In [5]:
print([word_idx[i] for i in x_train[0]])

['the', 'as', 'you', 'with', 'out', 'themselves', 'powerful', 'lets', 'loves', 'their', 'becomes', 'and', 'had', 'journalist', 'of', 'lot', 'from', 'anyone', 'to', 'have', 'after', 'out', 'atmosphere', 'never', 'more', 'room', 'and', 'it', 'so', 'heart', 'shows', 'to', 'years', 'of', 'every', 'never', 'going', 'and', 'help', 'moments', 'or', 'of', 'every', 'and', 'visual', 'movie', 'except', 'her', 'was', 'several', 'of', 'enough', 'more', 'with', 'is', 'now', 'current', 'film', 'as', 'you', 'of', 'mine', 'and', 'unfortunately', 'of', 'you', 'than', 'him', 'that', 'with', 'out', 'themselves', 'her', 'get', 'for', 'was', 'camp', 'of', 'you', 'movie', 'sometimes', 'movie', 'that', 'with', 'scary', 'but', 'and', 'to', 'story', 'wonderful', 'that', 'in', 'seeing', 'in', 'character', 'to', 'of', '70s', 'and', 'with', 'heart', 'had', 'shadows', 'they', 'of', 'here', 'that', 'with', 'her', 'serious', 'to', 'have', 'does', 'when', 'from', 'why', 'what', 'have', 'critics', 'they', 'is', 'you', 

In [6]:
print("Max length of a review:: ", len(max((x_train + x_test), key = len)))
print("Min length of a review:: ", len(min((x_train + x_test), key = len)))

Max length of a review::  2697
Min length of a review::  70


In [7]:
max_words = 400

In [8]:
x_train = sequence.pad_sequences(x_train, maxlen = max_words)
x_test = sequence.pad_sequences(x_test, maxlen = max_words)

In [9]:
x_valid, y_valid = x_train[:64], y_train[:64]
x_train_, y_train_ = x_train[64:], y_train[64:]

In [10]:
# word's embedding size to be 32
embd_len = 32

## SimpleRNN 

In [13]:
RNN_model = Sequential(name = "Simple_RNN")

In [14]:
RNN_model.add(Embedding(vocab_size, embd_len, input_length = max_words))

In [15]:
RNN_model.add(SimpleRNN(128, activation = 'tanh', return_sequences = False))

In [16]:
RNN_model.add(Dense(1, activation = 'sigmoid'))

In [17]:
RNN_model.summary()

Model: "Simple_RNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 400, 32)           160000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               20608     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 180737 (706.00 KB)
Trainable params: 180737 (706.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [18]:
RNN_model.compile(loss = "binary_crossentropy", metrics = ['accuracy'], optimizer = 'adam')

In [19]:
N = 5
history = RNN_model.fit(x_train_, y_train_, batch_size = 64, epochs = N, verbose = 1, validation_data = (x_valid, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
print("Simple_RNN Score: ", RNN_model.evaluate(x_test, y_test, verbose = 0))

Simple_RNN Score:  [0.6343585848808289, 0.6412000060081482]


## Gated Recurrent Units (GRU)

In [21]:
gru_model = Sequential(name = "GRU_Model")

In [22]:
gru_model.add(Embedding(vocab_size, embd_len, input_length = max_words))

In [23]:
gru_model.add(GRU(128, activation = 'tanh', return_sequences = False))

In [24]:
gru_model.add(Dense(1, activation = 'sigmoid'))

In [25]:
gru_model.summary()

Model: "GRU_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 400, 32)           160000    
                                                                 
 gru (GRU)                   (None, 128)               62208     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 222337 (868.50 KB)
Trainable params: 222337 (868.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
gru_model.compile(loss = "binary_crossentropy", metrics = ['accuracy'], optimizer = 'adam')

In [27]:
N = 5
history2 = gru_model.fit(x_train_, y_train_, batch_size = 64, epochs = N, verbose = 1, validation_data = (x_valid, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [28]:
print("GRU Score: ", gru_model.evaluate(x_test, y_test, verbose = 0))

GRU Score:  [0.3044564127922058, 0.879040002822876]


## Long Short Term Memory (LSTM) 

In [11]:
lstm_model = Sequential(name = "LSTM_Model")

In [12]:
lstm_model.add(Embedding(vocab_size, embd_len, input_length = max_words))

In [13]:
lstm_model.add(LSTM(128, activation = 'relu', return_sequences = False))

In [14]:
from tensorflow.keras.layers import Dropout
lstm_model.add(Dropout(0.2))

lstm_model.add(Dense(1, activation = 'sigmoid'))

In [15]:
lstm_model.summary()

Model: "LSTM_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 400, 32)           128000    
                                                                 
 lstm (LSTM)                 (None, 128)               82432     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 210561 (822.50 KB)
Trainable params: 210561 (822.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
from tensorflow.keras.optimizers import Adam

optimizer = Adam(learning_rate = 0.0001, clipvalue = 1.0)

lstm_model.compile(loss = "binary_crossentropy", metrics = ['accuracy'], optimizer = optimizer)

_ Need more tweeking of the hyperparameters _

* When `clipvalue = 1.0` and Dropout is 0.1 or 0.5 the accuracy is up but the loss is around 100-130
* When max words is 200 or 100 its much better, need to solve for more max words

In [17]:
N = 1
history3 = lstm_model.fit(x_train_, y_train_, batch_size = 64, epochs = N, verbose = 1, validation_data = (x_valid, y_valid))



In [18]:
print("LSTM Score: ", lstm_model.evaluate(x_test, y_test, verbose = 0))

LSTM Score:  [nan, 0.5]


## Bi-directional LSTM

In [19]:
bi_lstm_model = Sequential(name = "Bidirectional_LSTM")

In [20]:
bi_lstm_model.add(Embedding(vocab_size, embd_len, input_length = max_words))

In [21]:
bi_lstm_model.add(Bidirectional(LSTM(128, activation = 'tanh', return_sequences = False)))

In [22]:
bi_lstm_model.add(Dense(1, activation = 'sigmoid'))

In [23]:
bi_lstm_model.summary()

Model: "Bidirectional_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 400, 32)           128000    
                                                                 
 bidirectional (Bidirection  (None, 256)               164864    
 al)                                                             
                                                                 
 dense_1 (Dense)             (None, 1)                 257       
                                                                 
Total params: 293121 (1.12 MB)
Trainable params: 293121 (1.12 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [24]:
bi_lstm_model.compile(loss="binary_crossentropy", metrics = ['accuracy'], optimizer = 'adam')

In [25]:
N = 1
history4 = bi_lstm_model.fit(x_train_, y_train_, batch_size = 64, epochs = N, verbose = 1, validation_data = (x_test, y_test))



In [26]:
print("Bidirectional LSTM model Score: ", bi_lstm_model.evaluate(x_test, y_test, verbose = 0))

Bidirectional LSTM model Score:  [0.345187783241272, 0.8540400266647339]
