# Predicting sentiments for IMDB data

using GRU

# 1- Import key Modules

In [2]:
# support both Python 2 and Python 3 with minimal overhead.
from __future__ import absolute_import, division, print_function

# I am an engineer. I care only about error not warning. So, let's be maverick and ignore warnings.
import warnings
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd

In [4]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dense

In [5]:
# Visualization 
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint
%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# 2-Loading and preparing data

In [6]:
from keras.datasets import imdb
max_words = 20000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)

In [7]:
print("X_train length: ", len(x_train))
print("X_test length: ", len(x_test))

X_train length:  25000
X_test length:  25000


In [8]:
word_to_index = imdb.get_word_index()
index_to_word = {v: k for k, v in word_to_index.items()}

In [9]:
print(x_train[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


In [10]:
print(" ".join([index_to_word[x] for x in x_train[0]]))

the as you with out themselves powerful lets loves their becomes reaching had journalist of lot from anyone to have after out atmosphere never more room and it so heart shows to years of every never going and help moments or of every chest visual movie except her was several of enough more with is now current film as you of mine potentially unfortunately of you than him that with out themselves her get for was camp of you movie sometimes movie that with scary but pratfalls to story wonderful that in seeing in character to of 70s musicians with heart had shadows they of here that with her serious to have does when from why what have critics they is you that isn't one will very to as itself with other tricky in of seen over landed for anyone of and br show's to whether from than out themselves history he name half some br of 'n odd was two most of mean for 1 any an boat she he should is thought frog but of script you not while history he heart to real at barrel but when from one bit then

In [11]:
print("Min value:", min(y_train), "Max value:", max(y_train))

Min value: 0 Max value: 1


### 2.1.average and median

In [12]:
import numpy as np

average_length = np.mean([len(x) for x in x_train])
median_length = sorted([len(x) for x in x_train])[len(x_train) // 2]

In [13]:
print("Average sequence length: ", average_length)
print("Median sequence length: ", median_length)

Average sequence length:  238.71364
Median sequence length:  178


so we shall keep sentence length somewhat close to these values. I will go for average length and hence , sentence lenth will be 240

### 2.2.Embedding sequence i.e padding

In [14]:
max_sequence_length = 240

from keras.preprocessing import sequence

x_train = sequence.pad_sequences(x_train, maxlen=max_sequence_length, padding='post', truncating='post')
x_test = sequence.pad_sequences(x_test, maxlen=max_sequence_length, padding='post', truncating='post')

print('X_train shape: ', x_train.shape)

X_train shape:  (25000, 240)


# 3- Model Building

In [15]:
from keras.models import Sequential
from keras.layers import GRU
from keras.layers import Embedding
from keras.layers import Dense
from keras.callbacks import EarlyStopping

### 3.1.Single laye GRU Model

In [16]:
hidden_size = 32

sl_model = Sequential()
sl_model.add(Embedding(max_words, hidden_size))
sl_model.add(GRU(hidden_size, activation='tanh', dropout=0.2, recurrent_dropout=0.2))
sl_model.add(Dense(1, activation='sigmoid'))
sl_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [17]:
sl_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          640000    
_________________________________________________________________
gru_1 (GRU)                  (None, 32)                6240      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 646,273
Trainable params: 646,273
Non-trainable params: 0
_________________________________________________________________


In [18]:
epochs = 3
batch_size=16

sl_model.fit(x_train, y_train,validation_data=(x_test,y_test),
             callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)],
             epochs=epochs, batch_size=batch_size, shuffle=True)


Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.callbacks.History at 0x172496f7948>

In [19]:
loss, acc = sl_model.evaluate(x_test, y_test)



In [20]:
print('Single layer model -- ACC {} -- LOSS {}'.format(acc, loss))

Single layer model -- ACC 0.5224000215530396 -- LOSS 0.691173624382019


### 3.2.Stacked Layered GRU

In [21]:
d_model = Sequential()
d_model.add(Embedding(max_words, hidden_size))
d_model.add(GRU(hidden_size, activation='tanh', dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
d_model.add(GRU(hidden_size, activation='tanh', dropout=0.2, recurrent_dropout=0.2))
d_model.add(Dense(1, activation='sigmoid'))
d_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [22]:
d_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 32)          640000    
_________________________________________________________________
gru_2 (GRU)                  (None, None, 32)          6240      
_________________________________________________________________
gru_3 (GRU)                  (None, 32)                6240      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 652,513
Trainable params: 652,513
Non-trainable params: 0
_________________________________________________________________


In [23]:
history=d_model.fit(x_train, y_train,validation_data=(x_test,y_test),
                    callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)],
                    epochs=epochs, batch_size=batch_size, shuffle=True)

Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [24]:
d_loss, d_acc = d_model.evaluate(x_test, y_test)



As we can stacked GRU didn't work as well as we assumed. More optimization will be done in coming code books