In [None]:
from tensorflow.keras.datasets import imdb

In [0]:
# Vocabulary size: used as `num_words` when loading IMDB and as the Embedding input dimension of every model below.
vocab_size = 8000

In [0]:
# Load the IMDB reviews as (sequences, labels) pairs for the train and test splits,
# restricting word indices via num_words=vocab_size.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

#### Assessing the type of the obtained data

In [11]:
# Inspect the raw data: the container type, the type of a single review,
# and the contents of that review (a variable-length list of word indices).
print(type(X_train))
print(type(X_train[5]))
print(X_train[5])

<class 'numpy.ndarray'>
<class 'list'>
[1, 778, 128, 74, 12, 630, 163, 15, 4, 1766, 7982, 1051, 2, 32, 85, 156, 45, 40, 148, 139, 121, 664, 665, 10, 10, 1361, 173, 4, 749, 2, 16, 3804, 8, 4, 226, 65, 12, 43, 127, 24, 2, 10, 10]


In [0]:
# Fixed sequence length handed to pad_sequences for both the train and test splits.
maxlen = 200

#### Staging and pre-processing our data

In [None]:
from tensorflow.keras import preprocessing

In [0]:
# Bring every review to exactly `maxlen` tokens; the same call is applied to
# both splits so train and test share one input shape.
X_train, X_test = (
    preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_test)
)

In [19]:
# Show the same review (index 5) after padding, for comparison with the raw list printed earlier.
print(X_train[5])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    1  778  128   74   12  630  163   15    4 1766 7982
 1051    2   32   85  156   45   40  148  139  121  664  665   10   10
 1361  173    4  749    2   16 3804    8    4  226   65   12   43  127
   24 

### Using Plain RNNs

In [0]:
import numpy as np
import tensorflow as tf

# Seed NumPy and TensorFlow with one shared value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Flatten, Dense, Embedding, SpatialDropout1D, Dropout

In [None]:
# Front end of the plain-RNN model: a 32-dimensional embedding over the
# vocabulary, followed by 40% spatial dropout. Further layers are added below.
model_rnn = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32),
    SpatialDropout1D(rate=0.4),
])

### Exercise 6.1: Build and train RNN model for sentiment classification

1. To the model, add a SimpleRNN layer with 32 neurons (chosen arbitrarily, another hyper-parameter to tune)

In [0]:
# Recurrent layer with 32 units.
model_rnn.add(SimpleRNN(units=32))

2. Next, add a dropout layer with 40% dropout (an arbitrary choice again)

In [None]:
# Regularise the recurrent output with 40% dropout.
model_rnn.add(Dropout(rate=0.4))

3. Add a dense layer with a sigmoid activation to complete the model architecture. This is the output layer that makes the prediction.

In [None]:
# Output layer: one sigmoid unit producing the sentiment prediction.
model_rnn.add(Dense(units=1, activation='sigmoid'))

4. Compile the model and view the model summary.

In [None]:
# Binary classification setup: cross-entropy loss, RMSprop optimizer, accuracy metric.
model_rnn.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [37]:
# Print the layer-by-layer architecture and parameter counts of the plain-RNN model.
model_rnn.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          256000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, None, 32)          0         
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 32)                2080      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 258,113
Trainable params: 258,113
Non-trainable params: 0
_________________________________________________________________


5. Fit the model on the train data with a batch size of 128 for 10 epochs (both of these are hyper-parameters that you can tune). 

In [38]:
# Train the plain-RNN model; 20% of the training data is held out for validation.
history_rnn = model_rnn.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


6. Make predictions on the test data using the predict_classes method of the model and compute the accuracy using the accuracy_score function from sklearn.

In [0]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output unit, thresholding the predicted probability at 0.5 gives the
# same 0/1 class labels that method used to return.
y_test_pred = (model_rnn.predict(X_test) > 0.5).astype("int32")

In [0]:
from sklearn.metrics import accuracy_score

In [48]:
# Test-set accuracy of the plain-RNN model (y_test_pred currently holds its predicted classes).
print(accuracy_score(y_test, y_test_pred))

0.85128

### Exercise 6.x: LSTM based sentiment classification model

1.	Import the LSTM layer from Keras layers

In [0]:
from tensorflow.keras.layers import LSTM

2.	Instantiate the sequential model, add the embedding layer with appropriate dimensions and add 40% spatial dropout

In [None]:
# Front end of the LSTM model: a 32-dimensional embedding over the vocabulary,
# followed by 40% spatial dropout. Further layers are added below.
model_lstm = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32),
    SpatialDropout1D(rate=0.4),
])

3.	Add a LSTM layer with 32 cells.

In [None]:
# LSTM layer with 32 units.
model_lstm.add(LSTM(units=32))

4.	Add the dropout (40% dropout) and dense layers, compile the model and print the model summary

In [None]:
# Classification head: 40% dropout, then a single sigmoid output unit.
model_lstm.add(Dropout(rate=0.4))
model_lstm.add(Dense(units=1, activation='sigmoid'))

# Binary classification setup: cross-entropy loss, RMSprop optimizer, accuracy metric.
model_lstm.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [52]:
# Print the layer-by-layer architecture and parameter counts of the LSTM model.
model_lstm.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 32)          256000    
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, None, 32)          0         
_________________________________________________________________
lstm (LSTM)                  (None, 32)                8320      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 264,353
Trainable params: 264,353
Non-trainable params: 0
_________________________________________________________________


5.	Fit on the training data for 5 epochs (gives best result for the model) with a batch size of 128.

In [53]:
# Train the LSTM model; 20% of the training data is held out for validation.
history_lstm = model_lstm.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


6.	Make predictions on the test set and print the accuracy score

In [0]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output unit, thresholding the predicted probability at 0.5 gives the
# same 0/1 class labels that method used to return.
y_test_pred = (model_lstm.predict(X_test) > 0.5).astype("int32")

In [56]:
# Test-set accuracy of the LSTM model (y_test_pred currently holds its predicted classes).
print(accuracy_score(y_test, y_test_pred))

0.87032


### Exercise 6.x: GRU based sentiment classification model

1.	Import the GRU layer from Keras layers

In [0]:
from tensorflow.keras.layers import GRU

2.	Instantiate the sequential model, add the embedding layer with appropriate dimensions and add 40% spatial dropout

In [None]:
# Front end of the GRU model: a 32-dimensional embedding over the vocabulary,
# followed by 40% spatial dropout. Further layers are added below.
model_gru = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32),
    SpatialDropout1D(rate=0.4),
])

3.	Add a GRU layer with 32 cells. Set the parameter reset_after to False (this is a minor TensorFlow 2 implementation detail, to maintain consistency with the implementation of plain RNNs and LSTMs).

In [None]:
# GRU layer with 32 units; reset_after is explicitly disabled as described in the exercise text.
model_gru.add(GRU(units=32, reset_after=False))

4. Add the dropout (40% dropout) and dense layers, compile the model and print the model summary

In [0]:
# Classification head: 40% dropout, then a single sigmoid output unit.
model_gru.add(Dropout(rate=0.4))
model_gru.add(Dense(units=1, activation='sigmoid'))

# Binary classification setup: cross-entropy loss, RMSprop optimizer, accuracy metric.
model_gru.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [70]:
# Print the layer-by-layer architecture and parameter counts of the GRU model.
model_gru.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, None, 32)          256000    
_________________________________________________________________
spatial_dropout1d_4 (Spatial (None, None, 32)          0         
_________________________________________________________________
gru_1 (GRU)                  (None, 32)                6240      
_________________________________________________________________
dropout_4 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 262,273
Trainable params: 262,273
Non-trainable params: 0
_________________________________________________________________


5. Fit on the training data for 4 epochs (which gives us the best result)

In [71]:
# Train the GRU model; 20% of the training data is held out for validation.
history_gru = model_gru.fit(
    x=X_train,
    y=y_train,
    epochs=4,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


6. Make predictions on the test set and print the accuracy score

In [0]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output unit, thresholding the predicted probability at 0.5 gives the
# same 0/1 class labels that method used to return.
y_test_pred = (model_gru.predict(X_test) > 0.5).astype("int32")

In [74]:
# Test-set accuracy of the GRU model (y_test_pred currently holds its predicted classes).
accuracy_score(y_test, y_test_pred)

0.87156

### Exercise 6.x: Bi-directional LSTM based sentiment classification model

1.	Import the Bidirectional layer from Keras layers. This layer is essentially a wrapper that you can apply around other recurrent layers.

In [0]:
from tensorflow.keras.layers import Bidirectional

2.	Instantiate the sequential model, add the embedding layer with appropriate dimensions and add 40% spatial dropout

In [None]:
# Front end of the bidirectional-LSTM model: a 32-dimensional embedding over
# the vocabulary, followed by 40% spatial dropout. Further layers are added below.
model_bilstm = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32),
    SpatialDropout1D(rate=0.4),
])

3.	Add a Bidirectional wrapper on an LSTM layer with 32 cells

In [None]:
# 32-unit LSTM wrapped in Bidirectional; the summary below reports a 64-wide output for this layer.
model_bilstm.add(Bidirectional(LSTM(units=32)))

4.	Add the dropout (40% dropout) and dense layers, compile the model and print the model summary

In [0]:
# Classification head: 40% dropout, then a single sigmoid output unit.
model_bilstm.add(Dropout(rate=0.4))
model_bilstm.add(Dense(units=1, activation='sigmoid'))

# Binary classification setup: cross-entropy loss, RMSprop optimizer, accuracy metric.
model_bilstm.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [84]:
# Print the layer-by-layer architecture and parameter counts of the bidirectional-LSTM model.
model_bilstm.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, None, 32)          256000    
_________________________________________________________________
spatial_dropout1d_6 (Spatial (None, None, 32)          0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64)                16640     
_________________________________________________________________
dropout_6 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 65        
Total params: 272,705
Trainable params: 272,705
Non-trainable params: 0
_________________________________________________________________


5.	Fit on the training data for 4 epochs, batch size 128

In [85]:
# Train the bidirectional-LSTM model; 20% of the training data is held out for validation.
history_bilstm = model_bilstm.fit(
    x=X_train,
    y=y_train,
    epochs=4,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


6.	Make predictions on the test set and print the accuracy score

In [0]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output unit, thresholding the predicted probability at 0.5 gives the
# same 0/1 class labels that method used to return.
y_test_pred = (model_bilstm.predict(X_test) > 0.5).astype("int32")

In [88]:
# Test-set accuracy of the bidirectional-LSTM model (y_test_pred currently holds its predicted classes).
accuracy_score(y_test, y_test_pred)

0.877

### Exercise 6.x: Stacked LSTM based sentiment classification model

1.	Instantiate the sequential model, add the embedding layer with appropriate dimensions and add 40% spatial dropout

In [None]:
# Front end of the stacked-LSTM model: a 32-dimensional embedding over the
# vocabulary, followed by 40% spatial dropout. Further layers are added below.
model_stack = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32),
    SpatialDropout1D(rate=0.4),
])

2.	Add an LSTM layer with 32 cells. Make sure to specify “return_sequences” as True in the LSTM layer. This returns the output of the LSTM at each time step, which can then be passed to the next LSTM layer.

In [None]:
# First LSTM of the stack: emits its output at every time step so the next LSTM receives a sequence.
model_stack.add(LSTM(units=32, return_sequences=True))

3.	Add a second LSTM layer with 32 cells. This time, you don’t need to return sequences. You can either set the return_sequences option to False, or skip it altogether (the default value is False).

In [None]:
# Second LSTM of the stack: return_sequences is left at its default (False),
# so only the final output is passed on to the classification head.
model_stack.add(LSTM(units=32))

4.	Add the dropout (50% dropout, higher as we’re building a more complex model) and dense layers, compile the model and print the model summary

In [104]:
# Classification head: 50% dropout, then a single sigmoid output unit.
model_stack.add(Dropout(rate=0.5))
model_stack.add(Dense(units=1, activation='sigmoid'))

# Binary classification setup: cross-entropy loss, RMSprop optimizer, accuracy metric.
model_stack.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts.
model_stack.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, None, 32)          256000    
_________________________________________________________________
spatial_dropout1d_9 (Spatial (None, None, 32)          0         
_________________________________________________________________
lstm_7 (LSTM)                (None, None, 32)          8320      
_________________________________________________________________
lstm_8 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_9 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 33        
Total params: 272,673
Trainable params: 272,673
Non-trainable params: 0
_______________________________________________

5.	Fit on the training data for 4 epochs 

In [105]:
# Train the stacked-LSTM model; 20% of the training data is held out for validation.
history_stack = model_stack.fit(
    x=X_train,
    y=y_train,
    epochs=4,
    batch_size=128,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


6.	Make predictions on the test set and print the accuracy score

In [0]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output unit, thresholding the predicted probability at 0.5 gives the
# same 0/1 class labels that method used to return.
y_test_pred = (model_stack.predict(X_test) > 0.5).astype("int32")

In [108]:
# Test-set accuracy of the stacked-LSTM model (y_test_pred currently holds its predicted classes).
accuracy_score(y_test, y_test_pred)

0.87572