In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, LSTM, GRU


In [2]:
# load the IMDB dataset and unrapping the data

(X_train,y_train),(X_test,y_test)=imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
X_train

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
       list([1, 194, 1153, 194, 8255, 78, 228,

In [4]:
X_train[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 2,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 2,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 2,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 2,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 2,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 2,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5,
 144,
 30,
 5535,
 18,

In [5]:
X_train=pad_sequences(X_train,maxlen=100)  # Taking max of 100 character legth word sp all word les than 100 will have 0 has starting part and more
# 100 character word length will be deleted

In [6]:
X_train

array([[1415,   33,    6, ...,   19,  178,   32],
       [ 163,   11, 3215, ...,   16,  145,   95],
       [1301,    4, 1873, ...,    7,  129,  113],
       ...,
       [  11,    6, 4065, ...,    4, 3586,    2],
       [ 100, 2198,    8, ...,   12,    9,   23],
       [  78, 1099,   17, ...,  204,  131,    9]], dtype=int32)

In [7]:
X_train[0]

array([1415,   33,    6,   22,   12,  215,   28,   77,   52,    5,   14,
        407,   16,   82,    2,    8,    4,  107,  117, 5952,   15,  256,
          4,    2,    7, 3766,    5,  723,   36,   71,   43,  530,  476,
         26,  400,  317,   46,    7,    4,    2, 1029,   13,  104,   88,
          4,  381,   15,  297,   98,   32, 2071,   56,   26,  141,    6,
        194, 7486,   18,    4,  226,   22,   21,  134,  476,   26,  480,
          5,  144,   30, 5535,   18,   51,   36,   28,  224,   92,   25,
        104,    4,  226,   65,   16,   38, 1334,   88,   12,   16,  283,
          5,   16, 4472,  113,  103,   32,   15,   16, 5345,   19,  178,
         32], dtype=int32)

In [8]:
len(X_train[2])  # here every index word will give length 100 as we have taken 100 word length max

100

In [9]:
X_test=pad_sequences(X_test,maxlen=100)

In [10]:
X_test  # Here we can see all words with less than 100 character length will have 0 as starting to make this equal length

array([[   0,    0,    0, ...,   14,    6,  717],
       [   6,  976, 2078, ...,  125,    4, 3077],
       [   4, 5673,    7, ...,    9,   57,  975],
       ...,
       [   0,    0,    0, ...,   21,  846, 5518],
       [   0,    1,   11, ..., 2302,    7,  470],
       [  56,   96,  346, ...,   34, 2005, 2643]], dtype=int32)

In [11]:
modelSimpleRNN = Sequential([
           Embedding(10000,32,input_length=100), #embedding later to convcer word into vectors, 10000 unique word we are fetching for vector vocubalry
                                                 #32 is no. of vector dimention for each word which we want to convert
           SimpleRNN(5,return_sequences=True),# RNN layer with five units
           SimpleRNN(5),#this is another rnn layer
           Dense(1,activation="sigmoid")#outputlayer for the binary claasificaiton
])


In [12]:
modelSimpleRNN.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 32)           320000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 100, 5)            190       
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 5)                 55        
                                                                 
 dense (Dense)               (None, 1)                 6         
                                                                 
Total params: 320251 (1.22 MB)
Trainable params: 320251 (1.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# defining the same deep concept for the LSTM

modelLSTM = Sequential([
    Embedding(10000,32,input_length=100),
    LSTM(5,return_sequences=True),
    LSTM(5),
    Dense(1, activation="sigmoid")
])

In [14]:
modelLSTM.summary()  # LSTM has more parameter as LSTM has 4 hidden layer(forget gate, update gate and output gate)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 32)           320000    
                                                                 
 lstm (LSTM)                 (None, 100, 5)            760       
                                                                 
 lstm_1 (LSTM)               (None, 5)                 220       
                                                                 
 dense_1 (Dense)             (None, 1)                 6         
                                                                 
Total params: 320986 (1.22 MB)
Trainable params: 320986 (1.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# defining the same deep concept for the GRU


modelGRU = Sequential([
    Embedding(10000,32,input_length=100),
    GRU(5,return_sequences=True),
    GRU(5),
    Dense(1, activation="sigmoid")
])

In [16]:
modelGRU.summary()  # GRU has less parameteres as compare to LSTM as it has only 2 hidden layer(2 gate)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 100, 32)           320000    
                                                                 
 gru (GRU)                   (None, 100, 5)            585       
                                                                 
 gru_1 (GRU)                 (None, 5)                 180       
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 320771 (1.22 MB)
Trainable params: 320771 (1.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# What is the use of compilation of model
Explanation of the code:
This code snippet uses the compile() method to configure several settings for each of the three RNN models (SimpleRNN, LSTM, and GRU) before training them.

1. optimizer='adam':

This specifies the optimization algorithm used to update the model's weights during training.
Adam is a commonly used optimizer for neural networks, known for its efficient and stable performance.
2. loss='binary_crossentropy':

This defines the loss function used to measure the model's error during training.
Binary cross-entropy is a suitable loss function for binary classification tasks like sentiment analysis, where the model predicts a probability between 0 and 1 for each class.
3. metrics=['accuracy']:

This specifies the metrics used to evaluate the model's performance during training and testing.
Accuracy is a commonly used metric for classification tasks, measuring the percentage of correctly classified instances.
Why is model compile required for RNN models?
Model compilation is a crucial step in any deep learning project, including RNNs. It serves several essential purposes:

1. Configuring the training process:

Compiling the model defines the optimizer, loss function, and metrics used during training.
This allows the model to learn from data and improve its performance.
2. Preparing the internal state:

RNNs rely on internal states (hidden states) to process sequences of data.
Compiling the model initializes these states and ensures they are properly handled during training and prediction.
3. Checking for errors:

The compilation process can detect certain errors in the model architecture or configuration.
This helps identify and fix issues before proceeding with training.
4. Optimizing computational efficiency:

Compiling the model allows the framework to optimize certain operations for the specific hardware and software environment.
This can improve the overall training speed and performance of the model.
In summary, compiling an RNN model is essential for configuring the training process, initializing internal states, checking for errors, and optimizing computational efficiency. It ensures the model is properly set up for learning and making accurate predictions on unseen data

In [17]:
# Compile each RNN model
modelSimpleRNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

modelLSTM.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

modelGRU.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Is avlidation data and test data are same

No, validation data and test data are not the same, although they both play important roles in machine learning and deep learning. Here's how they differ:

Validation data:

Used to fine-tune the model during training.
Helps to prevent overfitting by allowing the model to evaluate its performance on data it hasn't seen before.
Used to adjust hyperparameters like the learning rate or number of hidden units.
Typically not included in the final evaluation of the model's performance.

Test data:

Used to evaluate the final trained model on unseen data.
Provides an unbiased estimate of the model'sgeneralizability to real-world scenarios.
Never used to train or fine-tune the model.

Key Differences:

Feature--Validation data--Test data
Purpose--Fine-tuning model,--preventing overfitting--Evaluating model's generalizability
Usage--During training--After training
Included in final evaluation--No--Yes


Analogy:

Think of validation data as a dress rehearsal before a play. You use it to practice and make adjustments before the final performance.
Think of test data as the actual performance of the play. It shows how well the play goes over with a real audience.

In summary:

Validation data helps you build a better model.
Test data tells you how well your model works in the real world.
Both are essential for ensuring your model is reliable and performs well on unseen data.

In [18]:
# Train the model

history1 = modelSimpleRNN.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

history2 = modelLSTM.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

history3 = modelGRU.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Let experiment with test data

In [20]:
X_test[0].shape

(100,)

In [24]:
y_test[0].shape

()

In [28]:
X_test_data = X_test[0][0:100].reshape(1,-1)

In [26]:
#y_test_data = y_test[0][0:100].reshape(1,-1)

In [30]:
X_test_data.shape

(1, 100)

In [52]:
modelSimpleRNN.predict(X_test_data)



array([[0.18255018]], dtype=float32)

In [53]:
modelLSTM.predict(X_test_data)



array([[0.02839947]], dtype=float32)

In [54]:
modelGRU.predict(X_test_data)



array([[0.2695214]], dtype=float32)

# bi-directional RNN and LSTM and GRU

In [55]:
from tensorflow.keras.layers import Embedding, Bidirectional, SimpleRNN, LSTM,GRU,Dense

In [56]:
X_train


array([[1415,   33,    6, ...,   19,  178,   32],
       [ 163,   11, 3215, ...,   16,  145,   95],
       [1301,    4, 1873, ...,    7,  129,  113],
       ...,
       [  11,    6, 4065, ...,    4, 3586,    2],
       [ 100, 2198,    8, ...,   12,    9,   23],
       [  78, 1099,   17, ...,  204,  131,    9]], dtype=int32)

In [57]:
X_test


array([[   0,    0,    0, ...,   14,    6,  717],
       [   6,  976, 2078, ...,  125,    4, 3077],
       [   4, 5673,    7, ...,    9,   57,  975],
       ...,
       [   0,    0,    0, ...,   21,  846, 5518],
       [   0,    1,   11, ..., 2302,    7,  470],
       [  56,   96,  346, ...,   34, 2005, 2643]], dtype=int32)

In [58]:
maxlen=100
embedding_dim=32

In [62]:
# Below is SimpleRNN model

modelSimpleRNN = Sequential([
           Embedding(10000,32,input_length=100), #embedding later to convcer word into vectors, 10000 unique word we are fetching for vector vocubalry
                                                 #32 is no. of vector dimention for each word which we want to convert
           SimpleRNN(5,return_sequences=True),# RNN layer with five units
           SimpleRNN(5),#this is another rnn layer
           Dense(1,activation="sigmoid")#outputlayer for the binary claasificaiton
])

In [63]:
modelSimpleRNN.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 100, 32)           320000    
                                                                 
 simple_rnn_5 (SimpleRNN)    (None, 100, 5)            190       
                                                                 
 simple_rnn_6 (SimpleRNN)    (None, 5)                 55        
                                                                 
 dense_5 (Dense)             (None, 1)                 6         
                                                                 
Total params: 320251 (1.22 MB)
Trainable params: 320251 (1.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [64]:
# So by this way we can convert same SimleRNN to bi-directional Simple RNN

modelSimpleRNNBidirectional = Sequential([
           Embedding(10000,32,input_length=100), #embedding later to convcer word into vectors, 10000 unique word we are fetching for vector vocubalry
                                                 #32 is no. of vector dimention for each word which we want to convert
           Bidirectional(SimpleRNN(5,return_sequences=True)),# RNN layer with five units by adding bi-directional layer
           #SimpleRNN(5),#this is another rnn layer. This will removed
           Dense(1,activation="sigmoid")#outputlayer for the binary claasificaiton
])

In [65]:
modelSimpleRNNBidirectional.summary()  # In bidirectional total parameter increased

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 100, 32)           320000    
                                                                 
 bidirectional_1 (Bidirecti  (None, 100, 10)           380       
 onal)                                                           
                                                                 
 dense_6 (Dense)             (None, 100, 1)            11        
                                                                 
Total params: 320391 (1.22 MB)
Trainable params: 320391 (1.22 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [67]:
modelLSTMBidirectional = Sequential([
    Embedding(10000,32,input_length=100),
    Bidirectional(LSTM(5,return_sequences=True)),
    #LSTM(5),
    Dense(1, activation="sigmoid")
])

In [68]:
modelLSTMBidirectional.summary()  # Here in LSTM bidirectional as well paramter increased

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 100, 32)           320000    
                                                                 
 bidirectional_3 (Bidirecti  (None, 100, 10)           1520      
 onal)                                                           
                                                                 
 dense_8 (Dense)             (None, 100, 1)            11        
                                                                 
Total params: 321531 (1.23 MB)
Trainable params: 321531 (1.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [69]:
modelGRUBidirectional = Sequential([
    Embedding(10000,32,input_length=100),
    Bidirectional(GRU(5,return_sequences=True)),
    #GRU(5),
    Dense(1, activation="sigmoid")
])

In [70]:
modelGRUBidirectional.summary()  # same Here as well

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_9 (Embedding)     (None, 100, 32)           320000    
                                                                 
 bidirectional_4 (Bidirecti  (None, 100, 10)           1170      
 onal)                                                           
                                                                 
 dense_9 (Dense)             (None, 100, 1)            11        
                                                                 
Total params: 321181 (1.23 MB)
Trainable params: 321181 (1.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
