# End to end Deep Learning Project using Simple RNN

In [1]:
import numpy as np
import tensorflow as ts
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding,Dense, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence


In [2]:
## Load the imdb dataset, restricted to the `max_features` most common words
max_features=10000 ## Vocab size
(X_train,y_train),(X_test,y_test)=imdb.load_data(num_words=max_features)

## Print the shape of data
print(f'Training data shape : {X_train.shape}, Training labels shape : {y_train.shape}')
## The test-label shape must come from y_test (it previously reported y_train by mistake)
print(f'Testing data shape : {X_test.shape}, Testing labels shape : {y_test.shape}')

Training data shape : (25000,), Training labels shape : (25000,)
Testing data shape : (25000,), Testing labels shape : (25000,)


In [3]:
# Look at the first training example and its label.
# The review is a sequence of integer word indices (one integer per token), not raw text.
sample_review, sample_label = X_train[0], y_train[0]

print(f'This is my sample review (as integers) : {sample_review}')
print(f'This is my sample label (as integers) : {sample_label}')

This is my sample review (as integers) : [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
This is my sample l

In [4]:
## Map word indices back to words (for understanding the encoded reviews)
word_index = imdb.get_word_index()
## Swap keys and values: word -> index becomes index -> word
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

| **Part**                                                    | **Explanation**                                                                     |
| ----------------------------------------------------------- | ----------------------------------------------------------------------------------- |
| `sample_review`                                             | A list of integers representing a tokenized review.                                 |
| `i-3`                                                       | Adjusts the index because Keras reserves 0, 1, and 2 for special tokens.            |
| `reverse_word_index.get(i-3, '?')`                          | Looks up the word for each adjusted index. Returns `'?'` if the index is not found. |
| `[reverse_word_index.get(i-3, '?') for i in sample_review]` | Converts all integers in the review to words.                                       |
| `' '.join(...)`                                             | Joins the list of words into a single string with spaces.                           |


In [5]:
## Turn the integer-encoded sample back into text: shift each index down by 3
## before the lookup, and fall back to '?' when the shifted index has no word.
decorded_review = ' '.join(
    reverse_word_index.get(token_id - 3, '?')
    for token_id in sample_review
)

In [6]:
## Display the decoded text of the sample review
decorded_review

"? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you th

In [7]:
## Bring every review to the same length so the data forms one rectangular array
max_len = 500  ## number of tokens kept per review

## pad_sequences pads at the front by default (pre-padding), hence the leading zeros
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_len)
    for reviews in (X_train, X_test)
)
X_train

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

| **Parameter**          | **Explanation**                                                                                         |
| ---------------------- | ------------------------------------------------------------------------------------------------------- |
| `max_features`         | Vocabulary size: total number of unique words you're considering (e.g., 10,000 most common).            |
| `128`                  | **Embedding dimension**: the size of the dense vector for each word (i.e., how many features per word). |
| `input_length=max_len` | Length of input sequences (number of tokens per input).                                                 |


| **Component**       | **Explanation**                                                                |
| ------------------- | ------------------------------------------------------------------------------ |
| `SimpleRNN`         | A type of Recurrent Neural Network layer that processes sequence data.         |
| `128`               | Number of **RNN units** (i.e., size of the hidden state output).               |
| `activation='relu'` | Activation function used inside the RNN unit (ReLU instead of default `tanh`). |



You are creating 128 RNN units — each like a neuron that not only processes input at each time step but also keeps a memory of the previous time step (via a hidden state).

| **Component**          | **Explanation**                                                      |
| ---------------------- | -------------------------------------------------------------------- |
| `Dense(1)`             | A fully connected output layer with **1 neuron**.                    |
| `activation='sigmoid'` | Used for **binary classification** (output will be between 0 and 1). |


In [8]:
## Build the simple RNN classifier: Embedding -> SimpleRNN -> Dense(sigmoid)

model = Sequential([
    # vocabulary size = max_features, embedding dimension = 128
    Embedding(max_features, 128, input_length=max_len),
    # 128 recurrent units with ReLU in place of the default tanh.
    # NOTE(review): the recorded fit log shows a first-epoch training loss of ~6e9;
    # the unbounded ReLU may be the cause - consider trying the default tanh.
    SimpleRNN(128, activation='relu'),
    # single sigmoid unit for the binary sentiment label
    Dense(1, activation='sigmoid'),
])



In [9]:
## Print the layer-by-layer summary of the model
model.summary()

In [10]:
# Create an instance of the early stopping callback (EarlyStopping is imported in the
# import cell at the top of the notebook).
# It watches the validation loss, allows 5 epochs without improvement before stopping,
# and restores the weights from the best epoch.
early_stopping=EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)


In [11]:
## Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

| **Parameter**                      | **Meaning / Function**                                                                                           |
| ---------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
| `X_train`                          | Input training data (e.g., sequences of word indices).                                                           |
| `y_train`                          | Target labels for training (e.g., 0 for negative review, 1 for positive).                                        |
| `validation_data=(X_test, y_test)` | Data used to evaluate the model after each epoch (optional if `validation_split` is used).                       |
| `batch_size=32`                    | Number of samples processed before the model is updated. Smaller batch = slower but often better generalization. |
| `epochs=10`                        | Number of times the model will iterate over the entire `X_train` dataset.                                        |
| `validation_split=0.2`             | Fraction (20%) of the training data held out **from `X_train`** for validation. Ignored whenever `validation_data` is also supplied, because `validation_data` takes precedence. |
| `callbacks=[early_stopping]`       | Stop training early if performance on validation data stops improving.                                           |


In [12]:
## Train the model, evaluating on (X_test, y_test) after every epoch.
## `validation_split=0.2` was removed: Keras ignores it whenever `validation_data`
## is passed, so supplying both was misleading (the run still used all 25000
## training samples, i.e. 782 steps per epoch at batch size 32).
## NOTE(review): the test set doubles as the validation set, so early stopping picks
## the restored weights using test data; consider holding out part of X_train instead
## to keep the test set unseen.
history=model.fit(X_train,y_train,validation_data=(X_test,y_test),
          batch_size=32,epochs=10,
          callbacks=[early_stopping])

Epoch 1/10


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 173ms/step - accuracy: 0.6232 - loss: 5999256064.0000 - val_accuracy: 0.5748 - val_loss: 0.6612
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 163ms/step - accuracy: 0.7174 - loss: 2.7109 - val_accuracy: 0.7884 - val_loss: 0.4682
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 169ms/step - accuracy: 0.8568 - loss: 0.3613 - val_accuracy: 0.8175 - val_loss: 0.4202
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 165ms/step - accuracy: 0.8977 - loss: 0.2688 - val_accuracy: 0.8065 - val_loss: 0.4518
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 168ms/step - accuracy: 0.8963 - loss: 0.2701 - val_accuracy: 0.8041 - val_loss: 0.4539
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 171ms/step - accuracy: 0.9311 - loss: 0.1883 - val_accuracy: 0.8108 - val_loss: 0.4659
Epoch 

In [15]:
## Persist the trained model to disk (HDF5 file, as indicated by the .h5 extension)
model_path = 'simple_rnn_imdb.h5'
model.save(model_path)

