'''END to ENd Deep Learning using Simple RNN'''

In [None]:
# 🔹 Libraries to Import
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
# •	numpy: For numerical operations.
# •	tensorflow & keras: For building the neural network.
# •	imdb: Built-in dataset of TensorFlow.
# •	sequence: For padding sequences (to make sentence lengths equal).
# •	Sequential: Used to create the model step-by-step.
# •	Embedding: Turns word indexes into dense vectors.
# •	SimpleRNN: Recurrent Neural Network layer.
# •	Dense: Fully connected layer for classification.


# 🔹 Load and Prepare the Dataset
max_features = 10000  # Vocabulary size (top 10,000 frequent words)

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
# •	We are using only top 10,000 most frequent words.
# •	x_train: List of movie reviews as integer word indexes.
# •	y_train: Labels → 1 (positive), 0 (negative).
# ✅ Print dataset size:
# Print the shape of the data
print(f'Training data shape: {x_train.shape}, Training labels shape: {y_train.shape}')
print(f'Testing data shape: {x_test.shape}, Testing labels shape: {y_test.shape}')
print(len(x_train), len(x_test))  # 25000 training and 25000 testing reviews
# Training data shape: (25000,), Training labels shape: (25000,)
# Testing data shape: (25000,), Testing labels shape: (25000,)
# 25000 25000

# 🔹 Look at One Example Review
sample_review = x_train[0]
sample_label = y_train[0]
# •	x_train[0] → A review as a list of integers.
# 📌 These are not actual words but indexes of the words.
# 🔍 Example:
print("Sample review as integers:", sample_review)
print("Sample label:", sample_label)  # 1 = Positive review
# Sample review as integers: [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
# Sample label: 1


# 🔹 Convert Integer Indexes Back to Words

word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}
# •	word_index: A dictionary mapping words to their indexes.
# •	reverse_word_index: Reverse mapping (index → word).
# ✅ Decode the review:
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in sample_review])
print(decoded_review)
# ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all

# •	Why i - 3?
# o	Because the first 3 indexes (0,1,2) are reserved for padding, start, and unknown tokens.
# •	If a word index is not found → ? is shown.
# 🔍 Example output:
# ? this film was just brilliant casting location story direction ...

# 🔹 Pad Sequences (Make All Reviews Same Length)

max_length = 500  # Limit every review to 500 words

x_train = sequence.pad_sequences(x_train, maxlen=max_length)
x_test = sequence.pad_sequences(x_test, maxlen=max_length)
# •	Shorter reviews are padded with 0s at the beginning (pre-padding).
# •	This ensures all reviews are of equal length = 500 words.
# 🔍 Check an example after padding:
print(x_train[0])
# 📌 Output:
# [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    0    0    0    0    0    0    0    0    0    0    0    0
#     0    0    1   14   22   16   43  530  973 1622 1385   65  458 4468
#    66 3941    4  173   36  256    5   25  100   43  838  112   50  670
#     2    9   35  480  284    5  150    4  172  112  167    2  336  385
#    39    4  172 4536 1111   17  546   38   13  447    4  192   50   16
#     6  147 2025   19   14   22    4 1920 4613  469    4   22   71   87
#    12   16   43  530   38   76   15   13 1247    4   22   17  515   17
#    12   16  626   18    2    5   62  386   12    8  316    8  106    5
#     4 2223 5244   16  480   66 3785   33    4  130   12   16   38  619
#     5   25  124   51   36  135   48   25 1415   33    6   22   12  215
#    28   77   52    5   14  407   16   82    2    8    4  107  117 5952
#    15  256    4    2    7 3766    5  723   36   71   43  530  476   26
#   400  317   46    7    4    2 1029   13  104   88    4  381   15  297
#    98   32 2071   56   26  141    6  194 7486   18    4  226   22   21
#   134  476   26  480    5  144   30 5535   18   51   36   28  224   92
#    25  104    4  226   65   16   38 1334   88   12   16  283    5   16
#  4472  113  103   32   15   16 5345   19  178   32]




Training data shape: (25000,), Training labels shape: (25000,)
Testing data shape: (25000,), Testing labels shape: (25000,)
25000 25000
Sample review as integers: [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25,

'''Train Simple RNN Model on IMDB Reviews'''

In [None]:
model = Sequential()
 # 📏 Sequential Model is used to create a sequential model.
# Why we need a Sequential model?
# •	→ Sequential model is used to create a sequential model.

# 🔹 Add Embedding Layer
model.add(Embedding(max_features, 128, input_length=max_length))
# •	📘 Embedding layer helps in converting each word (represented by an index) into a dense vector of fixed size.
# •	max_features: Vocabulary size (top 10,000 words).
# •	128: Number of dimensions for the vector representation of each word.
# •	input_length: Max number of words in a sentence (here, 500).
# 📝 So, each sentence will be converted into a 500 × 128 matrix.

# 🔹 Add Simple RNN Layer
model.add(SimpleRNN(128, activation='relu'))
# •	🔁 SimpleRNN processes sequence data and keeps track of what it has seen before.
# •	128: Number of RNN units (neurons).
# •	activation='relu': Activation function for the layer.
# 📝 RNN helps in understanding the sequence and relationships between words.

# 🔹 Add Output Layer (Dense)
model.add(Dense(1, activation='sigmoid'))
# •	🎯 Dense layer gives the final output.
# •	1: One output node (since it's binary classification: Positive or Negative).
# •	sigmoid: Converts the output to a probability between 0 and 1.

# 🔹 Model Summary
model.summary()
# Model: "sequential"
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
# ┃ Layer (type)                   ┃ Output Shape           ┃     Param #   ┃
# ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
# │ embedding (Embedding)          │ ?                      │     0 (unbuilt)│
# ├─────────────────────────────────┼────────────────────────┼───────────────┤
# │ simple_rnn (SimpleRNN)         │ ?                      │     0 (unbuilt)│
# ├─────────────────────────────────┼────────────────────────┼───────────────┤
# │ dense (Dense)                  │ ?                      │     0 (unbuilt)│
# └─────────────────────────────────┴────────────────────────┴───────────────┘


# •	📊 Shows a table with layer types, shapes, and number of parameters.
# •	Helps you understand how many total parameters your model is training.
# 📝 If it were a multi-class problem, we would use Dense(num_classes, activation='softmax').

# 🔹 Compile the Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# •	⚙️ This step prepares the model for training.
# •	optimizer='adam': Adam optimizer is efficient and adaptive.
# •	loss='binary_crossentropy': Used for binary classification.
# •	metrics=['accuracy']: Monitor accuracy during training.


# 🔹 Set Up Early Stopping
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(
    monitor='val_loss', # 📏 Monitor the validation type of loss which minimizes during training.
    patience=5,
    restore_best_weights=True
)
# •	🛑 EarlyStopping stops training when the model stops improving on validation data.
# •	monitor='val_loss': Watch the validation loss.
# •	patience=5: Wait 5 epochs before stopping if no improvement.
# •	restore_best_weights=True: After stopping, load the best weights (where val_loss was minimum).
# 📝 This prevents overfitting and saves training time.


# 🔹 Train the Model with Early Stopping
history = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping]
)
# •	🎓 This is where the model learns from data.
# •	x_train, y_train: Training data and labels.
# •	epochs=10: Train for 10 cycles.
# •	batch_size=32: Process 32 samples at a time.
# •	validation_split=0.2: 20% of training data used for validation.
# •	callbacks=[early_stopping]: Monitor training and apply early stopping.
# 📝 This training will take some time, especially with 25,000 records.
# With every epoch, the model becomes better at predicting the correct output (hopefully!).
# The model won’t learn from this 20% — it only uses it to evaluate how well it's doing while training.


# Epoch 1/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 105s 166ms/step - accuracy: 0.6281 - loss: 51328021692416.0000 - val_accuracy: 0.6294 - val_loss: 0.6551
# Epoch 2/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 114s 183ms/step - accuracy: 0.5788 - loss: 0.7105 - val_accuracy: 0.5338 - val_loss: 0.7310
# Epoch 3/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 108s 173ms/step - accuracy: 0.6382 - loss: 0.6346 - val_accuracy: 0.5842 - val_loss: 0.6551
# Epoch 4/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 113s 181ms/step - accuracy: 0.6794 - loss: 0.6031 - val_accuracy: 0.5776 - val_loss: 0.6492
# Epoch 5/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 113s 180ms/step - accuracy: 0.7049 - loss: 0.5834 - val_accuracy: 0.5940 - val_loss: 0.6505
# Epoch 6/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 150s 240ms/step - accuracy: 0.7225 - loss: 0.5634 - val_accuracy: 0.6230 - val_loss: 0.6335
# Epoch 7/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 156s 250ms/step - accuracy: 0.7366 - loss: 0.5451 - val_accuracy: 0.6062 - val_loss: 0.6434
# Epoch 8/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 169s 270ms/step - accuracy: 0.7387 - loss: 0.5328 - val_accuracy: 0.6258 - val_loss: 0.6366
# Epoch 9/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 178s 284ms/step - accuracy: 0.7594 - loss: 0.5172 - val_accuracy: 0.6302 - val_loss: 0.6345
# Epoch 10/10
# 625/625 ━━━━━━━━━━━━━━━━━━━━ 107s 170ms/step - accuracy: 0.7635 - loss: 0.5026 - val_accuracy: 0.6294 - val_loss: 0.6366



Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 166ms/step - accuracy: 0.6281 - loss: 51328021692416.0000 - val_accuracy: 0.6294 - val_loss: 0.6551
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 183ms/step - accuracy: 0.5788 - loss: 0.7105 - val_accuracy: 0.5338 - val_loss: 0.7310
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 173ms/step - accuracy: 0.6382 - loss: 0.6346 - val_accuracy: 0.5842 - val_loss: 0.6551
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 181ms/step - accuracy: 0.6794 - loss: 0.6031 - val_accuracy: 0.5776 - val_loss: 0.6492
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 180ms/step - accuracy: 0.7049 - loss: 0.5834 - val_accuracy: 0.5940 - val_loss: 0.6505
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 240ms/step - accuracy: 0.7225 - loss: 0.5634 - val_accuracy: 0.6230 - val_loss

In [8]:
# ✔️ Saving the Model
# •	💾 Now the model is saved using the .h5 format.
# •	📁 We can save logs too and visualize them using TensorBoard.
# •	💡 Command used:
model.save("simple_RNN_IMDb.h5")


