In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences


## Import The Dataset

In [None]:
# Vocabulary size: keep only the 10,000 most frequent words of the corpus.
num_words = 10000
# Pass the variable (not a second literal) so the dataset and the Embedding
# layer's input_dim, which also reads num_words, cannot drift out of sync.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
# Encoded form of the first training review (one integer index per word)
print(f"Review (indices) of Sentence 1 : {x_train[0]}")

# Target label stored for that same review
print(f"Sentiment of Sentence 1 : {y_train[0]}")

Review (indices) of Sentence 1 : [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
Sentiment of Sentence 1 : 1

In [None]:
# Number of reviews in the training split
np.shape(x_train)

(25000,)

In [None]:
# Number of reviews in the test split
np.shape(x_test)

(25000,)

In [None]:
# Bring every review to the same length so they can be batched as one 2-D
# array: longer reviews are cut to `maxlen` tokens, shorter ones are filled
# with zeros.
maxlen = 500  # tokens kept per review; smaller values (100, 200, ...) train faster

# Pad at the FRONT ('pre'). With padding='post' a short review is followed by
# a long run of zeros, and the SimpleRNN (which only returns its final state)
# has to carry the review through all of them, which largely erases it.
# truncating='post' is unchanged: long reviews still keep their first tokens.
x_train_padded = pad_sequences(x_train, maxlen=maxlen, padding='pre', truncating='post')
x_test_padded = pad_sequences(x_test, maxlen=maxlen, padding='pre', truncating='post')


## Build The RNN Model

In [None]:
# Start from an empty layer stack; layers are appended one at a time afterwards.
model = Sequential()

In [None]:
# Embedding layer: maps each word index to a trainable 128-dimensional vector.
# mask_zero=True treats index 0 as padding, so the recurrent layer skips the
# zero-filled timesteps added by pad_sequences instead of reading them as words.
model.add(Embedding(input_dim=num_words, output_dim=128, mask_zero=True))

In [None]:
# Recurrent hidden layer: reads the embedded review token by token and
# returns only its final 128-unit state.
# tanh (the layer default) replaces 'relu' here: relu is unbounded, so the
# state fed back at each of the `maxlen` steps can grow without limit, which
# is consistent with the huge training losses this notebook logged
# (e.g. 1760879.0 in epoch 1).
model.add(SimpleRNN(units=128, activation='tanh'))

In [None]:
# Output layer: a single sigmoid unit for the binary sentiment target
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Build the model for inputs of shape (batch, maxlen); the batch dimension is
# left unspecified (None).
model.build((None, maxlen))


In [None]:
# Print the layer-by-layer overview of the model assembled so far
model.summary()

In [None]:
# Training configuration.
# The string 'adam' selects the Adam optimizer with its default settings
# (default learning rate: 0.001).
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Early stopping: end training once the validation loss stops improving.
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor='val_loss',          # quantity that is watched
    patience=5,                  # epochs without improvement tolerated before stopping
    restore_best_weights=True,   # roll back to the weights of the best epoch
)

In [None]:
# Fit on the padded training reviews, holding out 20% of them for validation,
# with the early-stopping callback attached.
model_history = model.fit(
    x_train_padded,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 197ms/step - accuracy: 0.5020 - loss: 1760879.0000 - val_accuracy: 0.4960 - val_loss: 0.6939
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 201ms/step - accuracy: 0.5154 - loss: 0.6921 - val_accuracy: 0.5048 - val_loss: 0.6934
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 200ms/step - accuracy: 0.5194 - loss: 1197.1859 - val_accuracy: 0.4954 - val_loss: 0.7044
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 199ms/step - accuracy: 0.5086 - loss: 0.6876 - val_accuracy: 0.4958 - val_loss: 0.6958
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 201ms/step - accuracy: 0.5249 - loss: 0.6782 - val_accuracy: 0.5052 - val_loss: 0.6960
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 199ms/step - accuracy: 0.5356 - loss: 0.6723 - val_accuracy: 0.4968 - val_loss: 0.

In [None]:
# Save the trained model to Google Drive (this cell only works inside Colab).
from google.colab import drive
drive.mount('/content/drive')  # makes Drive available under /content/drive


# NOTE(review): the .h5 suffix selects the legacy HDF5 format; Keras recommends
# the native .keras format. Consider switching if nothing depends on this file.
model.save('/content/drive/MyDrive/SimpleRNN_IMDB_Model.h5')
print("saved")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
saved


In [None]:
import os

# Show the entries of the current working directory
print(os.listdir('.'))


['.config', 'drive', 'sample_data']
