# **RNN MODEL**

# Importing Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras import preprocessing

# Dataset loading

In [None]:
# --- Data configuration ---
# Vocabulary cap: only the `max_features` most frequent words are kept as features.
max_features = 10000

# Every review is brought to exactly `maxlen` tokens (longer ones are truncated,
# shorter ones are padded).
maxlen = 200

# The IMDB reviews come already encoded as integer sequences; `num_words`
# restricts them to the top `max_features` words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Give both splits a uniform length in one pass. With padding="post" the zeros
# are appended to the END of any review shorter than `maxlen`.
x_train, x_test = (
    preprocessing.sequence.pad_sequences(reviews, maxlen=maxlen, padding="post")
    for reviews in (x_train, x_test)
)

# Sanity check: each split should now be a 2-D array with `maxlen` columns.
print("Shape of training data:", x_train.shape)
print("Shape of test data:", x_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Shape of training data: (25000, 200)
Shape of test data: (25000, 200)


# Build the RNN Model

In [None]:
def create_rnn_model(vocab_size=None, embedding_dim=32, rnn_units=32):
    """Build an uncompiled Embedding -> SimpleRNN -> sigmoid classifier.

    Args:
        vocab_size: Number of distinct word ids the Embedding layer accepts.
            When omitted, the module-level ``max_features`` is used.
        embedding_dim: Length of the dense vector learned for each word.
        rnn_units: Number of units in the SimpleRNN layer.

    Returns:
        A ``Sequential`` model that emits one value in (0, 1) per review,
        interpreted as the probability that the review is positive.
    """
    if vocab_size is None:
        vocab_size = max_features

    # Use the Sequential API to build the model layer by layer.
    model = Sequential()

    # Embedding layer: maps each integer-encoded word to a learned dense vector.
    # Arguments: (number of words, embedding dimension).
    # mask_zero=True marks id 0 as padding so downstream layers skip it. The
    # reviews are padded with zeros at the END, so without a mask the RNN would
    # keep updating its state over the padding and dilute what it learned from
    # the actual words before producing its final output.
    model.add(Embedding(vocab_size, embedding_dim, mask_zero=True))

    # SimpleRNN layer: the core of the model. It walks through the word vectors
    # one timestep at a time while maintaining an internal state, and returns
    # the state after the last (unmasked) timestep.
    model.add(SimpleRNN(rnn_units))

    # Output layer: a single neuron whose sigmoid activation squashes the
    # result into the range 0..1.
    model.add(Dense(1, activation='sigmoid'))

    return model

# Compile and Train the Model

In [None]:
# Instantiate a fresh, untrained network.
rnn_model = create_rnn_model()

# Configure training:
#   loss      - 'binary_crossentropy', the standard choice for a two-class
#               (positive / negative) problem
#   optimizer - 'rmsprop', a good optimizer for RNNs
#   metrics   - 'accuracy' is reported alongside the loss
rnn_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Fit on the training split.
#   batch_size       - samples per gradient update
#   epochs           - number of full passes over the training data
#   validation_split - fraction of the training data held out for validation
history = rnn_model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
157/157 ━━━━━━━━━━━━━━━━━━━━ 12s 61ms/step - accuracy: 0.5079 - loss: 0.6925 - val_accuracy: 0.5430 - val_loss: 0.6844
Epoch 2/5
157/157 ━━━━━━━━━━━━━━━━━━━━ 11s 67ms/step - accuracy: 0.5857 - loss: 0.6646 - val_accuracy: 0.5500 - val_loss: 0.6871
Epoch 3/5
157/157 ━━━━━━━━━━━━━━━━━━━━ 20s 66ms/step - accuracy: 0.6665 - loss: 0.6059 - val_accuracy: 0.6608 - val_loss: 0.6404
Epoch 4/5
157/157 ━━━━━━━━━━━━━━━━━━━━ 10s 66ms/step - accuracy: 0.6978 - loss: 0.5529 - val_accuracy: 0.6812 - val_loss: 0.6298
Epoch 5/5
157/157 ━━━━━━━━━━━━━━━━━━━━ 10s 63ms/step - accuracy: 0.7974 - loss: 0.4543 - val_accuracy: 0.6424 - val_loss: 0.6571


# Evaluate the Model

In [None]:
# Score the trained network on the held-out test split to get a final measure
# of how well it generalizes to reviews it has never seen. With a single
# 'accuracy' metric configured, evaluate() yields the loss followed by accuracy.
loss, accuracy = rnn_model.evaluate(x=x_test, y=y_test)

# Report both numbers to four decimal places.
print(f"\nTest loss: {loss:.4f}")
print(f"Test accuracy: {accuracy:.4f}")

782/782 ━━━━━━━━━━━━━━━━━━━━ 9s 11ms/step - accuracy: 0.6371 - loss: 0.6663

Test loss: 0.6677
Test accuracy: 0.6345
