## RNN DEMO

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


## LOAD DATASET

In [2]:

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Download IMDB dataset
print("Loading dataset...")
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)


Loading dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [5]:

# Get word index mapping
word_index = tf.keras.datasets.imdb.get_word_index()

# Create a reverse mapping to decode the reviews
reverse_word_index = {value: key for key, value in word_index.items()}
decode_review = lambda review: ' '.join([reverse_word_index.get(i - 3, '?') for i in review])



In [6]:
# Print sample review
print("\nSample review:")
print(decode_review(x_train[0]))
print(f"Label: {'Positive' if y_train[0] == 1 else 'Negative'}")


Sample review:
? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have do

In [8]:
#Parameters
vocab_size = 10000
max_length = 250
embedding_dim = 100
rnn_units = 128
batch_size = 64
epochs = 5

# Pad sequences
print("\nPreparing data...")
x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')

# Print data shapes
print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")


Preparing data...
Training data shape: (25000, 250)
Test data shape: (25000, 250)


## BUILD MODEL

In [9]:
# Create validation split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)
print(f"Validation data shape: {x_val.shape}")

# Build the RNN model
print("\nBuilding RNN model...")
model = Sequential([
    Embedding(vocab_size, embedding_dim, mask_zero=True),
    BatchNormalization(),
    SimpleRNN(rnn_units, return_sequences=True,
              kernel_initializer='glorot_uniform',
              recurrent_initializer='orthogonal'),
    BatchNormalization(),
    SimpleRNN(rnn_units//2,
              kernel_initializer='glorot_uniform',
              recurrent_initializer='orthogonal'),
    BatchNormalization(),
    Dense(64, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.3),
    Dense(32, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])


Validation data shape: (5000, 250)

Building RNN model...


In [10]:
# Compile the model with class weights
total_samples = len(y_train)
pos_samples = np.sum(y_train)
neg_samples = total_samples - pos_samples
class_weight = {
    0: total_samples / (2 * neg_samples),
    1: total_samples / (2 * pos_samples)
}

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Model summary
model.summary()

In [11]:
# Define callbacks
callbacks = [
    ModelCheckpoint(
        'best_rnn_model.keras',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    )
]

# Train the model
print("\nTraining the model...")
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    class_weight=class_weight,
    verbose=1
)



Training the model...
Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 247ms/step - accuracy: 0.5098 - loss: 0.8287
Epoch 1: val_accuracy improved from -inf to 0.54740, saving model to best_rnn_model.keras
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 265ms/step - accuracy: 0.5098 - loss: 0.8285 - val_accuracy: 0.5474 - val_loss: 0.6851
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step - accuracy: 0.6363 - loss: 0.6300
Epoch 2: val_accuracy improved from 0.54740 to 0.62600, saving model to best_rnn_model.keras
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 260ms/step - accuracy: 0.6364 - loss: 0.6299 - val_accuracy: 0.6260 - val_loss: 0.6347
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step - accuracy: 0.7100 - loss: 0.5646
Epoch 3: val_accuracy did not improve from 0.62600
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 265m

In [12]:
# Print training history
print("\nTraining History:")
for epoch in range(len(history.history['loss'])):  # Use actual number of completed epochs
    print(f"Epoch {epoch+1}/{len(history.history['loss'])}:")
    print(f"  Training Loss: {history.history['loss'][epoch]:.4f}")
    print(f"  Training Accuracy: {history.history['accuracy'][epoch]:.4f}")
    print(f"  Validation Loss: {history.history['val_loss'][epoch]:.4f}")
    print(f"  Validation Accuracy: {history.history['val_accuracy'][epoch]:.4f}")



Training History:
Epoch 1/5:
  Training Loss: 0.7567
  Training Accuracy: 0.5218
  Validation Loss: 0.6851
  Validation Accuracy: 0.5474
Epoch 2/5:
  Training Loss: 0.6121
  Training Accuracy: 0.6618
  Validation Loss: 0.6347
  Validation Accuracy: 0.6260
Epoch 3/5:
  Training Loss: 0.5589
  Training Accuracy: 0.7173
  Validation Loss: 0.6668
  Validation Accuracy: 0.5832
Epoch 4/5:
  Training Loss: 0.4875
  Training Accuracy: 0.7727
  Validation Loss: 0.8709
  Validation Accuracy: 0.5690
Epoch 5/5:
  Training Loss: 0.4588
  Training Accuracy: 0.7912
  Validation Loss: 0.6870
  Validation Accuracy: 0.5612


In [13]:
# Evaluate on test set
print("\n" + "="*50)
print("TESTING ON TEST SET")
print("="*50)
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=1)
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}")

# Generate predictions on test set
print("\nGenerating predictions on test set...")
y_pred_prob = model.predict(x_test, verbose=1)
y_pred = (y_pred_prob > 0.5).astype(int).flatten()

# Calculate detailed metrics on test set
print("\nTest Set Classification Report:")
report = classification_report(y_test, y_pred)
print(report)


TESTING ON TEST SET
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 36ms/step - accuracy: 0.5759 - loss: 0.6741
Test accuracy: 0.5761
Test loss: 0.6742

Generating predictions on test set...
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 36ms/step

Test Set Classification Report:
              precision    recall  f1-score   support

           0       0.57      0.59      0.58     12500
           1       0.58      0.57      0.57     12500

    accuracy                           0.58     25000
   macro avg       0.58      0.58      0.58     25000
weighted avg       0.58      0.58      0.58     25000



In [14]:
# Print confusion matrix for test set
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix (Test Set):")
print(f"                | Predicted Negative | Predicted Positive |")
print(f"Actual Negative | {cm[0][0]:<18} | {cm[0][1]:<18} |")
print(f"Actual Positive | {cm[1][0]:<18} | {cm[1][1]:<18} |")


Confusion Matrix (Test Set):
                | Predicted Negative | Predicted Positive |
Actual Negative | 7318               | 5182               |
Actual Positive | 5416               | 7084               |


In [15]:
# Display sample test set predictions
print("\nSample Test Set Predictions:")
for i in range(5):
    review = decode_review(x_test[i])
    prediction = "Positive" if y_pred[i] == 1 else "Negative"
    actual = "Positive" if y_test[i] == 1 else "Negative"
    confidence = y_pred_prob[i][0]
    print("-" * 80)
    print(f"Review: {review[:100]}...")
    print(f"Predicted: {prediction} (confidence: {confidence:.4f}), Actual: {actual}")

# Calculate additional metrics
accuracy = accuracy_score(y_test, y_pred)
report_dict = classification_report(y_test, y_pred, output_dict=True)
precision = report_dict['weighted avg']['precision']
recall = report_dict['weighted avg']['recall']
f1 = report_dict['weighted avg']['f1-score']


Sample Test Set Predictions:
--------------------------------------------------------------------------------
Review: ? please give this one a miss br br ? ? and the rest of the cast rendered terrible performances the ...
Predicted: Negative (confidence: 0.1314), Actual: Negative
--------------------------------------------------------------------------------
Review: ? this film requires a lot of patience because it focuses on mood and character development the plot...
Predicted: Negative (confidence: 0.4092), Actual: Positive
--------------------------------------------------------------------------------
Review: ? many animation buffs consider ? ? the great forgotten genius of one special branch of the art pupp...
Predicted: Negative (confidence: 0.4783), Actual: Positive
--------------------------------------------------------------------------------
Review: ? i generally love this type of movie however this time i found myself wanting to kick the screen si...
Predicted: Positive (

In [16]:
# Summary of test results
print("\n" + "="*50)
print("TEST SET SUMMARY")
print("="*50)
print(f"Total test samples: {len(y_test)}")
print(f"Correct predictions: {sum(y_pred == y_test)}")
print(f"Incorrect predictions: {sum(y_pred != y_test)}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Class-specific metrics
print("\nClass-specific metrics:")
print("Negative reviews:")
print(f"  Precision: {report_dict['0']['precision']:.4f}")
print(f"  Recall: {report_dict['0']['recall']:.4f}")
print(f"  F1-Score: {report_dict['0']['f1-score']:.4f}")

print("Positive reviews:")
print(f"  Precision: {report_dict['1']['precision']:.4f}")
print(f"  Recall: {report_dict['1']['recall']:.4f}")
print(f"  F1-Score: {report_dict['1']['f1-score']:.4f}")

print("\nDone!")


TEST SET SUMMARY
Total test samples: 25000
Correct predictions: 14402
Incorrect predictions: 10598
Accuracy: 0.5761
Precision: 0.5761
Recall: 0.5761
F1-Score: 0.5760

Class-specific metrics:
Negative reviews:
  Precision: 0.5747
  Recall: 0.5854
  F1-Score: 0.5800
Positive reviews:
  Precision: 0.5775
  Recall: 0.5667
  F1-Score: 0.5721

Done!
