<a href="https://colab.research.google.com/github/NadaHany1/NLP/blob/main/simple_RNN_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [18]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, dropout masks and batch shuffling are repeatable when the
# notebook is re-run from a fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Vocabulary cap passed to the IMDB loader and reused as the embedding input size.
vocab_size = 10000
# Every review is padded/truncated to this many tokens.
max_length = 300

# Reviews arrive as lists of integer word indices; labels are 0/1 sentiment.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring all sequences to a fixed length so they can be batched
# (pad_sequences pads and truncates at the front by default).
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)

# Hold out the last 20% of the training rows as a validation set.
split_fraction = 0.8
split_index = int(len(x_train) * split_fraction)

# Slice the validation part first: the next line overwrites x_train / y_train.
x_valid, y_valid = x_train[split_index:], y_train[split_index:]
x_train, y_train = x_train[:split_index], y_train[:split_index]

print(f"Training set size: {len(x_train)}")
print(f"Validation set size: {len(x_valid)}")
print(f"Test set size: {len(x_test)}")

Training set size: 20000
Validation set size: 5000
Test set size: 25000


In [19]:
# Sentiment classifier: token embedding -> single SimpleRNN layer -> dropout
# -> one sigmoid unit producing the probability of the positive class.
model = Sequential([
    # Declare the input shape explicitly. The `input_length` argument of
    # Embedding is deprecated in Keras 3 (it is ignored and emits a warning),
    # so the sequence length is provided through an Input spec instead.
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=64),
    # return_sequences=False: only the final hidden state is passed on.
    SimpleRNN(64, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), metrics=["accuracy"])

In [20]:
# Early-stopping callback: watches `val_loss` with a patience of 3 epochs and
# is configured to restore the best weights seen when training stops.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train for at most 10 epochs in mini-batches of 32, validating on the
# held-out split after every epoch; the returned History is kept for later use.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_valid, y_valid),
    callbacks=[early_stop],
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 81ms/step - accuracy: 0.5410 - loss: 0.6865 - val_accuracy: 0.7534 - val_loss: 0.5180
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 78ms/step - accuracy: 0.8069 - loss: 0.4348 - val_accuracy: 0.7312 - val_loss: 0.5453
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 75ms/step - accuracy: 0.8855 - loss: 0.2965 - val_accuracy: 0.8010 - val_loss: 0.5287
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 77ms/step - accuracy: 0.9496 - loss: 0.1458 - val_accuracy: 0.8168 - val_loss: 0.5379


In [21]:
# Score the trained model on the test split. `evaluate` returns the loss
# followed by the metrics given to `compile` (here: accuracy).
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.7600 - loss: 0.5145
Test Accuracy: 0.7571
