In [7]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [8]:
vocab_size = 10000  # Top 10,000 most frequent words

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=vocab_size
)


In [9]:
max_length = 200  # Fixed review length

x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)


In [10]:
model = Sequential([
    # Input layer (explicitly defines input shape)
    Input(shape=(max_length,)),

    # Embedding layer: word index → dense vector
    Embedding(input_dim=vocab_size, output_dim=64),

    # Simple RNN layer: core recurrent architecture
    SimpleRNN(64),

    # Output layer: binary sentiment prediction
    Dense(1, activation='sigmoid')
])


In [11]:
model.summary()


In [12]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


In [13]:
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2
)


Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 69ms/step - accuracy: 0.5571 - loss: 0.6709 - val_accuracy: 0.7662 - val_loss: 0.4945
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 65ms/step - accuracy: 0.8141 - loss: 0.4209 - val_accuracy: 0.7860 - val_loss: 0.4867
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 68ms/step - accuracy: 0.8964 - loss: 0.2666 - val_accuracy: 0.7760 - val_loss: 0.5347
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 66ms/step - accuracy: 0.9680 - loss: 0.1043 - val_accuracy: 0.7118 - val_loss: 0.7636
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 67ms/step - accuracy: 0.9888 - loss: 0.0405 - val_accuracy: 0.7722 - val_loss: 0.7031


In [14]:
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_accuracy)


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 13ms/step - accuracy: 0.7598 - loss: 0.7645
Test Accuracy: 0.7621600031852722


In [15]:
word_index = keras.datasets.imdb.get_word_index()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [16]:
def encode_review(text):
    words = text.lower().split()
    encoded = [word_index.get(word, 2) for word in words]
    return pad_sequences([encoded], maxlen=max_length)


In [17]:
sample_review = "this movie was absolutely fantastic and inspiring"
encoded_review = encode_review(sample_review)

prediction = model.predict(encoded_review)

if prediction[0][0] > 0.5:
    print("Positive Review")
else:
    print("Negative Review")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
Positive Review
