## End to end Deep Learning Project Using Simple RNN

In [3]:
# !pip install -r requirements.txt

In [4]:
import re
import pickle
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [5]:
# --------------------------
# Step 1: Load raw IMDB dataset
# --------------------------
train_data, test_data = tfds.load(
    "imdb_reviews",
    split=["train", "test"],
    as_supervised=True
)

train_texts = [text.numpy().decode("utf-8") for text, label in train_data]
train_labels = [label.numpy() for text, label in train_data]

test_texts = [text.numpy().decode("utf-8") for text, label in test_data]
test_labels = [label.numpy() for text, label in test_data]



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.X8GVJ8_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.X8GVJ8_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.X8GVJ8_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [7]:
# --------------------------
# Step 2: Tokenizer
# --------------------------
max_features = 10000   # vocabulary size
max_len = 200          # shorter than 500 helps RNNs learn faster

tokenizer = Tokenizer(num_words=max_features, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

# Save tokenizer
with open("tokenizer.pickle", "wb") as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Convert to sequences + pad
x_train_seq = tokenizer.texts_to_sequences(train_texts)
x_test_seq = tokenizer.texts_to_sequences(test_texts)

x_train_pad = pad_sequences(x_train_seq, maxlen=max_len, padding="post", truncating="post")
x_test_pad = pad_sequences(x_test_seq, maxlen=max_len, padding="post", truncating="post")

y_train = np.array(train_labels)
y_test = np.array(test_labels)


In [8]:
# --------------------------
# Step 3: Build the model
# --------------------------
model = Sequential([
    Embedding(max_features, 128, input_length=max_len),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])




In [9]:
# --------------------------
# Step 4: Train
# --------------------------
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

history = model.fit(
    x_train_pad, y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping]
)

Epoch 1/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.5169 - loss: 0.6932 - val_accuracy: 0.5144 - val_loss: 0.6899
Epoch 2/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.5630 - loss: 0.6681 - val_accuracy: 0.7958 - val_loss: 0.5131
Epoch 3/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.7205 - loss: 0.5836 - val_accuracy: 0.6978 - val_loss: 0.6091
Epoch 4/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.7420 - loss: 0.5603 - val_accuracy: 0.6502 - val_loss: 0.6368
Epoch 5/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.7013 - loss: 0.5763 - val_accuracy: 0.7312 - val_loss: 0.5866
Epoch 6/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.8185 - loss: 0.4258 - val_accuracy: 0.8394 - val_loss: 0.3825
Epoch 7/50
[1m313/31

In [10]:
# --------------------------
# Step 5: Helper functions
# --------------------------
def preprocess_text(text, max_len=200):
    # Clean text (optional)
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
    return padded

def predict_sentiment(review):
    processed = preprocess_text(review)
    prediction = model.predict(processed)
    sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
    return sentiment, prediction[0][0]

In [11]:
# --------------------------
# Step 6: Test predictions
# --------------------------
examples = [
    "The movie was amazing and full of joy!",
    "The movie was terrible and I hated it",
    "It was just okay, not too bad, not too good",
    "The movie was good"
]

for example in examples:
    sentiment, prob = predict_sentiment(example)
    print(f"Review: {example}")
    print(f"Sentiment: {sentiment}, Probability: {prob:.4f}\n")
    print("=====================================================")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
Review: The movie was amazing and full of joy!
Sentiment: Positive, Probability: 0.8490

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Review: The movie was terrible and I hated it
Sentiment: Negative, Probability: 0.1096

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Review: It was just okay, not too bad, not too good
Sentiment: Negative, Probability: 0.0674

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Review: The movie was good
Sentiment: Negative, Probability: 0.4007



In [13]:
model.summary()

In [12]:
model.save("sentiment_LSTM_model.keras")

In [13]:
import tensorflow as tf, keras
print(tf.__version__, keras.__version__)



2.19.0 3.10.0
