In [1]:
# Import necessary libraries
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Bidirectional, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
import numpy as np

In [2]:
# Set the vocabulary size for the dataset (top 10000 most frequent words)
vocab_size = 10000

In [3]:
# Load the IMDB dataset with only the top 5000 words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
print(x_train[0])  # Print the first review to understand the tokenized input

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28,

In [4]:
# Load the word index mapping from the dataset and adjust for reserved indices
word_index = imdb.get_word_index()
word_index = {k: (v + 3) for k, v in word_index.items()}  # Shift by 3 to account for reserved indices
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Preprocess data: pad sequences to a uniform length of max_words (400 in this case)
from tensorflow.keras.preprocessing import sequence
max_words = 400
x_train = sequence.pad_sequences(x_train, maxlen=max_words)
x_test = sequence.pad_sequences(x_test, maxlen=max_words)

# Split off a small portion of the training data for validation
x_valid, y_valid = x_train[:64], y_train[:64]  # First 64 samples for validation
x_train, y_train = x_train[64:], y_train[64:]  # Remaining samples for training
x_valid  # View the validation set

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,  437,    7,   58],
       [   0,    0,    0, ...,   11,    6,    2],
       [1004,  142,   55, ...,    4, 5269, 1117]], dtype=int32)

In [6]:
# Define embedding dimension
embd_len = 32

In [7]:
# Create and define a Simple RNN model
RNN_model = Sequential(name="Simple_RNN")
RNN_model.add(Embedding(vocab_size,  # Vocabulary size
                        embd_len,    # Embedding dimension
                        input_length=max_words))  # Input sequence length
RNN_model.add(SimpleRNN(128,         # Number of RNN units
                        activation='tanh',  # Activation function
                        return_sequences=False))  # Only the last output is needed
RNN_model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification
print(RNN_model.summary())  # Print model architecture

# Compile the RNN model
RNN_model.compile(
    loss="binary_crossentropy",  # Loss function for binary classification
    optimizer='adam',            # Optimizer for gradient descent
    metrics=['accuracy']         # Metric for evaluation
)

# Train the RNN model
history = RNN_model.fit(x_train, y_train,
                        batch_size=64,       # Batch size for training
                        epochs=10,            # Number of epochs
                        verbose=1,           # Show training progress
                        validation_data=(x_valid, y_valid))  # Use validation data
print()
print("Simple_RNN Score--->", RNN_model.evaluate(x_test, y_test, verbose=0))  # Evaluate on test set



None
Epoch 1/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 44ms/step - accuracy: 0.5189 - loss: 0.6962 - val_accuracy: 0.4531 - val_loss: 0.7056
Epoch 2/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 40ms/step - accuracy: 0.6420 - loss: 0.6311 - val_accuracy: 0.8125 - val_loss: 0.5011
Epoch 3/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 41ms/step - accuracy: 0.7809 - loss: 0.4710 - val_accuracy: 0.7969 - val_loss: 0.4482
Epoch 4/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 40ms/step - accuracy: 0.8445 - loss: 0.3718 - val_accuracy: 0.7031 - val_loss: 0.6030
Epoch 5/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 40ms/step - accuracy: 0.8202 - loss: 0.4031 - val_accuracy: 0.8125 - val_loss: 0.4989
Epoch 6/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 40ms/step - accuracy: 0.8739 - loss: 0.3097 - val_accuracy: 0.7812 - val_loss: 0.4388
Epoch 7/10


In [8]:
# Create and define a GRU-based model
gru_model = Sequential(name="GRU_Model")
gru_model.add(Embedding(vocab_size, embd_len, input_length=max_words))
gru_model.add(SimpleRNN(128, activation='tanh', return_sequences=False))
gru_model.add(Dense(1, activation='sigmoid'))
print(gru_model.summary())

# Compile and train the GRU model
gru_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])
history2 = gru_model.fit(x_train, y_train,
                         batch_size=64,
                         epochs=10,
                         verbose=2,
                         validation_data=(x_valid, y_valid))
print()
print("GRU Model Score--->", gru_model.evaluate(x_test, y_test, verbose=0))

None
Epoch 1/10
390/390 - 18s - 47ms/step - accuracy: 0.5774 - loss: 0.6592 - val_accuracy: 0.5781 - val_loss: 0.6447
Epoch 2/10
390/390 - 15s - 38ms/step - accuracy: 0.6970 - loss: 0.5740 - val_accuracy: 0.4375 - val_loss: 1.1498
Epoch 3/10
390/390 - 21s - 53ms/step - accuracy: 0.7726 - loss: 0.4830 - val_accuracy: 0.7812 - val_loss: 0.5631
Epoch 4/10
390/390 - 20s - 52ms/step - accuracy: 0.8061 - loss: 0.4327 - val_accuracy: 0.7500 - val_loss: 0.5424
Epoch 5/10
390/390 - 15s - 38ms/step - accuracy: 0.7715 - loss: 0.4706 - val_accuracy: 0.5938 - val_loss: 0.8600
Epoch 6/10
390/390 - 15s - 39ms/step - accuracy: 0.8262 - loss: 0.4004 - val_accuracy: 0.7812 - val_loss: 0.5336
Epoch 7/10
390/390 - 15s - 39ms/step - accuracy: 0.8547 - loss: 0.3426 - val_accuracy: 0.7812 - val_loss: 0.5701
Epoch 8/10
390/390 - 20s - 52ms/step - accuracy: 0.8611 - loss: 0.3280 - val_accuracy: 0.7188 - val_loss: 0.5961
Epoch 9/10
390/390 - 21s - 53ms/step - accuracy: 0.8328 - loss: 0.3746 - val_accuracy: 0.75

In [9]:
# Create and define an LSTM-based model
lstm_model = Sequential(name="LSTM_Model")
lstm_model.add(Embedding(vocab_size, embd_len, input_length=max_words))
lstm_model.add(SimpleRNN(128, activation='relu', return_sequences=False))
lstm_model.add(Dense(1, activation='sigmoid'))
print(lstm_model.summary())

# Compile and train the LSTM model
lstm_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])
history3 = lstm_model.fit(x_train, y_train,
                          batch_size=64,
                          epochs=10,
                          verbose=2,
                          validation_data=(x_valid, y_valid))
print()
print("LSTM Model Score--->", lstm_model.evaluate(x_test, y_test, verbose=0))

None
Epoch 1/10
390/390 - 19s - 48ms/step - accuracy: 0.6083 - loss: 0.6788 - val_accuracy: 0.6562 - val_loss: 0.6030
Epoch 2/10
390/390 - 15s - 38ms/step - accuracy: 0.7173 - loss: 479.0268 - val_accuracy: 0.4688 - val_loss: 0.7139
Epoch 3/10
390/390 - 15s - 37ms/step - accuracy: 0.7892 - loss: 0.4667 - val_accuracy: 0.8438 - val_loss: 0.4453
Epoch 4/10
390/390 - 21s - 53ms/step - accuracy: 0.8420 - loss: 0.4011 - val_accuracy: 0.8438 - val_loss: 0.4672
Epoch 5/10
390/390 - 20s - 52ms/step - accuracy: 0.6119 - loss: 1150.6320 - val_accuracy: 0.7031 - val_loss: 0.6462
Epoch 6/10
390/390 - 21s - 53ms/step - accuracy: 0.6952 - loss: 0.5846 - val_accuracy: 0.6719 - val_loss: 0.6163
Epoch 7/10
390/390 - 20s - 52ms/step - accuracy: 0.5442 - loss: 368.6443 - val_accuracy: 0.6094 - val_loss: 1.6690
Epoch 8/10
390/390 - 21s - 53ms/step - accuracy: 0.4999 - loss: 1.6245 - val_accuracy: 0.6094 - val_loss: 1.0992
Epoch 9/10
390/390 - 20s - 52ms/step - accuracy: 0.5116 - loss: 1.0753 - val_accurac

In [10]:
# Create and define a Bidirectional LSTM model
bi_lstm_model = Sequential(name="Bidirectional_LSTM")
bi_lstm_model.add(Embedding(vocab_size, embd_len, input_length=max_words))
bi_lstm_model.add(SimpleRNN(128, activation='tanh', return_sequences=False))
bi_lstm_model.add(Dense(1, activation='sigmoid'))
print(bi_lstm_model.summary())

# Compile and train the Bidirectional LSTM model
bi_lstm_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])
history4 = bi_lstm_model.fit(x_train, y_train,
                             batch_size=64,
                             epochs=10,
                             verbose=2,
                             validation_data=(x_valid, y_valid))
print()
print("Bidirectional LSTM Model Score--->", bi_lstm_model.evaluate(x_test, y_test, verbose=0))

None
Epoch 1/10
390/390 - 19s - 48ms/step - accuracy: 0.5538 - loss: 0.6809 - val_accuracy: 0.6562 - val_loss: 0.6478
Epoch 2/10
390/390 - 15s - 38ms/step - accuracy: 0.6620 - loss: 0.6172 - val_accuracy: 0.7500 - val_loss: 0.5858
Epoch 3/10
390/390 - 21s - 53ms/step - accuracy: 0.7483 - loss: 0.5224 - val_accuracy: 0.6094 - val_loss: 0.6808
Epoch 4/10
390/390 - 20s - 52ms/step - accuracy: 0.7958 - loss: 0.4532 - val_accuracy: 0.6875 - val_loss: 0.5586
Epoch 5/10
390/390 - 20s - 52ms/step - accuracy: 0.7717 - loss: 0.4785 - val_accuracy: 0.7500 - val_loss: 0.5344
Epoch 6/10
390/390 - 21s - 53ms/step - accuracy: 0.8529 - loss: 0.3507 - val_accuracy: 0.7812 - val_loss: 0.4808
Epoch 7/10
390/390 - 20s - 52ms/step - accuracy: 0.8904 - loss: 0.2851 - val_accuracy: 0.7812 - val_loss: 0.4671
Epoch 8/10
390/390 - 20s - 52ms/step - accuracy: 0.9026 - loss: 0.2562 - val_accuracy: 0.7812 - val_loss: 0.4857
Epoch 9/10
390/390 - 21s - 54ms/step - accuracy: 0.9062 - loss: 0.2412 - val_accuracy: 0.40

In [12]:
# Function to preprocess raw text and predict sentiment
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences

def preprocess_text(text, word_index, max_words):
    # Tokenize and convert words to integers using word_index
    tokens = [word_index.get(word, 0) for word in text.split()]  # 0 for unknown words
    # Pad sequence to match the model's input size
    tokens = pad_sequences([tokens], maxlen=max_words)
    return tokens

def predict_sentiment(model, text, word_index, max_words):
    # Preprocess the input text
    processed_text = preprocess_text(text, word_index, max_words)
    # Predict the sentiment
    prediction = model.predict(processed_text, verbose=0)
    # Return the sentiment
    return "Positive" if prediction >= 0.5 else "Negative"

sample_text = input("Sentence: ")
preprocessed_text = preprocess_text(sample_text.lower(), word_index, max_words)
print("Processed Input:", preprocessed_text)
sentiment = predict_sentiment(RNN_model, sample_text.lower(), word_index, max_words)
print("Predicted Sentiment:", sentiment)

Sentence: The movie was fantastic and had great acting
Processed Input: [[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   