  Sentiment Analysis on IMDB dataset

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.datasets import imdb
import numpy as np

# Set hyperparameters
vocab_size = 10000   # Number of unique words to keep
max_length = 200     # Maximum sequence length
embedding_dim = 128  # Word embedding dimension
gru_units = 64       # Number of GRU units
batch_size = 64
epochs = 10
seed = 42            # Fixed seed so repeated runs of the notebook are comparable

# Seed Python's `random`, NumPy and TensorFlow in one call, before any weights
# are initialised or batches are shuffled.
tf.keras.utils.set_random_seed(seed)

# Load the IMDB dataset; `num_words` limits the integer word ids to the
# `vocab_size` most frequent entries.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure uniform length: reviews shorter than `max_length`
# get trailing zeros, longer ones lose everything after `max_length` tokens.
X_train = pad_sequences(X_train, maxlen=max_length, padding='post', truncating='post')
X_test = pad_sequences(X_test, maxlen=max_length, padding='post', truncating='post')

# Build the GRU model: embedding -> two stacked GRU layers with dropout -> sigmoid unit.
# NOTE(review): the inputs are post-padded with zeros and the Embedding layer does not
# mask them, so the GRUs also step over the padding. Consider `mask_zero=True` if
# convergence is slow — confirm against training curves before changing.
model1 = Sequential([
    # `input_length` is intentionally not passed: it is deprecated in Keras 3 and the
    # sequence length is inferred from the data on the first call.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    GRU(units=gru_units, return_sequences=True),  # First GRU layer, emits one vector per timestep
    Dropout(0.5),
    GRU(units=gru_units),  # Second GRU layer, emits only its final state
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
# Validation metrics come from a 20% slice held out of the training data, so the
# test set is not looked at until the single final evaluation below.
model1.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)

# Evaluate the model once on the untouched test set
loss, accuracy = model1.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step




Epoch 1/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 26ms/step - accuracy: 0.5109 - loss: 0.6931 - val_accuracy: 0.6610 - val_loss: 0.6622
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 25ms/step - accuracy: 0.5706 - loss: 0.6815 - val_accuracy: 0.7516 - val_loss: 0.5196
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - accuracy: 0.8603 - loss: 0.3397 - val_accuracy: 0.8665 - val_loss: 0.3114
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - accuracy: 0.9327 - loss: 0.1903 - val_accuracy: 0.8655 - val_loss: 0.3457
Epoch 5/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 24ms/step - accuracy: 0.9700 - loss: 0.1017 - val_accuracy: 0.8541 - val_loss: 0.4425
Epoch 6/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 29ms/step - accuracy: 0.9828 - loss: 0.0628 - val_accuracy: 0.8500 - val_loss: 0.6381
Epoch 7/10
[1m3

In [None]:
# Persist the trained network to disk in the native `.keras` format.
model1.save(filepath="textclassification_GRU_model.keras")

In [None]:
# Fetch the IMDB vocabulary as a word -> integer mapping.
word_index = imdb.get_word_index()
# Inverse lookup (integer -> word), handy for turning encoded reviews back into text.
reverse_word_index = dict((rank, token) for token, rank in word_index.items())

# Function to preprocess text input
def preprocess_text(text):
    """Encode a raw review string the way `imdb.load_data` encoded the training data.

    The ranks in `word_index` are not the ids the model was trained on:
    with its default arguments `imdb.load_data` reserves 0 for padding,
    1 for the start of a review and 2 for out-of-vocabulary words, and
    shifts every real word rank up by 3. This function applies the same
    scheme so each word reaches the embedding row it was trained with.

    Args:
        text: The review as plain text.

    Returns:
        The result of `pad_sequences` for the single encoded review, padded
        or truncated at the end to `max_length` ids.
    """
    # Reserved ids and offset matching the defaults of `imdb.load_data`.
    start_id, oov_id, index_offset = 1, 2, 3

    # Replace punctuation with spaces before splitting so that tokens such as
    # "fantastic!" or "dull." can be found in the vocabulary. The character set
    # is the default filter of the Keras text tokenizer; apostrophes are kept.
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = text.lower().translate(str.maketrans(punctuation, ' ' * len(punctuation)))

    sequence = [start_id]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = oov_id if rank is None else rank + index_offset
        # Ids at or beyond `vocab_size` were replaced by the OOV id in the
        # training data (and would fall outside the embedding table).
        if token_id >= vocab_size:
            token_id = oov_id
        sequence.append(token_id)

    return pad_sequences([sequence], maxlen=max_length, padding='post', truncating='post')

# Function to predict sentiment
def predict_sentiment(text, threshold=0.6):
    """Print the predicted sentiment label for a single review.

    Args:
        text: The review as plain text.
        threshold: Score above which the review is labelled "Positive".
            The default of 0.6 keeps the original behaviour and leans
            towards "Negative"; pass 0.5 for the neutral cut-off of a
            sigmoid output.

    Returns:
        None. The review and its label are written to stdout.
    """
    processed_text = preprocess_text(text)
    # `predict` returns one row per input and one column per output unit;
    # there is a single review and a single sigmoid unit, hence [0][0].
    prediction = model1.predict(processed_text)[0][0]
    sentiment = "Positive" if prediction > threshold else "Negative"
    print(f"Review: {text}\nPredicted label: {sentiment} ")


# Run the classifier over a few hand-written reviews covering clearly positive,
# clearly negative and lukewarm opinions.
sample_reviews = (
    "This movie was fantastic! I loved every moment.",
    "Worst film ever. ",
    "It was an okay movie, nothing too special.",
    "I couldn't sit through the whole movie; it was painfully dull.",
    "An average film with some good moments but overall forgettable.",
)
for sample_review in sample_reviews:
    predict_sentiment(sample_review)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Review: This movie was fantastic! I loved every moment.
Predicted label: Positive 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Review: Worst film ever. 
Predicted label: Positive 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Review: It was an okay movie, nothing too special.
Predicted label: Negative 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Review: I couldn't sit through the whole movie; it was painfully dull.
Predicted label: Positive 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Review: An average film with some good moments but overall forgettable.
Predicted label: Positive 
