In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Embedding
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# 1. Read the review dataset from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer="IMDB Dataset.csv")

In [None]:
# Peek at a few randomly chosen rows to sanity-check the loaded frame
data.sample(n=4)

Unnamed: 0,review,sentiment
7888,If this film doesn't at least be selected for ...,positive
7513,"Once upon a time, in Sweden, there was a poor ...",positive
35446,Kate Beckinsale is excellent as the manipulati...,positive
12236,Good lord! This movie needs to have a new clas...,negative


In [None]:
# Hyperparameters shared by the tokenizer, the padding step and the model
vocab_size = 10_000   # passed as `num_words` to the Tokenizer and as the Embedding input size
max_length = 120      # `maxlen` used whenever sequences are padded
embedding_dim = 64    # output size of the Embedding layer

In [None]:
# Text preprocessing: build the vocabulary, encode each review as a sequence
# of integer ids, then pad every sequence to `max_length`.
# NOTE(review): the tokenizer is fitted on *all* reviews, i.e. before the
# train/test split happens further down.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts=data['review'])
sequences = tokenizer.texts_to_sequences(texts=data['review'])
padded = pad_sequences(sequences=sequences, maxlen=max_length)

In [None]:
# 2. Convert Sentiment Labels to Integers
sentiment_mapping = {'positive': 0, 'negative': 1, 'neutral': 2}

# The column is overwritten in place, so a naive `.map()` is destructive on a
# second run: the already-encoded integers are not keys of the dict and every
# label would become NaN. Only encode when the column is not yet encoded.
if not data['sentiment'].isin(list(sentiment_mapping.values())).all():
    encoded_sentiment = data['sentiment'].map(sentiment_mapping)
    # `.map()` yields NaN for values missing from the dict; fail loudly here
    # instead of letting NaN labels reach the one-hot encoding step.
    unmapped = data.loc[encoded_sentiment.isna(), 'sentiment'].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unexpected sentiment labels: {list(unmapped)}")
    data['sentiment'] = encoded_sentiment.astype('int64')

In [None]:
# Split into train and test
# A fixed `random_state` pins the shuffle, so the split (and every metric
# computed from it) is reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    padded,
    data['sentiment'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# 3. One-Hot Encode Sentiment Labels
# Derive the class count from the label encoding so the two cannot drift
# apart; to_categorical needs num_classes to exceed the largest label code.
num_classes = max(sentiment_mapping.values()) + 1

# y_train / y_test are overwritten in place. Guard on dimensionality so that
# re-running this cell does not one-hot encode the already-encoded 2-D arrays.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes)

In [None]:
# 4. Build the GRU Model
model = Sequential([
    # Integer token ids -> dense vectors. `input_length` is intentionally not
    # passed: it is deprecated in current Keras and the GRU below does not
    # need a fixed sequence length to be declared up front.
    Embedding(vocab_size, embedding_dim),
    GRU(64),
    # One output unit per class; sized from `num_classes` so it always matches
    # the width of the one-hot targets.
    Dense(num_classes, activation='softmax'),
])
# categorical_crossentropy pairs with the one-hot encoded targets.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# 5. Fit the network on the training split (10 passes, mini-batches of 64)
model.fit(x=X_train, y=y_train, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7c0dbb2e9a80>

In [None]:
# 6. Score the trained model on the held-out test split
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print('Test Accuracy:', test_accuracy)

Test Accuracy: 0.8587999939918518


In [None]:
# 7. Make Predictions on New Reviews
def predict_sentiment(new_review):
    """Classify a single raw review string with the trained model.

    The text is encoded with the fitted ``tokenizer`` and padded to
    ``max_length`` exactly like the training data, then the class with the
    highest predicted probability is decoded back to its label.

    Args:
        new_review: Raw review text (a single string).

    Returns:
        The sentiment label (a key of ``sentiment_mapping``) whose integer
        code received the highest predicted probability.
    """
    new_sequence = tokenizer.texts_to_sequences([new_review])
    new_padded = pad_sequences(new_sequence, maxlen=max_length)
    # predict() returns one row per input; a single review was passed in.
    probabilities = model.predict(new_padded)[0]
    sentiment_class = int(np.argmax(probabilities))
    # Invert the training-time encoding rather than keeping a second,
    # hand-written label list that could drift out of sync with it.
    index_to_label = {index: label for label, index in sentiment_mapping.items()}
    return index_to_label[sentiment_class]

In [None]:
# Demo: classify one hand-written review and show the predicted label
new_review = "This movie was truly wonderful and nice!"
predicted_sentiment = predict_sentiment(new_review=new_review)
print("Predicted Sentiment:", predicted_sentiment)

Predicted Sentiment: positive


**Remember:**

* **Dataset:** Replace "IMDB Dataset.csv" with the actual path to your dataset file.
* **Hyperparameter Tuning:** Consider experimenting with `vocab_size`, `embedding_dim`, GRU units, and training epochs for potential improvements.



<div class="md-recitation">
  Sources
  <ol>
  <li><a href="https://www.analyticsvidhya.com/blog/2021/08/sentiment-analysis-using-bidirectional-stacked-lstm/">https://www.analyticsvidhya.com/blog/2021/08/sentiment-analysis-using-bidirectional-stacked-lstm/</a></li>
  <li><a href="https://medium.com/swlh/build-train-and-deploy-tensorflow-deep-learning-models-on-amazon-sagemaker-a-complete-workflow-2e8af2e8c08e">https://medium.com/swlh/build-train-and-deploy-tensorflow-deep-learning-models-on-amazon-sagemaker-a-complete-workflow-2e8af2e8c08e</a></li>
  </ol>
</div>