In [1]:
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
from keras.datasets import imdb

# Reproducibility: seed Python's `random`, NumPy and TensorFlow before any
# stochastic step (weight initialisation, batch shuffling, dropout) so that a
# Restart & Run All gives comparable results.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Hyperparameters
vocab_size = 10000   # passed as num_words to imdb.load_data and as the Embedding input size
max_length = 200     # every review is padded/truncated to this many tokens
embedding_dim = 32   # size of each word-embedding vector

# Load the IMDB reviews; load_data returns them already split into train/test
# and already encoded as lists of integer word ids.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad (with trailing zeros) or truncate (dropping the tail) every review so
# that all sequences have exactly max_length tokens
x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')

print(f'x_train shape: {x_train.shape}')
print(f'x_test shape: {x_test.shape}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 0us/step
x_train shape: (25000, 200)
x_test shape: (25000, 200)


In [2]:
from keras.models import Sequential
from keras.layers import Embedding, GRU, Dense, Dropout, Input

# Build the model: embedding -> two stacked GRUs -> small dense head.
#
# NOTE(review): the inputs are post-padded with 0 and no mask is applied, so
# the GRUs also step through the padding positions. Consider
# Embedding(..., mask_zero=True) -- confirm the effect on training and on
# predict_sentiment before changing.
model = Sequential([
    # Declaring the input shape up front builds the model immediately, so
    # model.summary() below can report output shapes and parameter counts.
    # It replaces Embedding's `input_length` argument, which Keras 3 deprecates.
    Input(shape=(max_length,), dtype='int32'),
    Embedding(vocab_size, embedding_dim),
    GRU(64, return_sequences=True),  # full sequence out, consumed by the next GRU
    GRU(32),                         # only the last output is passed on
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')   # single score in [0, 1] for the binary label
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()




In [3]:
# Fit the network; validation_split=0.2 reserves 20% of the training data
# for the per-epoch validation metrics
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=10,
)

# Score the trained network on the test set and report both metrics
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 131ms/step - accuracy: 0.5000 - loss: 0.6934 - val_accuracy: 0.4938 - val_loss: 0.6932
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 132ms/step - accuracy: 0.5080 - loss: 0.6924 - val_accuracy: 0.5474 - val_loss: 0.6776
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 134ms/step - accuracy: 0.5976 - loss: 0.6451 - val_accuracy: 0.7696 - val_loss: 0.5064
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 134ms/step - accuracy: 0.8253 - loss: 0.4153 - val_accuracy: 0.8562 - val_loss: 0.3425
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 133ms/step - accuracy: 0.9093 - loss: 0.2581 - val_accuracy: 0.8524 - val_loss: 0.3902
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 138ms/step - accuracy: 0.9361 - loss: 0.1845 - val_accuracy: 0.8646 - val_loss: 0.3834
Epoch 7/10

In [17]:
# Example prediction
import numpy as np

def predict_sentiment(text):
    """Classify a raw review string as 'positive' or 'negative'.

    The text is encoded the same way ``imdb.load_data`` encodes the training
    reviews (using its default ``start_char=1``, ``oov_char=2`` and
    ``index_from=3``), so the model receives token ids it was trained on.
    Reads the notebook globals ``model``, ``vocab_size`` and ``max_length``.

    Args:
        text: Review text; it is lower-cased and split on whitespace.

    Returns:
        'positive' if the model's score is >= 0.5, otherwise 'negative'.
    """
    # Defaults of imdb.load_data, which produced x_train / x_test.
    start_char, oov_char, index_from = 1, 2, 3

    # get_word_index() maps word -> raw frequency rank. load_data shifts every
    # rank by index_from so that ids 0, 1 and 2 stay reserved for padding,
    # start-of-sequence and out-of-vocabulary respectively.
    word_index = imdb.get_word_index()

    tokens = [start_char]
    for word in text.lower().split():
        rank = word_index.get(word)
        token = oov_char if rank is None else rank + index_from
        # load_data(num_words=vocab_size) replaced ids >= vocab_size with
        # oov_char, and the Embedding layer has no row for such ids.
        tokens.append(token if token < vocab_size else oov_char)

    tokens_padded = pad_sequences([tokens], maxlen=max_length, padding='post', truncating='post')

    # Predict sentiment; model.predict returns a (1, 1) array, so pull out
    # the single score before comparing it with the threshold.
    prediction = model.predict(tokens_padded)
    score = float(prediction[0][0])
    sentiment = 'positive' if score >= 0.5 else 'negative'
    return sentiment

# Classify a hand-written example review and show the resulting label
new_review = "The movie was out of the world and I loved it"
predicted_label = predict_sentiment(new_review)
print(f'Sentiment: {predicted_label}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Sentiment: positive
