In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences


The reviews are already converted into sequences of integers, where each integer represents a word in a dictionary.

 Function Vectorize_sequence This is also known as one-hot encoding. The result is a binary matrix that indicates the presence or absence of each word in each review.


In [2]:
# Set the number of words to consider as features
vocab_size = 10000

# Load data
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

# A function to vectorize the sequences into 2D tensor
def vectorize_sequences(sequences, dimension=vocab_size):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results

# Vectorize data
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
y_train = np.array(train_labels).astype("float32")
y_test = np.array(test_labels).astype("float32")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# Model definition
model = Sequential([
    Dense(16, activation='relu', input_shape=(vocab_size,)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [4]:
# Train the model
history = model.fit(x_train, y_train, epochs=10, batch_size=512, validation_split=0.2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# Evaluate the model
results = model.evaluate(x_test, y_test)
print(f"Test Loss: {results[0]} - Test Accuracy: {results[1]}")


Test Loss: 0.48037055134773254 - Test Accuracy: 0.8617200255393982


In [27]:
# Example review (replace with any text)
review = "This movie was excellent! The performances were outstanding and I was captivated from start to finish."

# Convert review to tokens
def review_to_tokens(review):
    word_index = imdb.get_word_index()
    tokens = [word_index.get(w, 0) for w in review.lower().split()]
    return vectorize_sequences([tokens])

# Predict the sentiment
prediction = model.predict(review_to_tokens(review))
print("Review sentiment (0=negative, 1=positive):", prediction[0,0])


Review sentiment (0=negative, 1=positive): 0.8027048
