In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, Flatten

# Load the IMDb dataset
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

In [2]:
# Pad sequences to ensure all input data is of the same length
max_length = 200
X_train_padded = pad_sequences(X_train, maxlen=max_length)
X_test_padded = pad_sequences(X_test, maxlen=max_length)

In [3]:
# Check the number of samples (length of the dataset)
print("Number of training samples:", len(X_train))
print("Number of test samples:", len(X_test))

# Check the length of the first review (before padding)
print("Length of first training review:", len(X_train[0]))


Number of training samples: 25000
Number of test samples: 25000
Length of first training review: 218


In [4]:
# Build the ANN model
model = Sequential()

In [5]:
# Embedding layer
model.add(Embedding(input_dim=10000, output_dim=128))

# Flatten layer: Flattens the output from the Embedding layer before passing it to the Dense layers
model.add(Flatten())

# Fully connected hidden layer
model.add(Dense(128, activation='relu'))

# Dropout to prevent overfitting
model.add(Dropout(0.5))

# Fully connected hidden layer
model.add(Dense(64, activation='relu'))

# Dropout to prevent overfitting
model.add(Dropout(0.5))

# Output layer for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

In [6]:
# Train the model
history = model.fit(X_train_padded, y_train, epochs=10, batch_size=64, validation_split=0.2)


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 70ms/step - accuracy: 0.6222 - loss: 0.6160 - val_accuracy: 0.8618 - val_loss: 0.3254
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 68ms/step - accuracy: 0.9416 - loss: 0.1653 - val_accuracy: 0.8410 - val_loss: 0.4065
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 74ms/step - accuracy: 0.9941 - loss: 0.0254 - val_accuracy: 0.8406 - val_loss: 0.6802
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 69ms/step - accuracy: 0.9970 - loss: 0.0101 - val_accuracy: 0.8408 - val_loss: 0.7707
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 67ms/step - accuracy: 0.9964 - loss: 0.0101 - val_accuracy: 0.8360 - val_loss: 0.8785
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 69ms/step - accuracy: 0.9956 - loss: 0.0123 - val_accuracy: 0.8428 - val_loss: 0.9237
Epoch 7/10
[1m3

In [7]:
# Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test_padded, y_test)

print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_acc}')


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.8333 - loss: 1.0960
Test Loss: 1.1019620895385742
Test Accuracy: 0.834119975566864


Predicting Sentiment for new reviews

In [8]:
# Example to predict sentiment on a new review
def predict_sentiment(review):
    # Convert the review to a sequence of integers and pad
    review_seq = imdb.get_word_index()
    tokens = [review_seq.get(word, 2) for word in review.lower().split()]  # 2 is the default for unknown words
    padded_tokens = pad_sequences([tokens], maxlen=max_length)

    # Predict the sentiment (1 = positive, 0 = negative)
    prediction = model.predict(padded_tokens)[0][0]
    return 'Positive' if prediction > 0.5 else 'Negative'

# Example usage
print(predict_sentiment("I absolutely loved this movie!"))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
Positive


In [9]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_padded, y_test)

print(f"Test Accuracy: {test_accuracy:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.8333 - loss: 1.0960
Test Accuracy: 0.8341
