In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

In [2]:
# Preprocessing hyperparameters for the IMDB sentiment-analysis dataset.
max_words = 10_000  # vocabulary cap: only the 10,000 top-ranked words are kept
maxlen = 100        # fixed length each input sequence is brought to


In [3]:
# Load the IMDB train/test splits (downloaded on first use), restricting the
# vocabulary via num_words=max_words.
train_split, test_split = imdb.load_data(num_words=max_words)
x_train, y_train = train_split
x_test, y_test = test_split


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
# Bring every review in both splits to the same length (`maxlen`) so the
# model receives uniformly sized inputs.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)


In [5]:
# Build the LSTM sentiment classifier.
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument: that argument is deprecated in Keras 3
# (it only triggers a warning there), and declaring the shape up front builds
# the model immediately so model.summary() can report shapes and parameters.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(maxlen,), dtype="int32"),  # one padded sequence of `maxlen` word indices
    tf.keras.layers.Embedding(input_dim=max_words, output_dim=128),  # Word embeddings
    tf.keras.layers.LSTM(64),  # LSTM layer
    tf.keras.layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])




In [6]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [7]:
# Print a summary of the model's layers. Output shapes and parameter counts
# are only populated once the model has been built with a known input shape.
model.summary()


In [None]:
# Train for 5 epochs in mini-batches of 64, evaluating on (x_test, y_test)
# at the end of each epoch.
# NOTE(review): the test split doubles as validation data here, so the
# reported validation metrics do not come from a separate held-out set —
# confirm this is intended.
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(x_test, y_test),
)

Epoch 1/5
[1m 52/391[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m51s[0m 153ms/step - accuracy: 0.5736 - loss: 0.6793