In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fetch the IMDB reviews train/test splits as (text, label) pairs, together
# with the dataset metadata object.
dataset_splits, info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Keep only a small prefix of each split (1000 train / 200 test examples) so
# the rest of the demo runs quickly.
train_data, test_data = (
    dataset_splits[0].take(1000),
    dataset_splits[1].take(200),
)

# Tokenizer configuration: vocabulary cap and the placeholder token used for
# out-of-vocabulary words.
vocab_size = 10000
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=vocab_size,
    oov_token="<OOV>",
)


def _texts_and_labels(dataset):
    """Materialise a (text, label) dataset into two parallel Python lists.

    Each text tensor is decoded from UTF-8 bytes to ``str``; each label is
    kept as the NumPy scalar returned by ``.numpy()``.
    """
    texts, labels = [], []
    for text, label in dataset:
        texts.append(text.numpy().decode('utf-8'))
        labels.append(label.numpy())
    return texts, labels


train_sentences, train_labels = _texts_and_labels(train_data)
test_sentences, test_labels = _texts_and_labels(test_data)

# The vocabulary is learned from the training sentences only; the test
# sentences are encoded with that same vocabulary.
tokenizer.fit_on_texts(train_sentences)
train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

# Pad or truncate every sequence at its end to a fixed length of max_len.
max_len = 200
pad_options = dict(maxlen=max_len, padding='post', truncating='post')

# Final model inputs and targets as tensors.
train_padded = tf.convert_to_tensor(pad_sequences(train_sequences, **pad_options))
test_padded = tf.convert_to_tensor(pad_sequences(test_sequences, **pad_options))
train_labels = tf.convert_to_tensor(train_labels)
test_labels = tf.convert_to_tensor(test_labels)

# Build model
# An explicit Input layer replaces the Embedding `input_length` argument
# (deprecated in Keras 3) and builds the model up front, so `summary()` can
# report concrete output shapes and parameter counts.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(max_len,), dtype='int32'),
    # 10000 is the embedding vocabulary size; it must stay equal to the
    # `num_words` passed to the Tokenizer.
    tf.keras.layers.Embedding(10000, 16),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(24, activation='relu'),
    # Single sigmoid unit, paired with the binary cross-entropy loss below.
    tf.keras.layers.Dense(1, activation='sigmoid')
])
# NOTE(review): sequences are post-padded and the Embedding does not mask the
# padding, so the LSTM also reads the trailing zeros - consider whether
# `mask_zero=True` is wanted here.

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train
# Keep the returned History so the per-epoch metrics can be inspected later.
history = model.fit(
    train_padded,
    train_labels,
    epochs=5,
    validation_data=(test_padded, test_labels),
)




Epoch 1/5
32/32 ━━━━━━━━━━━━━━━━━━━━ 11s 179ms/step - accuracy: 0.5058 - loss: 0.6933 - val_accuracy: 0.4500 - val_loss: 0.6947
Epoch 2/5
32/32 ━━━━━━━━━━━━━━━━━━━━ 9s 140ms/step - accuracy: 0.5246 - loss: 0.6903 - val_accuracy: 0.4600 - val_loss: 0.6950
Epoch 3/5
32/32 ━━━━━━━━━━━━━━━━━━━━ 8s 229ms/step - accuracy: 0.5686 - loss: 0.6732 - val_accuracy: 0.5100 - val_loss: 0.6956
Epoch 4/5
32/32 ━━━━━━━━━━━━━━━━━━━━ 7s 133ms/step - accuracy: 0.7103 - loss: 0.5449 - val_accuracy: 0.6550 - val_loss: 0.6506
Epoch 5/5
32/32 ━━━━━━━━━━━━━━━━━━━━ 7s 179ms/step - accuracy: 0.9232 - loss: 0.2492 - val_accuracy: 0.6550 - val_loss: 0.7102


<keras.src.callbacks.history.History at 0x79d2bbacc8d0>