In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Parameters
vocab_size = 10000  # Number of words to consider as features
max_length = 120    # Cut texts after this number of words
embedding_dim = 16  # Dimensionality of the embedding layer
trunc_type = 'post' # Truncate from the end of the review
oov_tok = "<OOV>"   # Token for out of vocabulary words
padding_type = 'post'
training_size = 25000

# Load the IMDB dataset
imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True)
train_data, test_data = imdb['train'], imdb['test']

# Prepare the datasets
train_sentences = []
test_sentences = []
train_labels = []
test_labels = []

# TensorFlow datasets are stored in a tf.data.Dataset object, which is a generator.
for sentence, label in tfds.as_numpy(train_data):
    train_sentences.append(sentence.decode('utf-8'))
    train_labels.append(label)

for sentence, label in tfds.as_numpy(test_data):
    test_sentences.append(sentence.decode('utf-8'))
    test_labels.append(label)

train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

# Tokenize the sentences
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentences)

# Convert the sentences to sequences
train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

# Pad the sequences so they're all the same length
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Define the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

# Train the model
num_epochs = 10
history = model.fit(train_padded, train_labels, epochs=num_epochs, validation_data=(test_padded, test_labels), verbose=2)

# Evaluate the model
results = model.evaluate(test_padded, test_labels)
print(results)


2023-11-05 13:08:50.111339: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


Epoch 1/10
782/782 - 6s - loss: 0.4975 - accuracy: 0.7759 - val_loss: 0.3795 - val_accuracy: 0.8300 - 6s/epoch - 7ms/step
Epoch 2/10
782/782 - 4s - loss: 0.2996 - accuracy: 0.8744 - val_loss: 0.3807 - val_accuracy: 0.8302 - 4s/epoch - 5ms/step
Epoch 3/10
782/782 - 4s - loss: 0.2434 - accuracy: 0.9042 - val_loss: 0.3903 - val_accuracy: 0.8330 - 4s/epoch - 5ms/step
Epoch 4/10
782/782 - 4s - loss: 0.2101 - accuracy: 0.9198 - val_loss: 0.4259 - val_accuracy: 0.8266 - 4s/epoch - 5ms/step
Epoch 5/10
782/782 - 4s - loss: 0.1834 - accuracy: 0.9338 - val_loss: 0.4689 - val_accuracy: 0.8194 - 4s/epoch - 5ms/step
Epoch 6/10
782/782 - 4s - loss: 0.1651 - accuracy: 0.9408 - val_loss: 0.5156 - val_accuracy: 0.8128 - 4s/epoch - 5ms/step
Epoch 7/10
782/782 - 4s - loss: 0.1477 - accuracy: 0.9495 - val_loss: 0.5739 - val_accuracy: 0.8036 - 4s/epoch - 5ms/step
Epoch 8/10
782/782 - 4s - loss: 0.1329 - accuracy: 0.9562 - val_loss: 0.6129 - val_accuracy: 0.8016 - 4s/epoch - 5ms/step
Epoch 9/10
782/782 - 4s 