In [1]:
import pandas as pd
import tensorflow_datasets as tfds
train_dataset, val_dataset, test_dataset = tfds.load('imdb_reviews', split=["train[:90%]", "train[90%:]", "test"], as_supervised=True)

In [2]:
test_df = pd.DataFrame(test_dataset, columns=['Review','Label'])

In [3]:
test_df.head()

Unnamed: 0,Review,Label
0,"tf.Tensor(b""There are films that make careers....","tf.Tensor(1, shape=(), dtype=int64)"
1,"tf.Tensor(b""A blackly comic tale of a down-tro...","tf.Tensor(1, shape=(), dtype=int64)"
2,"tf.Tensor(b'Scary Movie 1-4, Epic Movie, Date ...","tf.Tensor(0, shape=(), dtype=int64)"
3,tf.Tensor(b'Poor Shirley MacLaine tries hard t...,"tf.Tensor(0, shape=(), dtype=int64)"
4,tf.Tensor(b'As a former Erasmus student I enjo...,"tf.Tensor(1, shape=(), dtype=int64)"


In [5]:
import numpy as np
import tensorflow as tf

In [6]:
tf.random.set_seed(42)
train_set = train_dataset.shuffle(5000, seed=42).batch(32).prefetch(1)
valid_set = val_dataset.batch(32).prefetch(1)
test_set = test_dataset.batch(32).prefetch(1)

In [7]:
vocab_size =1000
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens=vocab_size)
text_vec_layer.adapt(train_dataset.map(lambda reviews, labels: reviews))

In [10]:
embed_size = 128
inputs = tf.keras.layers.Input(shape=[], dtype=tf.string)
token_ids = text_vec_layer(inputs)
Z = tf.keras.layers.Embedding(vocab_size, embed_size,  mask_zero=True)(token_ids)
Z = tf.keras.layers.GRU(128,  dropout=0.2)(Z)
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(Z)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])

In [11]:
model.compile(loss="binary_crossentropy", optimizer="nadam", metrics=["accuracy"])
history = model.fit(train_set, validation_data=valid_set, epochs=10)

Epoch 1/10




[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m480s[0m 675ms/step - accuracy: 0.6669 - loss: 0.6038 - val_accuracy: 0.8072 - val_loss: 0.4495
Epoch 2/10
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m492s[0m 699ms/step - accuracy: 0.8291 - loss: 0.3977 - val_accuracy: 0.8636 - val_loss: 0.3149
Epoch 3/10
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m506s[0m 719ms/step - accuracy: 0.8787 - loss: 0.2995 - val_accuracy: 0.8660 - val_loss: 0.3157
Epoch 4/10
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m506s[0m 718ms/step - accuracy: 0.8890 - loss: 0.2711 - val_accuracy: 0.8684 - val_loss: 0.3050
Epoch 5/10
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1064s[0m 2s/step - accuracy: 0.8947 - loss: 0.2567 - val_accuracy: 0.8668 - val_loss: 0.3089
Epoch 6/10
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 573ms/step - accuracy: 0.9014 - loss: 0.2465 - val_accuracy: 0.8612 - val_loss: 0.3170
Epoch 7/10
[1m70

In [12]:
model.evaluate(test_set, batch_size=128)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 110ms/step - accuracy: 0.8585 - loss: 0.3960


[0.38889485597610474, 0.858959972858429]

In [13]:
result = model.predict(test_set)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 116ms/step


In [14]:
test_df[5600:5601]

Unnamed: 0,Review,Label
5600,tf.Tensor(b'This is one of the most awful movi...,"tf.Tensor(0, shape=(), dtype=int64)"


In [15]:
result[5600]

array([0.00140015], dtype=float32)

In [17]:
test_df[755:756]

Unnamed: 0,Review,Label
755,"tf.Tensor(b""The net is an excellent movie! It'...","tf.Tensor(1, shape=(), dtype=int64)"


In [18]:
result[755]

array([0.9979615], dtype=float32)