In [47]:
import tensorflow as tf

imdb = tf.keras.datasets.imdb

train_data, test_data = imdb.load_data()

In [76]:
import numpy as np
word_index = {k: (v+3) for k,v in imdb.get_word_index().items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

reverse_word_index = dict([(value, key) for key, value in word_index.items()])

train_dataset_texts = []
train_dataset_labels = []
for index in range(len(train_data[0])):
    train_dataset_texts.append(' '.join([ reverse_word_index.get(i, '?') for i in train_data[0][index]]))
    train_dataset_labels.append(train_data[1][index])

test_dataset_texts = []
test_dataset_labels = []
for index in range(len(test_data[0])):
    test_dataset_texts.append(' '.join([ reverse_word_index.get(i, '?') for i in test_data[0][index]]))
    test_dataset_labels.append(test_data[1][index])


train_dataset = tf.data.Dataset.from_tensor_slices((train_dataset_texts, train_dataset_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_dataset_texts, test_dataset_labels))

In [81]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [82]:
vocab_size = 10000

encoder = tf.keras.layers.TextVectorization(
    max_tokens=vocab_size)
encoder.adapt(train_dataset.map(lambda text, label: text))

In [92]:
class SentimentalClassifier(tf.keras.Model):
    def __init__(self, encoder, dense_units):
        super(SentimentalClassifier, self).__init__()
        self.encoder = encoder
        self.embedding = tf.keras.layers.Embedding(input_dim=len(encoder.get_vocabulary()),
                                                    output_dim=dense_units,
                                                    mask_zero=True)
        # It performs average pooling across the temporal dimension of the input data,
        # reducing the spatial dimensionality of the data while preserving important features.
        # self.averager = tf.keras.layers.GlobalAveragePooling1D()
        self.gru = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(dense_units, return_state=True, return_sequences=True))
        self.dense = tf.keras.layers.Dense(dense_units, activation='relu')
        self.outputer = tf.keras.layers.Dense(1, activation='sigmoid')
    
    def call(self, inputs, state=None):
        vector = self.encoder(inputs)
        vector = self.embedding(vector)
        y, forward_state, backward_state = self.gru(vector, initial_state=state)
        y = self.dense(y)
        output = self.outputer(y)
        #the second tf.concat param is the axis of concat, 0 will only append, 1 will append line by line and so
        return output, tf.concat([forward_state, backward_state], 1)

In [93]:
model = SentimentalClassifier(encoder=encoder, dense_units=64)
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True))

sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')
model(np.array([sample_text]))

(<tf.Tensor: shape=(1, 19, 1), dtype=float32, numpy=
 array([[[0.4965589 ],
         [0.50012517],
         [0.5004365 ],
         [0.4931547 ],
         [0.4947679 ],
         [0.4989229 ],
         [0.5025305 ],
         [0.49466473],
         [0.49846768],
         [0.50109106],
         [0.50188833],
         [0.4969383 ],
         [0.49871266],
         [0.50345856],
         [0.49946272],
         [0.4973263 ],
         [0.49614328],
         [0.49713218],
         [0.49834543]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 128), dtype=float32, numpy=
 array([[ 1.41319772e-03, -8.33016541e-03,  5.35984291e-03,
         -6.95339264e-03, -3.09642055e-03, -1.03756376e-02,
         -4.15473152e-03, -2.42858753e-03,  1.05134500e-02,
          4.63201711e-03,  1.34592196e-02, -2.94765062e-03,
         -4.53861617e-03, -2.78137065e-02, -1.90704539e-02,
         -2.03352980e-03, -8.59928690e-03,  1.47838760e-02,
          8.87841266e-03,  4.18538228e-03, -1.04959197e-02,
          4.10443079

In [None]:
model.fit(train_dataset, epochs=10,
            validation_data=test_dataset,
            validation_steps=30)

In [None]:
model.evaluate(test_dataset)