In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

In [None]:
# Carve the official 'train' split into a 60% training part and a 40%
# validation part; the official 'test' split is loaded as-is.
splits = [
    'train[:60%]',   # first 60% of 'train' -> training
    'train[-40%:]',  # last 40% of 'train'  -> validation
    'test',
]

# as_supervised=True makes each element a (text, label) pair;
# with_info=True additionally returns the dataset metadata object.
data, info = tfds.load(
    name='imdb_reviews',
    split=splits,
    as_supervised=True,
    with_info=True,
)
train, valid, test = data

In [None]:
# Headline numbers taken from the dataset metadata.
# Note: num_train_examples counts the whole official 'train' split,
# not only the 60% slice that is actually used for training.
num_classes = info.features['label'].num_classes
num_train_examples = info.splits['train'].num_examples
num_test_examples = info.splits['test'].num_examples

# One statistic per output line.
print(
    'The Dataset has a total of:',
    f'{num_classes} classes',
    f'{num_train_examples} movie reviews for training',
    f'{num_test_examples} movie reviews for testing',
    sep='\n',
)

The Dataset has a total of:
2 classes
25000 movie reviews for training
25000 movie reviews for testing


In [None]:
# Human-readable names for the integer labels; the list position is the label id.
class_names = [
    'negative',  # label 0
    'positive',  # label 1
]

In [None]:
# Peek at the first two training examples: the raw review bytes, then the label name.
for sentence, label in train.take(2):
  text_bytes, label_id = sentence.numpy(), label.numpy()
  print(text_bytes, class_names[label_id], sep='\n')

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
negative
b'I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion I fell asleep because the film was rubbish. The plot develo

# Getting word embeddings from TensorFlow Hub

In [2]:
# Text-embedding module for English that accepts raw strings directly and
# returns one 128-dimensional vector per input string.
# A BERT encoder handle is not usable here: it expects the tokenised dict
# produced by a separate preprocessing model, not a batch of plain strings.
emb_link = 'https://tfhub.dev/google/nnlm-en-dim128/2'

# input_shape=[] + dtype=tf.string: each example is a single scalar string.
# trainable=True lets the embedding weights be fine-tuned with the classifier.
emb_layer = hub.KerasLayer(emb_link, trainable=True, input_shape=[], dtype=tf.string)

In [None]:
# Symbolic model input: a batch of scalar strings (one raw text per example).
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)

# NOTE(review): `encoder` below is loaded from a BERT encoder handle; presumably it
# expects the output of a matching BERT preprocessing layer — confirm that
# `emb_layer` is such a layer before relying on this cell. TODO confirm.
encoder_inputs = emb_layer(text_input)
encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/4",
    trainable=True)
outputs = encoder(encoder_inputs)
# NOTE(review): the handle is the `bert_zh` variant while the dataset loaded in this
# notebook is `imdb_reviews` — confirm this is the intended language model.
pooled_output = outputs["pooled_output"]      # [batch_size, 768].
sequence_output = outputs["sequence_output"]  # [batch_size, seq_length, 768].
# Neither output is referenced by the later cells visible in this notebook.

In [None]:
batch_size = 512

# Training stream: shuffle, then batch, then keep one batch prefetched.
train_batch = (
    train
    .shuffle(num_train_examples)
    .batch(batch_size)
    .prefetch(1)
)

# Validation stream: same batching and prefetching, no shuffling.
val_batch = (
    valid
    .batch(batch_size)
    .prefetch(1)
)

# Test stream: batched only.
test_batch = test.batch(batch_size)

In [None]:
# Binary sentiment classifier: text embedding -> small dense head -> sigmoid score.
model = tf.keras.Sequential([
    # Embedding layer created earlier in the notebook; it must map a batch of
    # raw strings to a fixed-size vector per example.
    emb_layer,
    # Define custom layers from here
    tf.keras.layers.Dense(16, activation='relu'),
    # Single sigmoid unit: output in [0, 1], thresholded at 0.5 when predicting.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy matches the single sigmoid output unit of the model.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train for 20 epochs, reporting validation metrics after each one.
# Left as the last expression so the returned History object is displayed.
model.fit(
    train_batch,
    validation_data=val_batch,
    epochs=20,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f9af7e01278>

In [None]:
# Score the held-out test batches; the displayed result is [loss, accuracy].
model.evaluate(x=test_batch)



[0.5643014311790466, 0.8498799800872803]

In [None]:
# Score one hand-written review. The text is wrapped in a string tensor so that
# Keras receives a proper batch of shape (1,) instead of a bare Python list.
preds = model.predict(tf.constant(['This movie was ok']))

# preds holds one row per input and a single sigmoid column. Pull the one score
# out as a plain Python float so it can be rounded, printed and compared
# without relying on the truth value of a whole tensor.
score = float(preds[0, 0])

print(f'Score: {round(score, 2)}/1')
if score > 0.5:
  print('Positive :)')
else:
  print('Negative! :(')

Score: 0.42/1
Negative! :(


In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 128)               124642688 
_________________________________________________________________
dense (Dense)                (None, 16)                2064      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 124,644,769
Trainable params: 124,644,769
Non-trainable params: 0
_________________________________________________________________
