In [1]:
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.layers import Input, Dense ,Dropout,LSTM, Embedding, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.losses import BinaryCrossentropy

In [2]:
# Display the installed TensorFlow version.
tf.__version__

'2.1.0'

In [3]:
import tensorflow_datasets as tfds

# List the names of every dataset builder registered with TFDS (the output is long).
tfds.list_builders()




['abstract_reasoning',
 'aeslc',
 'aflw2k3d',
 'amazon_us_reviews',
 'arc',
 'bair_robot_pushing_small',
 'beans',
 'big_patent',
 'bigearthnet',
 'billsum',
 'binarized_mnist',
 'binary_alpha_digits',
 'c4',
 'caltech101',
 'caltech_birds2010',
 'caltech_birds2011',
 'cars196',
 'cassava',
 'cats_vs_dogs',
 'celeb_a',
 'celeb_a_hq',
 'cfq',
 'chexpert',
 'cifar10',
 'cifar100',
 'cifar10_1',
 'cifar10_corrupted',
 'citrus_leaves',
 'cityscapes',
 'civil_comments',
 'clevr',
 'cmaterdb',
 'cnn_dailymail',
 'coco',
 'coil100',
 'colorectal_histology',
 'colorectal_histology_large',
 'cos_e',
 'curated_breast_imaging_ddsm',
 'cycle_gan',
 'deep_weeds',
 'definite_pronoun_resolution',
 'diabetic_retinopathy_detection',
 'div2k',
 'dmlab',
 'downsampled_imagenet',
 'dsprites',
 'dtd',
 'duke_ultrasound',
 'dummy_dataset_shared_generator',
 'dummy_mnist',
 'emnist',
 'eraser_multi_rc',
 'esnli',
 'eurosat',
 'fashion_mnist',
 'flic',
 'flores',
 'food101',
 'gap',
 'gigaword',
 'glue',
 'gr

In [4]:
# Hyperparameters shared by the data pipeline and the model.
BUFFER_SIZE = 10000  # shuffle buffer size passed to Dataset.shuffle
BATCH_SIZE = 40  # examples per padded batch; also the training model's fixed batch size
EMBED_DIM = 70  # passed to build_model as embedding_dim
RNN_UNITS = 70  # passed to build_model as rnn_units
VECSIZE = 100  # NOTE(review): not referenced in the visible cells - confirm it is still needed

In [5]:
# Load the IMDB reviews in their 8k-subword encoding as supervised
# (input, target) pairs, together with the dataset metadata.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Training split: shuffle first, then batch; the variable-length inputs are
# padded within each batch while the targets stay scalar.
train_dataset = train_dataset.shuffle(BUFFER_SIZE).padded_batch(
    BATCH_SIZE, padded_shapes=([None], [])
)
# Test split: same padded batching, no shuffling.
test_dataset = test_dataset.padded_batch(BATCH_SIZE, padded_shapes=([None], []))


In [6]:
# Text encoder taken from the dataset metadata; its encode() method is used
# later to turn a raw string into integer token ids.
encoder = info.features['text'].encoder

In [7]:
# Vocabulary size reported by the text feature; passed to build_model as the
# Embedding layer's input dimension.
VOCAB_SIZE = info.features['text'].vocab_size

In [8]:
# Checkpoint callback handed to model.fit so the trained weights end up on disk.

# Folder that receives the checkpoint files.
checkpoint_dir = './training_checkpoints'

# Every save targets this same prefix inside that folder.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_latest")

# Persist the weights only, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix, save_weights_only=True
)


In [9]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size=None):
    """
    Build the (uncompiled) bidirectional-LSTM classifier.

    Architecture: Embedding -> Bidirectional(LSTM) -> Dense(relu) -> Dense(1).
    The final Dense layer has no activation, so the model outputs one raw
    logit per input sequence.

    Args:
        vocab_size: number of distinct token ids (Embedding input dimension).
        embedding_dim: size of each token embedding vector.
        rnn_units: units in each LSTM direction and in the hidden Dense layer.
        batch_size: optional fixed batch size for the input layer. Leave as
            None (the default) to accept batches of any size.

    Returns:
        A tf.keras Model mapping a (batch, sequence_length) array of token
        ids to a (batch, 1) array of logits.
    """
    token_ids = Input(shape=(None,), batch_size=batch_size)
    x = Embedding(vocab_size, embedding_dim)(token_ids)
    x = Bidirectional(LSTM(rnn_units))(x)
    x = Dense(rnn_units, activation='relu')(x)
    logits = Dense(1)(x)

    return Model(token_ids, logits)

In [10]:
# Instantiate the training model from the notebook's hyperparameters.
model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBED_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE,
)

In [11]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(40, None)]              0         
_________________________________________________________________
embedding (Embedding)        (40, None, 70)            572950    
_________________________________________________________________
bidirectional (Bidirectional (40, 140)                 78960     
_________________________________________________________________
dense (Dense)                (40, 70)                  9870      
_________________________________________________________________
dense_1 (Dense)              (40, 1)                   71        
Total params: 661,851
Trainable params: 661,851
Non-trainable params: 0
_________________________________________________________________


In [12]:
# The model emits raw logits, so the loss is configured with from_logits=True.
# The plain 'accuracy' string would resolve to binary accuracy with a 0.5
# threshold, which is meant for probabilities; on logits the decision boundary
# is 0. The metric keeps the name 'accuracy' so logs and history keys are unchanged.
model.compile(loss=BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])


In [None]:
# Train for 9 epochs, validating on 10 batches of the test split and passing
# the checkpoint callback so the weights are saved during training.
history = model.fit(
    train_dataset,
    epochs=9,
    validation_data=test_dataset,
    validation_steps=10,
    callbacks=[checkpoint_callback],
)

Epoch 1/9
    356/Unknown - 33s 94ms/step - loss: 0.6793 - accuracy: 0.5107

In [None]:
# Rebuild the same architecture with batch size 1 so single reviews can be scored.
model_reloaded = build_model(VOCAB_SIZE,EMBED_DIM,RNN_UNITS,1)  # set batch size to 1

# tf.train.latest_checkpoint returns None when the directory holds no checkpoint;
# fail with a clear message instead of handing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        "No checkpoint found in {!r}; run the training cell first.".format(checkpoint_dir)
    )

# Restore the trained weights, then build for a single-sequence input shape.
model_reloaded.load_weights(latest_checkpoint)
model_reloaded.build(tf.TensorShape([1, None]))

In [None]:
# Print the structure of the reloaded single-example model.
model_reloaded.summary()

In [None]:
def sample_predict(sample_pred_text):
    """
    Score one raw text string with the reloaded model.

    The text is encoded with the dataset's encoder, cast to float32, given a
    leading batch axis of size 1 and passed to model_reloaded.predict.

    Returns whatever predict returns for that single-item batch.
    """
    token_ids = tf.cast(encoder.encode(sample_pred_text), tf.float32)
    single_batch = tf.expand_dims(token_ids, 0)
    return model_reloaded.predict(single_batch)


In [None]:
# Two hand-written reviews used to spot-check the reloaded model:
# one favourable, one unfavourable.
sample_pred_text = 'The movie was great, amazing movie with good graphics'
sample_pred_text_bad = 'The movie was Horrible, Dont know what they were thinking'


In [None]:
# Model output for the favourable review.
sample_predict(sample_pred_text)


In [None]:
# Model output for the unfavourable review.
sample_predict(sample_pred_text_bad)