# Keras transfer learning for NLP

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Pull the whole IMDB reviews dataset into memory: batch_size=-1 returns each
# split as a single batch, and as_supervised=True yields (text, label) pairs.
train_data, test_data = tfds.load(
    name="imdb_reviews",
    split=["train", "test"],
    as_supervised=True,
    batch_size=-1,
)

# Convert each split to NumPy arrays, unpacked into inputs and targets.
train_examples, train_labels = tfds.as_numpy(train_data)
test_examples, test_labels = tfds.as_numpy(test_data)

In [None]:
We are trying to predict whether IMDB movie reviews are positive or negative just by reading their text.

# Using gnews-swivel-20dim.

In [None]:
# TF Hub handle of the pre-trained 20-dimensional gnews-swivel text embedding.
# NOTE: `model` holds this URL string only for now; it is rebound to the Keras
# model further down in the notebook.
model = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"

# Wrap the embedding as a Keras layer that accepts raw strings (scalar input
# shape) and emits 20-dim vectors; trainable=True lets training fine-tune it.
hub_layer = hub.KerasLayer(
    model,
    input_shape=[],
    output_shape=[20],
    dtype=tf.string,
    trainable=True,
)

In [None]:
# Expected output of the hub layer call in the next cell (one 20-dim embedding per review):
#<tf.Tensor: id=305, shape=(3, 20), dtype=float32, numpy=
# array([[ 3.9819887 , -4.4838037 ,  5.177359  , -2.3643482 , -3.2938678 ,
#         -3.5364532 , -2.4786978 ,  2.5525482 ,  6.688532  , -2.3076782 ,
#         -1.9807833 ,  1.1315885 , -3.0339816 , -0.7604128 , -5.743445  ,
#          3.4242578 ,  4.790099  , -4.03061   , -5.992149  , -1.7297493 ],
#        [ 3.4232912 , -4.230874  ,  4.1488533 , -0.29553518, -6.802391  ,
#         -2.5163853 , -4.4002395 ,  1.905792  ,  4.7512794 , -0.40538004,
#         -4.3401685 ,  1.0361497 ,  0.9744097 ,  0.71507156, -6.2657013 ,
#          0.16533905,  4.560262  , -1.3106939 , -3.1121316 , -2.1338716 ],
#        [ 3.8508697 , -5.003031  ,  4.8700504 , -0.04324996, -5.893603  ,
#         -5.2983093 , -4.004676  ,  4.1236343 ,  6.267754  ,  0.11632943,
#         -3.5934832 ,  0.8023905 ,  0.56146765,  0.9192484 , -7.3066816 ,
#          2.8202746 ,  6.2000837 , -3.5709393 , -4.564525  , -2.305622  ]],
#       dtype=float32)>

In [None]:
# Peek at the first three raw movie reviews. (A notebook cell only renders its
# final expression, so this line produces no output when more code follows.)
train_examples[:3]
# The reviews are unstructured text, so run them through the hub layer, which
# converts each review into an embedding vector.
hub_layer(train_examples[:3])

# Binary sentiment classifier: text embedding -> 16-unit ReLU hidden layer ->
# a single sigmoid unit (a review is either positive or negative).
model = tf.keras.Sequential([
    hub_layer,
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.summary()


# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as the reporting metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# Hold out the first 10,000 training reviews for validation; fit on the rest.
x_val, partial_x_train = train_examples[:10000], train_examples[10000:]
y_val, partial_y_train = train_labels[:10000], train_labels[10000:]

# Train for 40 epochs in mini-batches of 512, scoring the held-out validation
# set after every epoch. The returned History object feeds the plots below.
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=40,
    validation_data=(x_val, y_val),
    verbose=1,
)

# Score the trained model on the held-out test split. `evaluate` takes the
# input array and the label array separately, so pass `test_examples` (the
# review texts) rather than `test_data`, which is the whole (text, label)
# pair returned by tfds.load and is not a valid single model input.
results = model.evaluate(test_examples, test_labels)

# `results` is the test loss followed by the compiled metrics (accuracy).
print(results)

# To chart how training progress
history_dict = history.history
history_dict.keys()


%matplotlib inline
import matplotlib.pyplot as plt

acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

# "bo" is for "blue dot"
plt.plot(epochs, loss, 'bo', label='Training loss')
# b is for "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()