# basic text classification

[source site](https://www.tensorflow.org/beta/tutorials/keras/basic_text_classification_with_tfhub)

In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_hub as hub 
import tensorflow_datasets as tfds  # load libaries, check environment
# if you didn't install the library
# pip install "tensorflow_hub==0.4.0"
# pip install tensorflow-datasets

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.test.is_gpu_available() else "NOT AVAILABLE")

Version:  2.0.0-beta1
Eager mode:  True
Hub version:  0.4.0
GPU is available


In [2]:
# Split the training set into 60% and 40%, so we'll end up with 15,000 examples
# for training, 10,000 examples for validation and 25,000 examples for testing.
train_validation_split = tfds.Split.TRAIN.subsplit([6, 4])

(train_data, validation_data), test_data = tfds.load(
    name="imdb_reviews", 
    split=(train_validation_split, tfds.Split.TEST),
    as_supervised=True)

In [3]:
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
train_examples_batch

<tf.Tensor: id=214, shape=(10,), dtype=string, numpy=
array([b"As a lifelong fan of Dickens, I have invariably been disappointed by adaptations of his novels.<br /><br />Although his works presented an extremely accurate re-telling of human life at every level in Victorian Britain, throughout them all was a pervasive thread of humour that could be both playful or sarcastic as the narrative dictated. In a way, he was a literary caricaturist and cartoonist. He could be serious and hilarious in the same sentence. He pricked pride, lampooned arrogance, celebrated modesty, and empathised with loneliness and poverty. It may be a clich\xc3\xa9, but he was a people's writer.<br /><br />And it is the comedy that is so often missing from his interpretations. At the time of writing, Oliver Twist is being dramatised in serial form on BBC television. All of the misery and cruelty is their, but non of the humour, irony, and savage lampoonery. The result is just a dark, dismal experience: the story p

In [4]:
train_labels_batch # 1 = positive, 0 = negative

<tf.Tensor: id=215, shape=(10,), dtype=int64, numpy=array([1, 1, 1, 1, 1, 1, 0, 1, 1, 0])>

In [5]:
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
hub_layer(train_examples_batch[:3]) # pretrained layer embedding text review to vector arrary (length 20)

<tf.Tensor: id=406, shape=(3, 20), dtype=float32, numpy=
array([[ 3.9819887 , -4.4838037 ,  5.177359  , -2.3643482 , -3.2938678 ,
        -3.5364532 , -2.4786978 ,  2.5525482 ,  6.688532  , -2.3076782 ,
        -1.9807833 ,  1.1315885 , -3.0339816 , -0.7604128 , -5.743445  ,
         3.4242578 ,  4.790099  , -4.03061   , -5.992149  , -1.7297493 ],
       [ 3.4232912 , -4.230874  ,  4.1488533 , -0.29553518, -6.802391  ,
        -2.5163853 , -4.4002395 ,  1.905792  ,  4.7512794 , -0.40538004,
        -4.3401685 ,  1.0361497 ,  0.9744097 ,  0.71507156, -6.2657013 ,
         0.16533905,  4.560262  , -1.3106939 , -3.1121316 , -2.1338716 ],
       [ 3.8508697 , -5.003031  ,  4.8700504 , -0.04324996, -5.893603  ,
        -5.2983093 , -4.004676  ,  4.1236343 ,  6.267754  ,  0.11632943,
        -3.5934832 ,  0.8023905 ,  0.56146765,  0.9192484 , -7.3066816 ,
         2.8202746 ,  6.2000837 , -3.5709393 , -4.564525  , -2.305622  ]],
      dtype=float32)>

In [6]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.hub = hub_layer
        self.d1 = tf.keras.layers.Dense(16, activation='relu')
        self.d2 = tf.keras.layers.Dense(1, activation='softmax')
    def call(self, x):
        x = self.hub(x)
        x = self.d1(x)
        return self.d2(x)
# build modes with class
model = MyModel()

In [7]:
print(model.summary)

<bound method Network.summary of <__main__.MyModel object at 0x7f94303c3550>>


In [8]:
loss_object = tf.keras.losses.BinaryCrossentropy()

optimizer = tf.keras.optimizers.Adam()

In [9]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')
# for evaluation metrix
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')

In [10]:
@tf.function
def train_step(reviews, labels):
    with tf.GradientTape() as tape:
        predictions = model(reviews)
        loss = loss_object(labels, predictions) 
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    train_loss(loss) # for evaluation metric
    train_accuracy(labels, predictions) # # for evaluation metric

In [11]:
@tf.function
def test_step(reviews, labels):
  predictions = model(reviews)
  t_loss = loss_object(labels, predictions)
# note that there's no gradients or optimizer function, 
# just predictions and loss calculations.
  test_loss(t_loss)
  test_accuracy(labels, predictions)

In [12]:
# train_ds = train_data.shuffle(10000).batch(512)
# validation_ds = validation_data.batch(512)
# test_ds = test_data.batch(512) # for batch training

In [35]:
EPOCHS = 20

for epoch in range(EPOCHS):
    for i in range(15):
      train_x_batch, train_y_batch = next(iter(train_data.batch(512)))
      vali_x_batch, vali_y_batch = next(iter(validation_data.batch(512)))
      train_step(train_x_batch, train_y_batch)
      test_step(vali_x_batch, vali_y_batch)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print (template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

Epoch 1, Loss: 7.247348308563232, Accuracy: 52.734375, Test Loss: 7.636671543121338, Test Accuracy: 50.1953125
Epoch 2, Loss: 7.247347354888916, Accuracy: 52.734375, Test Loss: 7.636667251586914, Test Accuracy: 50.1953125
Epoch 3, Loss: 7.247346878051758, Accuracy: 52.734375, Test Loss: 7.636665344238281, Test Accuracy: 50.1953125
Epoch 4, Loss: 7.247350215911865, Accuracy: 52.734375, Test Loss: 7.636663913726807, Test Accuracy: 50.1953125
Epoch 5, Loss: 7.247354507446289, Accuracy: 52.734375, Test Loss: 7.63666296005249, Test Accuracy: 50.1953125
Epoch 6, Loss: 7.247357368469238, Accuracy: 52.734375, Test Loss: 7.636662006378174, Test Accuracy: 50.1953125
Epoch 7, Loss: 7.247359275817871, Accuracy: 52.734375, Test Loss: 7.636661529541016, Test Accuracy: 50.1953125
Epoch 8, Loss: 7.247361183166504, Accuracy: 52.734375, Test Loss: 7.636663913726807, Test Accuracy: 50.1953125
Epoch 9, Loss: 7.24735689163208, Accuracy: 52.734375, Test Loss: 7.636669158935547, Test Accuracy: 50.1953125
Epo

### I couldn't reveal the cause of not working optimizing.
### So, I have to build model sequentially as source site.

In [36]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
dense_2 (Dense)              (None, 16)                336       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
Total params: 400,373
Trainable params: 400,373
Non-trainable params: 0
_________________________________________________________________


In [37]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [38]:
history = model.fit(train_data.shuffle(10000).batch(512),
                    epochs=20,
                    validation_data=validation_data.batch(512),
                    verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [39]:
results = model.evaluate(test_data.batch(512), verbose=0)
for name, value in zip(model.metrics_names, results):
  print("%s: %.3f" % (name, value))

loss: 0.319
accuracy: 0.865
