# TensorFlow Hub - Text classification

- Based on: https://www.tensorflow.org/hub/tutorials/tf2_text_classification?hl=pt-br

# Import Dependencies

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Record the library versions this notebook was run with, for reproducibility.
print('TensorFlow version: ', tf.__version__)
print('TensorFlow Hub version: ', hub.__version__)

TensorFlow version:  2.14.0
TensorFlow Hub version:  0.15.0


# Load Dataset

- IMDB movie reviews dataset, loaded through TensorFlow Datasets. Labels: 0 = negative review, 1 = positive review.

In [None]:
# Load the IMDB reviews train and test splits.
# batch_size=-1 returns each split as one full batch, and as_supervised=True
# yields (text, label) pairs instead of feature dictionaries.
train_data, test_data = tfds.load(
    name='imdb_reviews',
    split=['train', 'test'],
    batch_size=-1,
    as_supervised=True,
)

# Convert each (text, label) pair to NumPy arrays.
X_train, y_train = tfds.as_numpy(train_data)
X_test, y_test = tfds.as_numpy(test_data)

In [3]:
# Sanity check: the training texts and labels should have the same length.
X_train.shape, y_train.shape

((25000,), (25000,))

In [4]:
# Sanity check: the test texts and labels should have the same length.
X_test.shape, y_test.shape

((25000,), (25000,))

In [5]:
# Class balance of the training labels: distinct label values and their counts.
np.unique(y_train, return_counts=True)

(array([0, 1]), array([12500, 12500]))

In [6]:
# Class balance of the test labels: distinct label values and their counts.
np.unique(y_test, return_counts=True)

(array([0, 1]), array([12500, 12500]))

# Building and training Neural Networks

In [7]:
# Pre-trained text-embedding module from TensorFlow Hub, used as the first layer
# of the classifier.
# More models: https://tfhub.dev/s?module-type=text-embedding
model_path = 'https://tfhub.dev/google/nnlm-en-dim50/2'

In [8]:
# Wrap the Hub module as a Keras layer that consumes raw strings: each example is
# a scalar tf.string, hence input_shape = []. trainable = True lets the embedding
# weights be fine-tuned together with the classifier layers.
embedding_layer = hub.KerasLayer(model_path, input_shape = [], dtype = tf.string, trainable = True)

In [None]:
# Quick check: embed the first two training reviews to inspect the layer's output.
# Background on word embeddings (article in Portuguese):
# https://iaexpert.academy/2019/04/12/word-embedding-transformando-palavras-em-numeros/
embedding_layer(X_train[0:2])

In [12]:
# Stack the layers in order: pre-trained text embedding -> hidden layer -> output.
model = tf.keras.Sequential([
    embedding_layer,
    # Hidden layer: the 50-dimensional text embedding is fully connected to 16 neurons.
    tf.keras.layers.Dense(units = 16, activation = 'relu'),
    # Output layer: the 16 hidden neurons feed a single neuron. No activation is
    # applied here, so the model outputs a raw logit.
    tf.keras.layers.Dense(units = 1),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 50)                48190600  
                                                                 
 dense (Dense)               (None, 16)                816       
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 48191433 (183.84 MB)
Trainable params: 48191433 (183.84 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# Compile the model. The output layer has no activation, so the model emits raw
# logits rather than probabilities:
#   - the loss is told to expect logits (from_logits = True);
#   - accuracy thresholds the logits at 0.0, which corresponds to a probability
#     of 0.5. The plain 'accuracy' string would resolve to binary accuracy with
#     its default 0.5 threshold applied directly to the logits, so the reported
#     accuracy would not match a sigmoid >= 0.5 decision rule.
# The metric keeps the name 'accuracy' so logs and history keys are unchanged.
model.compile(
    optimizer = 'adam',
    loss = tf.losses.BinaryCrossentropy(from_logits = True),
    metrics = [tf.keras.metrics.BinaryAccuracy(threshold = 0.0, name = 'accuracy')],
)

In [None]:
# Train on the full training set; no validation data is held out in this call.
model.fit(X_train, y_train, epochs = 20, batch_size = 512, verbose = 1)

In [None]:
# Evaluate on the test set; `results` holds the loss followed by the metric(s)
# configured in model.compile.
results = model.evaluate(X_test, y_test)
print(results)

# Predictions

In [None]:
# Peek at the first five test reviews.
X_test[0:5]

In [None]:
# Ground-truth labels of the first five test reviews (0 = negative, 1 = positive).
y_test[0:5]

In [None]:
# Raw model outputs for the first five test reviews. The output layer has no
# activation, so these are logits, not probabilities.
predictions = model.predict(X_test[0:5])

In [None]:
# Display the raw model outputs.
predictions

In [None]:
# Convert the model's logits to probabilities of the positive class.
# The logits are recomputed from the model here instead of being read back from
# `predictions`, so this cell is idempotent. Re-running a cell of the form
# `predictions = sigmoid(predictions)` would apply the sigmoid twice and silently
# produce wrong probabilities.
logits = model.predict(X_test[0:5], verbose = 0)
predictions = tf.nn.sigmoid(logits).numpy()
predictions

In [None]:
# Turn probabilities into hard class decisions at the 0.5 cut-off:
# True = positive review (label 1), False = negative review (label 0).
predictions = (predictions >= 0.5)
predictions