<a href="https://colab.research.google.com/github/DouglasDiasEng/Text-Classification/blob/main/Text_Classification_(Clean_Code).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
!pip install -U tf-keras tensorflow-hub




In [22]:
import os

# Ask TensorFlow to back `tf.keras` with the legacy Keras 2 package (tf-keras).
# The flag has to be in the environment *before* TensorFlow / TF-Hub are
# imported; setting it afterwards (as this cell previously did) is too late to
# reliably take effect. On an already-running kernel, restart before re-running.
os.environ["TF_USE_LEGACY_KERAS"] = "1"

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub

# Record the library versions this notebook was executed with.
print("TensorFlow version:", tf.__version__)
print("Hub version:", hub.__version__)
print("TensorFlow Datasets version: ", tfds.__version__)

TensorFlow version: 2.20.0
Hub version: 0.16.1
TensorFlow Datasets version:  4.9.9


In [23]:
# Fetch the IMDB reviews train and test splits from TensorFlow Datasets.
train_data, test_data = tfds.load(
    name="imdb_reviews",
    split=["train", "test"],
    batch_size=-1,       # -1: return each split as one full batch
    as_supervised=True,  # each split comes back as a (text, label) pair
)

# Convert both splits to NumPy and unpack them into features and labels.
(X_train, y_train), (X_test, y_test) = (
    tfds.as_numpy(split) for split in (train_data, test_data)
)

In [24]:
# Peek at the first five training examples: a truncated review and its label.
print('First 5 samples of X_train:')
for i in range(5):
    preview = X_train[i][:100]  # Shows the first 100 characters of the review
    print(f'Review: {preview}...', f'Label: {y_train[i]}', '---', sep='\n')

# Report the shape of every feature / label array.
shape_report = [
    ('\nX_train shape:', X_train.shape),
    ('y_train shape:', y_train.shape),
    ('X_test shape:', X_test.shape),
    ('y_test shape:', y_test.shape),
]
for caption, shape in shape_report:
    print(caption, shape)


First 5 samples of X_train:
Review: b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. "...
Label: 0
---
Review: b'I have been known to fall asleep during films, but this is usually due to a combination of things in'...
Label: 0
---
Review: b'Mann photographs the Alberta Rocky Mountains in a superb fashion, and Jimmy Stewart and Walter Brenn'...
Label: 0
---
Review: b'This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with i'...
Label: 1
---
Review: b'As others have mentioned, all the women that go nude in this film are mostly absolutely gorgeous. Th'...
Label: 1
---

X_train shape: (25000,)
y_train shape: (25000,)
X_test shape: (25000,)
y_test shape: (25000,)


In [25]:
# Class balance of the training labels: distinct label values and how often each occurs.
np.unique(y_train, return_counts=True)


(array([0, 1]), array([12500, 12500]))

In [26]:
# Class balance of the test labels: distinct label values and how often each occurs.
np.unique(y_test, return_counts=True)

(array([0, 1]), array([12500, 12500]))

In [27]:
# TF-Hub handle (URL) of the pre-trained text-embedding module; passed to
# hub.KerasLayer when the embedding layers are created.
model_path = 'https://tfhub.dev/google/nnlm-en-dim50/2'

In [28]:
# Stand-alone embedding layer built from the TF-Hub module at `model_path`.
embedding_layer = hub.KerasLayer(
    model_path,
    input_shape=[],   # each example is a single scalar ...
    dtype=tf.string,  # ... string
    trainable=True,   # mark the module's weights as trainable
)

In [29]:
# Sanity check: embed the first two training reviews and display the resulting tensor.
embedding_layer(X_train[:2])

<tf.Tensor: shape=(2, 50), dtype=float32, numpy=
array([[ 0.5423194 , -0.01190171,  0.06337537,  0.0686297 , -0.16776839,
        -0.10581177,  0.168653  , -0.04998823, -0.31148052,  0.07910344,
         0.15442258,  0.01488661,  0.03930155,  0.19772716, -0.12215477,
        -0.04120982, -0.27041087, -0.21922147,  0.26517656, -0.80739075,
         0.25833526, -0.31004202,  0.2868321 ,  0.19433866, -0.29036498,
         0.0386285 , -0.78444123, -0.04793238,  0.41102988, -0.36388886,
        -0.58034706,  0.30269453,  0.36308962, -0.15227163, -0.4439151 ,
         0.19462997,  0.19528405,  0.05666233,  0.2890704 , -0.28468323,
        -0.00531206,  0.0571938 , -0.3201319 , -0.04418665, -0.08550781,
        -0.55847436, -0.2333639 , -0.20782956, -0.03543065, -0.17533456],
       [ 0.56338924, -0.12339553, -0.10862677,  0.7753425 , -0.07667087,
        -0.15752274,  0.01872334, -0.08169781, -0.3521876 ,  0.46373403,
        -0.08492758,  0.07166861, -0.00670818,  0.12686071, -0.19326551,
 

In [30]:
# Define a wrapper layer as suggested by the error message (the message itself
# is not shown in this notebook; presumably it was raised when the hub layer was
# used directly inside a Keras model — TODO confirm).
class EmbeddingWrapper(tf.keras.layers.Layer):
    """Keras layer that forwards its inputs to a wrapped TF-Hub layer.

    Args:
        hub_layer: The callable layer to delegate to (a `hub.KerasLayer` in
            this notebook).
        **kwargs: Passed through unchanged to `tf.keras.layers.Layer.__init__`.
    """

    def __init__(self, hub_layer, **kwargs):
        super().__init__(**kwargs)
        # Keep a reference to the wrapped layer so `call` can delegate to it.
        # NOTE(review): whether Keras tracks the wrapped layer's weights (and so
        # fine-tunes them) through this attribute depends on which Keras
        # implementation backs `tf.keras` — verify with `model.summary()`.
        self.hub_layer = hub_layer

    def call(self, inputs):
        """Return the wrapped layer's output for `inputs`, unchanged."""
        return self.hub_layer(inputs)

# Build a fresh hub.KerasLayer from the same TF-Hub handle; this is a separate
# instance from any hub layer created earlier in the notebook.
embedding_hub_layer = hub.KerasLayer(
    model_path,
    input_shape=[],
    dtype=tf.string,
    trainable=True,
)

# Put the hub layer inside the EmbeddingWrapper so it can be used in the model.
wrapped_embedding_layer = EmbeddingWrapper(hub_layer=embedding_hub_layer)

In [31]:
# Model input: one scalar string (a raw review) per example.
input_layer = tf.keras.Input(shape=[], dtype=tf.string)

# Dense head applied on top of the embedding, created in the order it is used.
hidden_dense = tf.keras.layers.Dense(units=16, activation='relu')
output_dense = tf.keras.layers.Dense(units=1)  # single unit, no activation

# Wire the graph: text -> wrapped embedding -> hidden layer -> output.
x = wrapped_embedding_layer(input_layer)  # Use the wrapped embedding layer
x = hidden_dense(x)
output_layer = output_dense(x)

model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()