# RNN para Classificação de Texto com TensorFlow 2.0
## Classificação Binária

In [1]:
import  os
import  tensorflow as tf
import  numpy as np

from tensorflow import keras
from tensorflow.keras import layers, models, datasets

In [2]:
# Display the installed TensorFlow version string as this cell's output.
tf.__version__

'2.0.0-beta1'

In [3]:
# Fail fast on an unsupported install: tf.random.set_seed (used below) is the
# TensorFlow 2.x name, so the version check has to run before it to give a
# readable error instead of an AttributeError.
assert tf.__version__.startswith('2.'), (
    'This notebook requires TensorFlow 2.x, found ' + tf.__version__
)

# Seed TensorFlow's and NumPy's global random generators for repeatable runs.
tf.random.set_seed(22)
np.random.seed(22)

# Level '2' asks TensorFlow's C++ backend to hide INFO and WARNING log lines.
# NOTE(review): tensorflow is already imported when this runs; the variable
# may need to be set before the import to take full effect - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [4]:
# Re-seed NumPy's global RNG for reproducibility. Note that this replaces the
# seed of 22 applied in the previous cell.
np.random.seed(7)

# Vocabulary cap: only the 10000 most frequent words are kept when loading.
top_words = 10000

# Reviews are later truncated/padded to this many tokens (maximum review
# length is 80).
max_review_length = 80

# Load the IMDB review dataset as (inputs, labels) pairs for train and test.
(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data(
    num_words=top_words,
)


In [5]:
# Give every review the same length (max_review_length tokens) so the two
# splits become rectangular (samples x time) arrays.
print('Pad sequences (samples x time)')

# Same padding call for both splits, applied to the train split first.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

# Sanity check: the second dimension should equal max_review_length.
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


## Modelo RNN

In [6]:
# Bidirectional-LSTM classifier: embedding -> BiLSTM -> dense -> sigmoid.
model = tf.keras.Sequential([
    # The embedding table size is tied to top_words (the num_words cap used
    # when loading the dataset) instead of repeating the literal 10000, so the
    # two cannot drift apart if the vocabulary size is changed.
    tf.keras.layers.Embedding(top_words, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),

    # Sigmoid because this is a binary classification problem: a single
    # output unit in [0, 1].
    tf.keras.layers.Dense(1, activation='sigmoid')
])

## Compilar e Ajustar o Modelo

In [7]:
# Configure training: binary cross-entropy loss for the single sigmoid
# output, the Adam optimizer, and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Training hyperparameters.
# NOTE(review): units and num_classes are not referenced by any other cell
# visible in this notebook - confirm whether they are still needed.
units, num_classes = 64, 2

# Mini-batch size used for both training and evaluation.
batch_size = 32

# Should be more, but additional epochs make the computer very slow.
epochs = 1

In [9]:
# Train the model on the padded training reviews. The test split is passed as
# validation_data, so the reported val_* metrics are computed on the test set.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

W0619 14:38:00.735188  1316 deprecation.py:323] From C:\Users\Bruno\Anaconda3\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 25000 samples, validate on 25000 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x8c12a39be0>

## Avaliar o Modelo

In [10]:
# Score the trained model on the padded test reviews; batch_size is passed by
# keyword so the argument's meaning is explicit at the call site.
scores = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)



In [11]:
# scores holds the loss followed by the compiled metric (accuracy).
print(
    "Final test loss and accuracy :",
    scores,
)

Final test loss and accuracy : [0.3763929127120972, 0.835]
