<a href="https://colab.research.google.com/github/TSerra-PT/TensorFlow2.0/blob/main/Class3_Recurrent_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Step 1: Installing TensorFlow

In [1]:
!pip install tensorflow



In [2]:
!pip install numpy



## Step 2: Importing Libraries

In [24]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Input, Embedding

In [4]:
# Display the TensorFlow version the kernel actually imported.
tf.__version__

'2.18.0'

## Step 3: Preprocessing

### Configuring the parameters for the dataset

In [6]:
# Vocabulary size: passed as `num_words` when loading IMDB and as the
# Embedding layer's `input_dim`.
number_of_words = 20_000

# Fixed sequence length every review is padded/truncated to.
max_len = 100

### Loading the IMDB dataset

In [7]:
# Load the Keras IMDB reviews with the vocabulary capped at `number_of_words`
# (see the Keras `imdb.load_data` documentation for how `num_words` is applied).
imdb_train, imdb_test = imdb.load_data(num_words=number_of_words)

# Each split is a (sequences, labels) pair.
X_train, y_train = imdb_train
X_test, y_test = imdb_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step


In [8]:
# One-dimensional at this point: one entry per review, not yet a 2-D matrix.
X_train.shape

(25000,)

In [9]:
# Raw training reviews: an array whose entries are Python lists of integer
# word indices (the lists have different lengths until padding is applied).
X_train

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
       list([1, 194, 1153, 194, 82

In [10]:
# First training review, shown as its list of integer word indices.
X_train[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 2,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 2,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 19193,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 10311,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 2,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 12118,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5,
 144,
 30,

Original dataset with the raw review texts: https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews

In [11]:
# Labels for the training reviews; the preview shows the values 0 and 1.
y_train

array([1, 0, 0, ..., 0, 1, 0])

### Padding the sequences (texts) to have the same length

In [12]:
# Number of tokens in the first review before padding.
len(X_train[0])

218

In [13]:
# Number of tokens in the second review before padding (differs from the first).
len(X_train[1])

189

In [14]:
# Bring every training review to exactly `max_len` tokens so the reviews can
# be stacked into one 2-D array. Per the Keras documentation, padding and
# truncation both happen at the start of a sequence by default.
X_train = tf.keras.utils.pad_sequences(sequences=X_train, maxlen=max_len)

In [15]:
# Sanity check: after padding the first review has `max_len` tokens.
len(X_train[0])

100

In [16]:
# Sanity check: after padding the second review has `max_len` tokens as well.
len(X_train[1])

100

In [17]:
# Apply the same fixed length (`max_len`) to the test reviews so they match
# the input shape the model is built for.
X_test = tf.keras.utils.pad_sequences(sequences=X_test, maxlen=max_len)

## Step 4: Building a Recurrent Neural Network

### Defining the model

In [26]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and batch shuffling are as repeatable as
# the hardware allows across "Restart & Run All" executions.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Empty layer stack; layers are appended with model.add(...).
model = tf.keras.Sequential()

### Adding the embedding layer

In [19]:
# Length of the second axis of the padded training array (tokens per review).
X_train.shape[1]

100

In [27]:
# The model takes one padded review at a time: a vector of word indices whose
# length is the second axis of X_train.
sequence_length = X_train.shape[1]
model.add(Input(shape=(sequence_length,)))

# Turn each word index (vocabulary of `number_of_words`) into a trainable
# 128-dimensional vector.
model.add(Embedding(input_dim=number_of_words, output_dim=128))

Further reading:

- Embeddings guide: https://www.tensorflow.org/guide/embedding
- Article on word embeddings: https://iaexpert.com.br/index.php/2019/04/12/word-embedding-transformando-palavras-em-numeros/

### Adding the LSTM layer

- units: 128
- activation: tanh

In [28]:
# Recurrent layer with 128 units and tanh activation, appended after the embedding.
lstm_layer = tf.keras.layers.LSTM(units=128, activation='tanh')
model.add(lstm_layer)

### Adding the output layer

- units: 1
- activation: sigmoid

In [29]:
# Single sigmoid unit: one value in (0, 1) per review for the binary label.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
model.add(output_layer)

### Compiling the model

In [30]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [31]:
# Print the model's layer-by-layer summary.
model.summary()

### Training the model

In [33]:
# Train on the padded training reviews: 3 passes over the data in
# mini-batches of 128 reviews.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=3,
)

Epoch 1/3
196/196 ━━━━━━━━━━━━━━━━━━━━ 79s 390ms/step - accuracy: 0.6161 - loss: 0.6340
Epoch 2/3
196/196 ━━━━━━━━━━━━━━━━━━━━ 83s 394ms/step - accuracy: 0.8282 - loss: 0.3937
Epoch 3/3
196/196 ━━━━━━━━━━━━━━━━━━━━ 82s 395ms/step - accuracy: 0.8631 - loss: 0.3235


<keras.src.callbacks.history.History at 0x788324233d50>

## Step 5: Evaluating the model

In [34]:
# The model was compiled with one loss and one metric ('accuracy'), so
# evaluate() yields the loss followed by the accuracy on the test reviews.
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# The original, misspelled name is kept as an alias so cells that still
# reference it keep working.
test_acurracy = test_accuracy

782/782 ━━━━━━━━━━━━━━━━━━━━ 50s 64ms/step - accuracy: 0.8545 - loss: 0.3455


In [35]:
# Report the accuracy measured on the test reviews.
accuracy_message = "Test accuracy: {}".format(test_acurracy)
print(accuracy_message)

Test accuracy: 0.8529199957847595


In [36]:
# Loss value returned by model.evaluate for the test reviews.
test_loss

0.34289979934692383