# Gated Recurrent Units

A Gated Recurrent Unit (GRU) network is a Recurrent Neural Network (RNN) that has the capacity to remember.

Note: Bidirectional GRU cells can be used when the entire text is available to the model at inference.

Let's build a GRU network "from scratch" to have a better understanding!

## Hand Made GRU Network

In [None]:
import os
import numpy as np
import pandas as pd
import pickle
import re

import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.models import Model
from keras.layers import Input, Embedding, GRU, Dense, Dropout
from keras.optimizers import RMSprop
from keras.callbacks import Callback, LearningRateScheduler
from keras.callbacks import ModelCheckpoint
from keras.utils import plot_model

# Clear TensorFlow session
tf.keras.backend.clear_session()

import matplotlib.pyplot as plt

### Download Data

In [None]:
# !bash data_script.sh 11339 aesop

### Load Data

In [None]:
def preprocess(text, start_story):
    """Normalise raw story text into a single-line, space-separated string.

    The steps are applied in this order:
      1. lowercase the text;
      2. prepend ``start_story`` and substitute it for every run of five
         consecutive newlines;
      3. turn each remaining newline into a space;
      4. turn each run of two or more spaces into ". ";
      5. strip leading/trailing whitespace and collapse ".." into ".";
      6. insert a space in front of every punctuation character;
      7. collapse runs of whitespace into a single space.

    NOTE(review): step 6 only adds a space *before* a punctuation character,
    so an opening mark stays glued to the word that follows it (for example
    ``"fox`` or ``|the``) -- confirm this is the intended tokenisation.

    Args:
        text: Raw text to clean.
        start_story: Marker string placed at the start of the text and in
            place of each run of five newlines.

    Returns:
        The cleaned text.
    """
    text = text.lower()
    text = start_story + text
    text = text.replace("\n\n\n\n\n", start_story)
    text = text.replace("\n", " ")       # Replace each newline character by a space
    text = re.sub(r" {2,}", ". ", text)  # Replace a run of two or more spaces by a point and a space
    text = text.strip()                  # Remove leading and trailing whitespace
    text = text.replace("..", ".")       # Replace a double point by a single point
    # Raw strings avoid the invalid "\]" / "\s" escape warnings; "-" and "["
    # are escaped so the character class reads as an explicit list.
    text = re.sub(r'([!"#$%&()*+,\-./:;<=>?@\[\]^_`{|}~])', r" \1", text)
    text = re.sub(r"\s{2,}", " ", text)  # Replace a run of whitespace by a single space
    return text

In [None]:
FILE_PATH = "./data/aesop/data.txt"

# Length of the input window; also the number of "|" characters in the story marker
SEQ_LENGTH = 20
START_STORY = "|" * SEQ_LENGTH

# "utf-8-sig" drops a leading byte-order mark if the file has one
with open(FILE_PATH, encoding="utf-8-sig") as f:
    text = f.read()

# Preprocessing
text = preprocess(text, START_STORY)

# Tokenization (word level, no character filtering so punctuation stays in the vocabulary)
tokenizer = Tokenizer(char_level=False,
                      filters="")
tokenizer.fit_on_texts([text])
NB_TOTAL_WORDS = len(tokenizer.word_index) + 1
token_list = tokenizer.texts_to_sequences([text])[0]

print(f"Total words = {NB_TOTAL_WORDS}")
print(f"Tokenizer word index = {tokenizer.word_index}")
# Reuse the sequence computed above instead of tokenizing the whole corpus again
print(f"Tokenizer token list = {[token_list]}")

In [None]:
# Compare the tokens of a raw phrase with the tokens of its pre-processed form
sample_phrase = "man the lion"
sample_clean = preprocess(sample_phrase, START_STORY)

print(f'Tokenizer token list = {tokenizer.texts_to_sequences([sample_phrase])}')
print(f'Pre-processed text sample = {sample_clean}')
print(f'Tokenizer token list = {tokenizer.texts_to_sequences([sample_clean])}')

In [None]:
def generate_sequences(token_list, sequence_length, step, nb_classes):
    """Cut a token stream into input windows and one-hot next-token targets.

    A window of ``sequence_length`` tokens is taken every ``step`` positions;
    the token that immediately follows each window is its target.

    Args:
        token_list: Sequence of integer token ids.
        sequence_length: Number of tokens in each input window.
        step: Offset between the starts of two consecutive windows.
        nb_classes: Number of classes used for the one-hot encoding.

    Returns:
        A tuple ``(X, y, nb_seq)`` where ``X`` is the list of windows, ``y`` is
        the one-hot encoded targets and ``nb_seq`` is the number of windows.
    """
    window_starts = range(0, len(token_list) - sequence_length, step)

    X = [token_list[start:start + sequence_length] for start in window_starts]
    targets = [token_list[start + sequence_length] for start in window_starts]

    y = tf.keras.utils.to_categorical(targets, num_classes=nb_classes)

    nb_seq = len(X)
    print(f"Number of sequences = {nb_seq}")

    return X, y, nb_seq

In [None]:
# Build the training set: one window per token position (stride of 1)
STEP = 1

X, y, NB_SEQ = generate_sequences(token_list=token_list,
                                  sequence_length=SEQ_LENGTH,
                                  step=STEP,
                                  nb_classes=NB_TOTAL_WORDS)

# Convert both inputs and targets to NumPy arrays
X, y = np.array(X), np.array(y)

### GRU Network

In [None]:
class GRU_RNN():
    """Single-layer GRU next-word model.

    Architecture: Embedding -> GRU -> Dropout -> Dense(softmax). The model is
    compiled with RMSprop and categorical cross-entropy, and accepts integer
    sequences of any length (input shape ``(None,)``).
    """

    def __init__(self, nb_units, embedding_size, total_words, dropout_rate=0.2, learning_rate=0.001):
        """Build and compile the network.

        Args:
            nb_units: Number of units of the GRU layer.
            embedding_size: Output dimension of the embedding layer.
            total_words: Vocabulary size; used as the embedding input dimension
                and as the number of softmax outputs.
            dropout_rate: Rate of the dropout layer placed after the GRU.
            learning_rate: Learning rate given to the RMSprop optimizer.
        """
        self.nb_units = nb_units
        self.embedding_size = embedding_size
        self.total_words = total_words
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate

        # Input
        self.input = Input(shape=(None,))

        # Layers
        x = Embedding(input_dim=self.total_words,
                      output_dim=self.embedding_size,
                      name="embedding")(self.input)
        x = GRU(units=self.nb_units,
                name="gru")(x)
        x = Dropout(rate=self.dropout_rate,
                    name="dropout")(x)

        # Output
        self.output = Dense(units=self.total_words,
                            activation="softmax",
                            name="output")(x)

        # Model
        self.model = Model(self.input, self.output)

        # Compile
        self.optimizer = RMSprop(learning_rate=self.learning_rate)
        self.model.compile(optimizer=self.optimizer,
                           loss="categorical_crossentropy")

    @staticmethod
    def _ensure_parent_dir(file_path):
        """Create the directory that will contain ``file_path`` if it is missing."""
        directory = os.path.dirname(file_path)
        if directory:  # empty when file_path has no directory component
            os.makedirs(directory, exist_ok=True)

    def summary(self):
        """Print the Keras summary of the model."""
        self.model.summary()

    def plot_model(self, file_path="model/viz/model.png"):
        """Write a diagram of the model (with shapes and layer names) to ``file_path``."""
        self._ensure_parent_dir(file_path)
        plot_model(self.model, to_file=file_path, show_shapes=True, show_layer_names=True)

    def load_weights(self, file_path="./model/weights/save"):
        """Load model weights from ``file_path``."""
        self.model.load_weights(file_path)

    def fit(self, X_train, y_train, batch_size=32, epochs=100):
        """Train the model on shuffled data.

        Args:
            X_train: Input sequences.
            y_train: Targets matching the softmax output.
            batch_size: Number of samples per gradient update.
            epochs: Number of passes over the data.

        Returns:
            Whatever ``self.model.fit`` returns (the training history), so the
            caller can inspect the loss curve.
        """
        return self.model.fit(x=X_train,
                              y=y_train,
                              batch_size=batch_size,
                              epochs=epochs,
                              shuffle=True)

    def predict(self, x):
        """Return the model predictions for ``x``."""
        return self.model.predict(x)

    def save_weights(self, file_path="./model/weights/save"):
        """Save model weights to ``file_path``, creating its directory if needed."""
        self._ensure_parent_dir(file_path)
        self.model.save_weights(file_path)

In [None]:
# Hyperparameters of the single-layer GRU model
NB_UNITS = 256          # units of the GRU layer
EMBEDDING_SIZE = 100    # output dimension of the embedding layer
DROPOUT_RATE = 0.2      # rate of the dropout layer after the GRU
LEARNING_RATE = 0.001   # learning rate of the RMSprop optimizer

# Build and compile the model; the vocabulary size comes from the tokenizer
gru = GRU_RNN(nb_units=NB_UNITS,
              embedding_size=EMBEDDING_SIZE,
              total_words=NB_TOTAL_WORDS,
              dropout_rate=DROPOUT_RATE,
              learning_rate=LEARNING_RATE)

In [None]:
# Write the architecture diagram to the method's default path (model/viz/model.png)
gru.plot_model()

In [None]:
# Print the Keras summary of the single-layer GRU model
gru.summary()

### Stacked-GRU Network

In [None]:
class Stacked_GRU_RNN():
    """Two-layer (stacked) GRU next-word model.

    Architecture: Embedding -> GRU (returns the full sequence) -> GRU ->
    Dropout -> Dense(softmax). The model is compiled with RMSprop and
    categorical cross-entropy, and accepts integer sequences of any length
    (input shape ``(None,)``).
    """

    def __init__(self, nb_units, embedding_size, total_words, dropout_rate=0.2, learning_rate=0.001):
        """Build and compile the network.

        Args:
            nb_units: Number of units of each GRU layer.
            embedding_size: Output dimension of the embedding layer.
            total_words: Vocabulary size; used as the embedding input dimension
                and as the number of softmax outputs.
            dropout_rate: Rate of the dropout layer placed after the second GRU.
            learning_rate: Learning rate given to the RMSprop optimizer.
        """
        self.nb_units = nb_units
        self.embedding_size = embedding_size
        self.total_words = total_words
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate

        # Input
        self.input = Input(shape=(None,))

        # Layers
        x = Embedding(input_dim=self.total_words,
                      output_dim=self.embedding_size,
                      name="embedding")(self.input)
        # The first GRU must emit one output per time step to feed the second GRU
        x = GRU(units=self.nb_units,
                return_sequences=True,
                name="gru_1")(x)
        x = GRU(units=self.nb_units,
                name="gru_2")(x)
        x = Dropout(rate=self.dropout_rate,
                    name="dropout")(x)

        # Output
        self.output = Dense(units=self.total_words,
                            activation="softmax",
                            name="output")(x)

        # Model
        self.model = Model(self.input, self.output)

        # Compile
        self.optimizer = RMSprop(learning_rate=self.learning_rate)
        self.model.compile(optimizer=self.optimizer,
                           loss="categorical_crossentropy")

    @staticmethod
    def _ensure_parent_dir(file_path):
        """Create the directory that will contain ``file_path`` if it is missing."""
        directory = os.path.dirname(file_path)
        if directory:  # empty when file_path has no directory component
            os.makedirs(directory, exist_ok=True)

    def summary(self):
        """Print the Keras summary of the model."""
        self.model.summary()

    def plot_model(self, file_path="model/viz/model.png"):
        """Write a diagram of the model (with shapes and layer names) to ``file_path``."""
        self._ensure_parent_dir(file_path)
        plot_model(self.model, to_file=file_path, show_shapes=True, show_layer_names=True)

    def load_weights(self, file_path="./model/weights/save"):
        """Load model weights from ``file_path``."""
        self.model.load_weights(file_path)

    def fit(self, X_train, y_train, batch_size=32, epochs=100):
        """Train the model on shuffled data.

        Args:
            X_train: Input sequences.
            y_train: Targets matching the softmax output.
            batch_size: Number of samples per gradient update.
            epochs: Number of passes over the data.

        Returns:
            Whatever ``self.model.fit`` returns (the training history), so the
            caller can inspect the loss curve.
        """
        return self.model.fit(x=X_train,
                              y=y_train,
                              batch_size=batch_size,
                              epochs=epochs,
                              shuffle=True)

    def predict(self, x):
        """Return the model predictions for ``x``."""
        return self.model.predict(x)

    def save_weights(self, file_path="./model/weights/save"):
        """Save model weights to ``file_path``, creating its directory if needed."""
        self._ensure_parent_dir(file_path)
        self.model.save_weights(file_path)

In [None]:
# Hyperparameters of the stacked GRU model (same values as the single-layer model)
NB_UNITS = 256          # units of each GRU layer
EMBEDDING_SIZE = 100    # output dimension of the embedding layer
DROPOUT_RATE = 0.2      # rate of the dropout layer after the second GRU
LEARNING_RATE = 0.001   # learning rate of the RMSprop optimizer

# Build and compile the model; the vocabulary size comes from the tokenizer
stacked_gru = Stacked_GRU_RNN(nb_units=NB_UNITS,
                              embedding_size=EMBEDDING_SIZE,
                              total_words=NB_TOTAL_WORDS,
                              dropout_rate=DROPOUT_RATE,
                              learning_rate=LEARNING_RATE)

In [None]:
# Use a dedicated file: the default path is the one gru.plot_model() writes to,
# so calling this without an argument would overwrite that diagram.
stacked_gru.plot_model("model/viz/stacked_model.png")

In [None]:
# Print the Keras summary of the stacked GRU model
stacked_gru.summary()

### Train

In [None]:
# Training settings for the single-layer GRU
BATCH_SIZE = 32
EPOCHS = 100

# Train on the whole dataset (only X and y are passed; no validation data)
gru.fit(X, y, BATCH_SIZE, EPOCHS)

In [None]:
# Training settings for the stacked GRU (note: far fewer epochs than the single-layer run)
BATCH_SIZE = 32
EPOCHS = 4

# Train on the whole dataset (only X and y are passed; no validation data)
stacked_gru.fit(X, y, BATCH_SIZE, EPOCHS)

### Evaluate GRU Network

### Save Model

In [None]:
gru.save_weights()

In [None]:
stacked_gru.save_weights()

### Load Pre-Trained Model

In [None]:
gru.load_weights()

In [None]:
stacked_gru.load_weights()

### Predictions

In [None]:
def sample_with_temp(preds, temperature=1.0):
    """Sample an index from a probability array, reshaped by a temperature.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalised before one index is drawn. Values below 1 sharpen the
    distribution, values above 1 flatten it.

    Args:
        preds: Array-like of probabilities.
        temperature: Sampling temperature. ``0`` selects the most probable
            index deterministically (greedy decoding).

    Returns:
        The sampled index (a NumPy integer).
    """
    preds = np.asarray(preds).astype("float64")

    # Limit case: dividing by a zero temperature would only produce NaNs.
    if temperature == 0:
        return np.argmax(preds)

    # Zero probabilities legitimately map to -inf (and back to 0 after exp),
    # so silence the divide-by-zero warning raised by log(0).
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature

    # Shift by the maximum so the largest term is exp(0) = 1; without this,
    # small temperatures underflow every term to 0 and the division yields NaN.
    logits = logits - np.max(logits)

    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probs = np.random.multinomial(1, preds, 1)
    return np.argmax(probs)

In [None]:
def generate_text(tokenizer, model, seed_text, nb_words, max_sequence_length, temperature):
    """Generate text that continues ``seed_text`` one word at a time.

    The seed is pre-processed with ``preprocess`` and ``START_STORY`` and
    tokenized once. At each step the last ``max_sequence_length`` tokens are
    given to the model, an index is sampled from the predicted distribution
    with ``sample_with_temp``, and the matching word is appended. Generation
    stops early when the sampled word is "|".

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        model: Model whose ``predict`` returns one probability vector per
            input sequence.
        seed_text: Text to continue.
        nb_words: Maximum number of sampling steps.
        max_sequence_length: Maximum number of context tokens fed to the model.
        temperature: Sampling temperature passed to ``sample_with_temp``.

    Returns:
        The original seed text followed by the generated words, each followed
        by a single space.
    """
    output_text = seed_text
    # Keep the first generated word from being fused onto the last seed word
    if output_text and not output_text[-1].isspace():
        output_text += " "

    seed_text = preprocess(seed_text, START_STORY)
    print(f"seed_text = {seed_text}")

    # Tokenize the seed once; sampled ids are appended directly afterwards
    # instead of re-tokenizing an ever-growing string at every step.
    context = list(tokenizer.texts_to_sequences([seed_text])[0])

    for _ in range(nb_words):
        # Cap the context to the last max_sequence_length tokens. The second
        # axis is inferred, so a shorter context does not break the reshape.
        window = np.asarray(context[-max_sequence_length:]).reshape(1, -1)

        probs = model.predict(window, verbose=0)[0]
        y_class = int(sample_with_temp(probs, temperature))

        # Index 0 has no entry in tokenizer.index_word: nothing to emit
        if y_class == 0:
            continue

        output_word = tokenizer.index_word[y_class]

        if output_word == "|":
            break

        context.append(y_class)
        output_text += output_word + " "

    return output_text

In [None]:
# Seed text: the prompt that both models are asked to continue below
seed_text = "the lion and the man"

In [None]:
# Generate with the single-layer GRU. The context window reuses SEQ_LENGTH so
# it always matches the window length the training sequences were built with.
output_text = generate_text(tokenizer=tokenizer,
                            model=gru.model,
                            seed_text=seed_text,
                            nb_words=100,
                            max_sequence_length=SEQ_LENGTH,
                            temperature=0.2)

print(output_text)

In [None]:
# Generate with the stacked GRU. The context window reuses SEQ_LENGTH so it
# always matches the window length the training sequences were built with.
output_text = generate_text(tokenizer=tokenizer,
                            model=stacked_gru.model,
                            seed_text=seed_text,
                            nb_words=100,
                            max_sequence_length=SEQ_LENGTH,
                            temperature=0.2)

print(output_text)