<a href="https://colab.research.google.com/github/ElFosco/NLP_argument_creation/blob/dev_rl/seq2seq_arg_quality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installations

In [None]:
# Notebook shell command: install the `tf-models-official` package from PyPI
# (-q keeps pip's output short).
!pip install -q tf-models-official

## Imports

In [None]:
import os
import re
import numpy as np
import random
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt

# Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import layers

# BERT
import tensorflow_hub as hub
import tensorflow_text as text

# Embeddings
import gensim  
import gensim.downloader as gloader

# Data & Pre-processing
import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
import pandas as pd


# Defining hyperparameters
BUFFER_SIZE = 50000  # Shuffle buffer size of the pre-training tf.data pipeline
TRAIN_BUFFER = 2048  # Should be lower than BUFFER_SIZE
EMBED_DIM = 100      # Embedding width (GloVe vectors and model embeddings)
LATENT_DIM = 512     # Hidden width of the feed-forward projections in encoder/decoder
NUM_HEADS = 8        # Attention heads used by the decoder
BATCH_SIZE = 256     # Batch size of the pre-training datasets
BATCH_SIZE_RL = 32   # Batch size hard-wired into the critic network

# Datasets
# NOTE: `train_dataset` is a CSV path here but is rebound to a tf.data.Dataset in the
# "Tensorflow Dataset for Pre-training" cell; re-run this cell before re-loading the CSV.
train_dataset = "/gdrive/MyDrive/NLP/arg_quality_rank_30k.csv"

# Embedding model
embed_model = "/gdrive/MyDrive/NLP/glove_{}_pickle".format(EMBED_DIM)

# BERT models weights
quality_model_weights = "/gdrive/MyDrive/NLP/classifierIBM30k.h5"
r1_model_weights = "/gdrive/MyDrive/NLP/classifierNLI.h5"

# Scores of the start sentences, computed with the best BERT model
start_scores = "/gdrive/MyDrive/NLP/start_scores_bert_pickle"

In [None]:
# Mount Google Drive at /gdrive so that the /gdrive/MyDrive/... paths used in this
# notebook can be resolved.
from google.colab import drive
drive.mount('/gdrive')

## Data

### Methods

In [None]:
lemmatizer = WordNetLemmatizer()


def _normalize_text(text):
    """
    Applies the cleaning steps shared by `preprocess_pretrain` and `preprocess_rl`.

    :param text: raw sentence (str)

    :return
        - lower-cased text with punctuation separated by spaces and whitespace collapsed
    """
    text = text.lower()
    text = re.sub(r'"|-|\\|`', ' ', text)      # replace these chars with a space: " - \ `
    text = re.sub('\n', ' ', text)
    text = re.sub(r'^[.]+', '', text)          # delete dots at the beginning of the sentence
    text = re.sub(r"([?.!,])", r" \1 ", text)  # surround punctuation with spaces
    # NOTE(review): the previous step puts a space on both sides of every '.', so two dots
    # are always at least two spaces apart here and this pattern cannot match. It is kept
    # in place so that the produced token stream (and therefore the vocabulary) is unchanged.
    text = re.sub(r'\. \.', '.', text)         # delete . .
    text = re.sub('&', ' and ', text)          # replace & with and
    text = re.sub(' +', ' ', text)             # delete additional whitespace
    return text.strip()


def _lemmatize_tokens(text):
    """Lemmatizes every whitespace-separated token of `text` and re-joins them with spaces."""
    return " ".join([lemmatizer.lemmatize(x) for x in text.split()])


def preprocess_pretrain(text):
    """
    Cleans a sentence for pre-training: normalizes it, wraps it in the
    "[start]" / "[end]" markers and lemmatizes every token.

    :param text: raw sentence (str)

    :return
        - preprocessed sentence (str) starting with "[start]" and ending with "[end]"
    """
    text = _normalize_text(text)
    text = " ".join(["[start]", text, "[end]"])
    return _lemmatize_tokens(text)


def preprocess_rl(text):
    """
    Cleans a sentence for the RL dataset: same steps as `preprocess_pretrain`
    but without adding the "[start]" / "[end]" markers.

    :param text: raw sentence (str)

    :return
        - preprocessed sentence (str)
    """
    return _lemmatize_tokens(_normalize_text(text))


def load_embedding_model(model_type: str,
                         embedding_dimension: int = 50) -> gensim.models.keyedvectors.KeyedVectors:
    """
    Downloads/loads a pre-trained word embedding model through the gensim downloader.

    :param model_type: embedding family, one of 'word2vec', 'glove' or 'fasttext'
                       (case and surrounding whitespace are ignored).
    :param embedding_dimension: embedding size; only used to pick the GloVe variant.

    :return
        - pre-trained word embedding model (gensim KeyedVectors object)

    :raises AttributeError: if `model_type` is not one of the supported families.
    :raises ValueError: re-raised from the downloader when the resolved model name is unknown.
    """

    # Name understood by the gensim downloader for every supported family
    downloader_names = {
        'word2vec': "word2vec-google-news-300",
        'glove': "glove-wiki-gigaword-{}".format(embedding_dimension),
        'fasttext': "fasttext-wiki-news-subwords-300",
    }

    requested_type = model_type.strip().lower()
    if requested_type not in downloader_names:
        raise AttributeError("Unsupported embedding model type! Available ones: word2vec, glove, fasttext")

    # Check download
    try:
        return gloader.load(downloader_names[requested_type])
    except ValueError as e:
        print("Invalid embedding model name! Check the embedding dimension:")
        print("Word2Vec: 300")
        print("Glove: 50, 100, 200, 300")
        raise e


def check_OOV_terms(embedding_vocabulary, word_listing):
    """
    Finds the out-of-vocabulary terms, i.e. the dataset words that are missing
    from the pre-trained embedding vocabulary.

    :param embedding_vocabulary: words known to the pre-trained embedding model (iterable)
    :param word_listing: words of the dataset specific vocabulary (iterable)

    :return
        - list of the distinct OOV terms (in no particular order)
    """
    dataset_terms = set(word_listing)
    missing_terms = dataset_terms.difference(embedding_vocabulary)
    return list(missing_terms)


def build_embedding_matrix(embedding_model,
                           embedding_dimension,
                           word_to_idx,
                           vocab_size,
                           oov_terms):
    """
    Creates the dataset specific embedding matrix from a pre-trained word embedding model.

    Row `idx` holds the pre-trained vector of the word mapped to `idx`; words the model
    cannot provide get a random vector drawn uniformly from [-0.05, 0.05). Rows that no
    word maps to stay at zero.

    :param embedding_model: pre-trained word embedding model, indexable by word
    :param embedding_dimension: size of each embedding vector
    :param word_to_idx: vocabulary map (word -> index) (dict)
    :param vocab_size: number of rows of the matrix
    :param oov_terms: list of OOV terms (list); currently not used by this function

    :return
        - float32 embedding matrix (shape vocab_size x embedding_dimension)
    """
    matrix = np.zeros(shape=(vocab_size, embedding_dimension), dtype=np.float32)

    for token, row in tqdm(word_to_idx.items()):
        try:
            vector = embedding_model[token]
        except (KeyError, TypeError):
            # Word unknown to the embedding model: fall back to a small random vector
            vector = np.random.uniform(low=-0.05, high=0.05, size=embedding_dimension)

        matrix[row] = vector

    return matrix


def update_embedding_matrix(embedding_model,
                            embedding_dimension,
                            word_to_idx,
                            vocab_size,
                            oov_terms,
                            old_word_to_idx=None):
    """
    Builds the embedding matrix of an updated vocabulary from a previously built
    embedding matrix.

    :param embedding_model: previously built embedding matrix (indexable by row)
    :param embedding_dimension: size of each embedding vector
    :param word_to_idx: updated vocabulary map (word -> index) (dict)
    :param vocab_size: number of rows of the new matrix
    :param oov_terms: list of OOV terms (list); currently not used by this function
    :param old_word_to_idx: vocabulary map (word -> index) that `embedding_model` was
        built with. When given, the vector of a word is read from the row it had in the
        OLD vocabulary, which is required whenever word indices changed between the two
        vocabularies (the Keras tokenizer re-sorts its word index by frequency at every
        fit). When None, the legacy behaviour is kept and rows are read at the NEW index.

    :return
        - float32 embedding matrix (shape vocab_size x embedding_dimension); words without
          a previous vector get a random one drawn uniformly from [-0.05, 0.05)
    """
    embedding_matrix = np.zeros((vocab_size, embedding_dimension), dtype=np.float32)

    for word, idx in tqdm(word_to_idx.items()):
        # Row of the old matrix holding this word (None if the word is new)
        source_idx = idx if old_word_to_idx is None else old_word_to_idx.get(word)

        embedding_vector = None
        if source_idx is not None:
            try:
                embedding_vector = embedding_model[source_idx]
            except (TypeError, IndexError):
                embedding_vector = None

        if embedding_vector is None:
            embedding_vector = np.random.uniform(low=-0.05, high=0.05, size=embedding_dimension)

        embedding_matrix[idx] = embedding_vector

    return embedding_matrix


class KerasTokenizer(object):
    """
    A simple high-level wrapper for the Keras tokenizer.

    Optionally builds (and keeps up to date) an embedding matrix aligned with the
    tokenizer vocabulary, using a pre-trained word embedding model.
    """

    def __init__(self, build_embedding_matrix=False, embedding_dimension=None,
                 embedding_model_type=None, tokenizer_args=None, embedding_model=None):
        """
        :param build_embedding_matrix: whether to build an embedding matrix for the vocabulary
        :param embedding_dimension: embedding size (int, required when building the matrix)
        :param embedding_model_type: embedding family name (required when building the matrix)
        :param tokenizer_args: keyword arguments forwarded to the Keras tokenizer (dict)
        :param embedding_model: already loaded embedding model; loaded on demand when None
        """
        if build_embedding_matrix:
            assert embedding_model_type is not None
            assert embedding_dimension is not None and isinstance(embedding_dimension, int)

        self.build_embedding_matrix = build_embedding_matrix
        self.embedding_dimension = embedding_dimension
        self.embedding_model_type = embedding_model_type
        self.embedding_model = embedding_model
        self.embedding_matrix = None
        self.vocab = None

        tokenizer_args = {} if tokenizer_args is None else tokenizer_args
        # OrderedDict is a dict subclass, so a single check covers both
        assert isinstance(tokenizer_args, dict)

        self.tokenizer_args = tokenizer_args

    def _embedding_vocabulary(self):
        """Returns the set of words known to the pre-trained embedding model."""
        # gensim >= 4 exposes `key_to_index`; older versions expose `vocab`
        key_to_index = getattr(self.embedding_model, 'key_to_index', None)
        if key_to_index is not None:
            return set(key_to_index)
        return set(self.embedding_model.vocab.keys())

    def build_vocab(self, data, **kwargs):
        """
        Fits a new Keras tokenizer on `data` and, if requested, builds the embedding matrix.

        :param data: texts used to fit the tokenizer
        """
        print('Fitting tokenizer...')
        self.tokenizer = tf.keras.preprocessing.text.Tokenizer(**self.tokenizer_args)
        self.tokenizer.fit_on_texts(data)
        print('Fit completed!')

        self.vocab = self.tokenizer.word_index

        if self.build_embedding_matrix:
            if self.embedding_model is None:
                print('Loading embedding model! It may take a while...')
                self.embedding_model = load_embedding_model(model_type=self.embedding_model_type,
                                                            embedding_dimension=self.embedding_dimension)

            print('Checking OOV terms in train...')
            self.oov_terms_train = check_OOV_terms(embedding_vocabulary=self._embedding_vocabulary(),
                                                   word_listing=list(self.vocab.keys()))

            print("Total OOV terms: {0} ({1:.2f}%)".format(len(self.oov_terms_train), 100*float(len(self.oov_terms_train)) / len(self.vocab)))

            print('Building the embedding matrix for train...')
            self.embedding_matrix = build_embedding_matrix(embedding_model=self.embedding_model,
                                                           word_to_idx=self.vocab,
                                                           vocab_size=len(self.vocab)+1,
                                                           embedding_dimension=self.embedding_dimension,
                                                           oov_terms=self.oov_terms_train)
            print('Done for train!')

    def update_vocab(self, data, **kwargs):
        """
        Extends the already fitted tokenizer with `data` and refreshes the vocabulary
        (and the embedding matrix, if one is maintained).

        :param data: additional texts to fit the tokenizer on
        """
        self.tokenizer.fit_on_texts(data)

        # The vocabulary must follow the tokenizer even when no matrix is built
        old_vocab = self.vocab
        self.vocab = self.tokenizer.word_index

        if self.build_embedding_matrix:
            print('Checking OOV terms...')
            self.oov_terms = check_OOV_terms(embedding_vocabulary=set(old_vocab.keys()),
                                             word_listing=list(self.vocab.keys()))

            print("Total OOV terms: {0} ({1:.2f}%)".format(len(self.oov_terms), 100*float(len(self.oov_terms)) / len(self.vocab)))

            print('Building the embedding matrix...')
            # Fitting again re-sorts the word index by frequency, so old rows have to be
            # located through the old vocabulary rather than through the new indices.
            self.embedding_matrix = update_embedding_matrix(embedding_model=self.embedding_matrix,
                                                            word_to_idx=self.vocab,
                                                            vocab_size=len(self.vocab)+1,
                                                            embedding_dimension=self.embedding_dimension,
                                                            oov_terms=self.oov_terms,
                                                            old_word_to_idx=old_vocab)

    def get_info(self):
        """
        Returns a summary dict of the tokenizer configuration.

        'embedding_matrix' is the matrix shape and 'vocab_size' is len(vocab) + 1;
        both are None while the corresponding object has not been built yet.
        """
        return {
            'build_embedding_matrix': self.build_embedding_matrix,
            'embedding_dimension': self.embedding_dimension,
            'embedding_model_type': self.embedding_model_type,
            'embedding_matrix': self.embedding_matrix.shape if self.embedding_matrix is not None else self.embedding_matrix,
            'embedding_model': self.embedding_model,
            'vocab_size': len(self.vocab) + 1 if self.vocab is not None else None,
        }

    def tokenize(self, text):
        """Returns `text` unchanged (splitting is done by the Keras tokenizer itself)."""
        return text

    def convert_tokens_to_ids(self, tokens):
        """
        Converts text to token ids.

        :param tokens: a single string, or a collection of strings

        :return
            - a list of ids for a single string, otherwise a list of id lists
        """
        if isinstance(tokens, str):
            return self.tokenizer.texts_to_sequences([tokens])[0]
        else:
            return self.tokenizer.texts_to_sequences(tokens)

    def convert_ids_to_tokens(self, ids):
        """Converts a list of id sequences back to a list of texts."""
        return self.tokenizer.sequences_to_texts(ids)


def convert_text(df, tokenizer, is_training=False, max_seq_length=None, 
                 pretrain=False):
    """
    Converts input text sequences to fixed-length rows of token ids.

    :param df: the texts to convert (anything the tokenizer's
        `convert_tokens_to_ids` turns into a list of id lists)
    :param tokenizer: an instantiated tokenizer
    :param is_training: whether input texts are from the training split or not;
        if so the sequence length is computed as the 0.95 quantile of the lengths
    :param max_seq_length: the max token sequence length previously computed with
        training texts (required when `is_training` is False)
    :param pretrain: converting dataset for pre-training (True) or for RL (False);
        for pre-training the last kept token of every sequence is forced to "[end]"

    :return
        text_ids: 2D numpy array of token indices, zero-padded / truncated to
            max_seq_length
        max_seq_length: only returned when `is_training` is True
    """

    sequences = tokenizer.convert_tokens_to_ids(df)

    # Sequence length: computed on training data, given otherwise
    if not is_training:
        assert max_seq_length is not None
    else:
        lengths = [len(sequence) for sequence in sequences]
        max_seq_length = int(np.quantile(lengths, 0.95))

    # Making sure that the last token surviving truncation is [end]
    if pretrain:
        for sequence in sequences:
            last_kept = -1 if len(sequence) < max_seq_length else max_seq_length - 1
            sequence[last_kept] = tokenizer.vocab['[end]']

    # Right-pad with zeros, then cut to the common length
    rows = [
        (sequence + [0] * (max_seq_length - len(sequence)))[:max_seq_length]
        for sequence in sequences
    ]
    text_ids = np.array(rows)

    if not is_training:
        return text_ids
    return text_ids, max_seq_length


def decode_sentence(input_sentence, preprocess):
    """
    Greedily decodes an output sentence for `input_sentence` with the seq2seq model.

    Relies on the notebook-level `tokenizer`, `max_seq_length` and `fnet` objects.

    :param input_sentence: raw input sentence (str)
    :param preprocess: preprocessing function applied to the sentence before tokenization

    :return
        - the decoded sentence (str); every generated word is followed by a space
    """
    end_token_id = tokenizer.vocab["[end]"]

    # Mapping the (preprocessed) input sentence to token ids
    input_ids = tokenizer.convert_tokens_to_ids(
        [preprocess(input_sentence)]
    )[0]

    # Sentences longer than the model length are truncated (a negative pad amount would
    # make tf.pad fail). As in `convert_text`, a sentence that ended with [end] keeps
    # [end] as its last token after truncation.
    if len(input_ids) > max_seq_length:
        had_end_token = input_ids[-1] == end_token_id
        input_ids = input_ids[:max_seq_length]
        if had_end_token:
            input_ids[-1] = end_token_id

    tokenized_input_sentence = tf.pad(
        input_ids,
        [[0, max_seq_length - len(input_ids)]])
    # Initializing the target sentence consisting of only the start token.
    tokenized_target_sentence = tf.expand_dims(tokenizer.vocab["[start]"], 0)
    decoded_tokens = []

    for i in range(max_seq_length):
        # Get the predictions
        predictions = fnet.predict(
            {
                "encoder_inputs": tf.expand_dims(tokenized_input_sentence, 0),
                "decoder_inputs": tf.expand_dims(
                    tf.pad(
                        tokenized_target_sentence,
                        [[0, max_seq_length - tf.shape(tokenized_target_sentence)[0]]],
                    ),
                    0,
                ),
            }
        )
        # Token with maximum probability at step i
        # predictions.shape == (batch_size, max_seq_len, vocab_size)
        sampled_token_index = tf.argmax(predictions[0, i, :])
        # Index 0 is the padding id: nothing more to generate
        if tf.equal(sampled_token_index, 0):
            break
        sampled_token = tokenizer.tokenizer.index_word[sampled_token_index.numpy()]
        # If sampled token is the end token then stop generating and return the sentence
        if tf.equal(sampled_token_index, end_token_id):
            break
        decoded_tokens.append(sampled_token)
        tokenized_target_sentence = tf.concat(
            [tokenized_target_sentence, [sampled_token_index]], 0
        )
    return "".join(token + " " for token in decoded_tokens)

### Loading 

In [None]:
# Read the dataset CSV. `train_dataset` must still hold the CSV path here: a later cell
# rebinds that name to a tf.data.Dataset.
df = pd.read_csv(train_dataset)
# Drop three columns that the visible part of this notebook does not reference.
df = df.drop(["WA", "stance_WA", "stance_WA_conf"], axis=1)
df.head()

### Data preprocessing

In [None]:
# Overwrite the argument text of the row labelled 2 with a hand-written version.
# NOTE(review): presumably a manual clean-up of the raw text - confirm against the CSV.
df.loc[2,"argument"] = "zero tolerance policy in schools should not be adopted as circumstances are often not black and white, being more nuanced. no one should be written off due to a mistake of judgement."
# Arguments preprocessed for pre-training ("[start]" / "[end]" markers added).
pretrain_series = df.apply(lambda row : preprocess_pretrain(row['argument']), axis = 1)

# Arguments preprocessed for RL (no markers) and the lower-cased topic of every row.
rl_series = df.apply(lambda row : preprocess_rl(row['argument']), axis = 1)
topic_list = list(df.apply(lambda row : row['topic'].lower(), axis = 1))
pretrain_series.head()

### Train, test, val splits

In [None]:
# Boolean row masks built from the 'set' column of the dataset
is_training_data = df['set'] == 'train'
is_validation_data = df['set'] == 'dev'
is_test_data = df['set'] == 'test'

# Pre-training uses train + dev rows for fitting and the test rows for validation.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
x_train = pd.concat([pretrain_series[is_training_data],
                     pretrain_series[is_validation_data]])
x_test = pretrain_series[is_test_data]

### Tokenization

In [None]:
# load embeddings from glove: use the pickled copy at `embed_model` when it exists,
# otherwise fetch the model through the gensim downloader.
# NOTE(security): pickle.load executes arbitrary code from the file; only load a pickle
# you created yourself.
if os.path.exists(embed_model):
  with open(embed_model, "rb") as f:
    embedding_model = pickle.load(f)
else:
  embedding_model = load_embedding_model(model_type="glove", 
                                         embedding_dimension=EMBED_DIM)

In [None]:
# creating tokenizer and vocabulary

tokenizer_args = {
    'oov_token': "OOV_TOKEN",  # The vocabulary id for unknown terms during text conversion
    'lower' : True,  # default
    'filters' : ''   # no character filtering: the text was already cleaned during preprocessing
}

tokenizer = KerasTokenizer(tokenizer_args=tokenizer_args,
                           build_embedding_matrix=True,
                           embedding_dimension=EMBED_DIM,
                           embedding_model_type="glove", 
                           embedding_model=embedding_model)

# Fit on the training texts first, then extend the vocabulary with the test texts.
tokenizer.build_vocab(x_train)
tokenizer.update_vocab(x_test)
# Number of words in the vocabulary; id 0 (padding) is not counted, which is why the
# model code below uses VOCAB_SIZE+1.
VOCAB_SIZE = len(tokenizer.vocab)

tokenizer_info = tokenizer.get_info()

print('Tokenizer info: ', tokenizer_info)

### Tokenizing and padding sentences using the Keras tokenizer (`convert_text`)

In [None]:
# The sequence length is computed on the training texts (0.95 quantile of their lengths,
# see `convert_text`) and reused for the test texts.
x_train_tokens, max_seq_length = convert_text(x_train, tokenizer, True, pretrain=True)
x_test_tokens = convert_text(x_test, tokenizer, max_seq_length=max_seq_length, pretrain=True)
print("Max token sequence: {}".format(max_seq_length))
print('X train shape: ', x_train_tokens.shape)
print('X test shape: ', x_test_tokens.shape)

### Tensorflow Dataset for Pre-training

In [None]:
def vectorize_text(inputs, outputs):
    """
    Turns an (input ids, target ids) pair into the feature / label dicts of the model.

    The target is padded with one extra 0 on the right so that the decoder input
    (all but the last position) and the expected output (all but the first position,
    i.e. the target shifted left by one) have the same length as the original target.
    """
    padded_outputs = tf.pad(outputs, [[0, 1]])
    features = {"encoder_inputs": inputs, "decoder_inputs": padded_outputs[:-1]}
    labels = {"outputs": padded_outputs[1:]}
    return features, labels


# Auto-encoding setup: every sentence is both the input and the target.
# NOTE: from here on `train_dataset` is a tf.data.Dataset and no longer the CSV path.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train_tokens, x_train_tokens))
    .map(vectorize_text, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test_tokens, x_test_tokens))
    .map(vectorize_text, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

### Dataset for RL

In [None]:
# RL splits use the texts preprocessed WITHOUT the [start] / [end] markers.
# NOTE: this rebinds x_train / x_test, which held the pre-training texts until now.
x_train = rl_series[is_training_data]
x_val = rl_series[is_validation_data]
x_test = rl_series[is_test_data]

# -2 because we will add start and end
x_train_rl = convert_text(x_train, tokenizer, max_seq_length=max_seq_length-2, pretrain=False)
x_val_rl = convert_text(x_val, tokenizer, max_seq_length=max_seq_length-2, pretrain=False)
x_test_rl= convert_text(x_test, tokenizer, max_seq_length=max_seq_length-2, pretrain=False)

## Seq2Seq Model

### Creating the FNet Encoder

In [None]:
class FNetEncoder(layers.Layer):
    """
    FNet-style encoder block: the inputs are mixed with the real part of their 2D
    Fourier transform, followed by a two-layer feed-forward projection. Both steps
    use a residual connection and layer normalization.

    :param embed_dim: width of the inputs / outputs of the block
    :param dense_dim: hidden width of the feed-forward projection
    """

    def __init__(self, embed_dim, dense_dim, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        # Feed-forward projection: embed_dim -> dense_dim (relu) -> embed_dim
        feed_forward_layers = [
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(feed_forward_layers)
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs):
        # fft2d works on complex tensors and transforms the two inner-most dimensions;
        # only the real part of the spectrum is kept
        spectrum = tf.signal.fft2d(tf.cast(inputs, tf.complex64))
        mixed = self.layernorm_1(inputs + tf.math.real(spectrum))
        projected = self.dense_proj(mixed)
        return self.layernorm_2(mixed + projected)

### Creating the Decoder

In [None]:
class PositionalEmbedding(layers.Layer):
    """
    Sum of a learned token embedding and a learned position embedding.

    :param sequence_length: number of distinct positions that can be embedded
    :param vocab_size: number of distinct token ids that can be embedded
    :param embed_dim: width of both embeddings
    """

    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

    def call(self, inputs):
        # Positions 0 .. L-1, where L is the size of the last axis of the id tensor
        token_count = tf.shape(inputs)[-1]
        position_ids = tf.range(start=0, limit=token_count, delta=1)
        # The position embedding broadcasts over the leading axes of the token embedding
        return self.token_embeddings(inputs) + self.position_embeddings(position_ids)

    def compute_mask(self, inputs, mask=None):
        # Token id 0 is treated as padding and masked out
        return tf.math.not_equal(inputs, 0)


class FNetDecoder(layers.Layer):
    """
    Transformer-style decoder block: causal self-attention over the decoder inputs,
    attention over the encoder outputs and a two-layer feed-forward projection, each
    followed by a residual connection and layer normalization.

    :param embed_dim: width of the inputs / outputs of the block (also used as key_dim)
    :param latent_dim: hidden width of the feed-forward projection
    :param num_heads: number of attention heads
    """

    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
        super(FNetDecoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [
                layers.Dense(latent_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def call(self, inputs, encoder_outputs, mask=None):
        """
        :param inputs: embedded decoder inputs
        :param encoder_outputs: outputs of the encoder, attended to by the second attention
        :param mask: optional padding mask of the decoder inputs (propagated by Keras)

        :return
            - decoder block outputs, same shape as `inputs`
        """
        causal_mask = self.get_causal_attention_mask(inputs)
        if mask is not None:
            # Combine padding and causal masks: a position is visible only if both allow it
            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32")
            padding_mask = tf.minimum(padding_mask, causal_mask)
        else:
            # Without an incoming mask the second attention runs unmasked
            # (previously `padding_mask` was left undefined in this case)
            padding_mask = None

        attention_output_1 = self.attention_1(
            query=inputs, value=inputs, key=inputs, attention_mask=causal_mask
        )
        out_1 = self.layernorm_1(inputs + attention_output_1)

        attention_output_2 = self.attention_2(
            query=out_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        out_2 = self.layernorm_2(out_1 + attention_output_2)

        proj_output = self.dense_proj(out_2)
        return self.layernorm_3(out_2 + proj_output)

    def get_causal_attention_mask(self, inputs):
        """
        Builds an int32 lower-triangular mask (1 where query position i >= key position j)
        of shape (batch_size, sequence_length, sequence_length).
        """
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        # Repeat the single mask once per batch element
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
            axis=0,
        )
        return tf.tile(mask, mult)

        
def create_actor(max_length):
    """
    Builds the actor: an encoder-decoder model mapping the "encoder_inputs" and
    "decoder_inputs" token ids to a softmax distribution over VOCAB_SIZE+1 ids for
    every decoder position.

    :param max_length: number of positions supported by the positional embeddings

    :return
        - the (uncompiled) Keras model named "actor"
    """
    # Encoder: token ids -> embeddings -> FNet block
    source_ids = keras.Input(shape=(None,), dtype="int32", name="encoder_inputs")
    source_embedded = PositionalEmbedding(max_length, VOCAB_SIZE+1, EMBED_DIM)(source_ids)
    encoded_source = FNetEncoder(EMBED_DIM, LATENT_DIM)(source_embedded)
    encoder = keras.Model(source_ids, encoded_source)

    # Decoder, defined as a standalone model fed with the encoder outputs
    target_ids = keras.Input(shape=(None,), dtype="int32", name="decoder_inputs")
    encoder_state = keras.Input(
        shape=(None, EMBED_DIM), name="decoder_state_inputs"
    )
    hidden = PositionalEmbedding(max_length, VOCAB_SIZE+1, EMBED_DIM)(target_ids)
    hidden = FNetDecoder(EMBED_DIM, LATENT_DIM, NUM_HEADS)(hidden, encoder_state)
    hidden = layers.Dropout(0.5)(hidden)
    token_probabilities = layers.Dense(VOCAB_SIZE+1, activation="softmax")(hidden)
    decoder = keras.Model(
        [target_ids, encoder_state], token_probabilities, name="outputs"
    )

    # Wire encoder and decoder together
    actor_outputs = decoder([target_ids, encoded_source])
    return keras.Model([source_ids, target_ids], actor_outputs, name="actor")


def create_critic(max_length):
    """
    Builds the critic: the same encoder-decoder stack as the actor (with dropout 0.2),
    combined with an action vector to produce one tanh-activated value per sample.

    :param max_length: number of positions supported by the positional embeddings

    :return
        - the (uncompiled) Keras model named "critic" with inputs
          [encoder_inputs, decoder_inputs, action_inputs]
    """
    encoder_inputs = keras.Input(shape=(None,), dtype="int32", name="encoder_inputs")
    x = PositionalEmbedding(max_length, VOCAB_SIZE+1, EMBED_DIM)(encoder_inputs)
    encoder_outputs = FNetEncoder(EMBED_DIM, LATENT_DIM)(x)
    encoder = keras.Model(encoder_inputs, encoder_outputs)
    decoder_inputs = keras.Input(shape=(None,), dtype="int32", name="decoder_inputs")
    encoded_seq_inputs = keras.Input(
        shape=(None, EMBED_DIM), name="decoder_state_inputs"
    )
    x = PositionalEmbedding(max_length, VOCAB_SIZE+1, EMBED_DIM)(decoder_inputs)
    x = FNetDecoder(EMBED_DIM, LATENT_DIM, NUM_HEADS)(x, encoded_seq_inputs)
    x = layers.Dropout(0.2)(x)
    decoder_outputs = layers.Dense(VOCAB_SIZE+1, activation="softmax")(x)
    decoder = keras.Model(
        [decoder_inputs, encoded_seq_inputs], decoder_outputs, name="outputs"
    )
    decoder_outputs_tmp = decoder([decoder_inputs, encoder_outputs])

    # The shape must be a tuple: `(VOCAB_SIZE+1)` without the trailing comma is a plain int
    action_input = keras.Input(shape=(VOCAB_SIZE+1,), dtype="float32", name="action_inputs")
    action_out = layers.Dense(16, activation="relu")(action_input)

    # For every sample keep only the decoder output at the position given by the number
    # of non-zero (non-padding) decoder input tokens.
    # NOTE(review): the loop hard-wires the batch dimension to BATCH_SIZE_RL, so the
    # critic presumably only works with batches of exactly that size - confirm.
    decoder_outputs = []
    for i in tf.range(0, BATCH_SIZE_RL):
      decoder_outputs.append(decoder_outputs_tmp[i, tf.math.count_nonzero(decoder_inputs[i], dtype='int32'), :])
    decoder_outputs = tf.convert_to_tensor(decoder_outputs)

    concat = layers.Concatenate()([decoder_outputs, action_out])
    out = layers.Dense(1, activation="tanh")(concat)
    fnet = keras.Model([encoder_inputs, decoder_inputs, action_input], out, name="critic")
    return fnet

### Creating the seq2seq model

In [None]:
# Build the actor network for sequences of `max_seq_length` tokens and compile it for
# per-token classification over the vocabulary.
fnet = create_actor(max_seq_length)
fnet.compile("adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

### Load Model

In [None]:
# Restore the weights saved by the "Save Model" cell into the freshly built model.
fnet.load_weights("/gdrive/MyDrive/NLP/pretrained_fnet.h5")

### Pre-Training

In [None]:
start_pretraining = False  # Added for security against "run all"
if start_pretraining:
  # Stop when val_loss has not improved by at least 0.001 for 3 consecutive epochs
  callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.001, patience=3)
  history = fnet.fit(train_dataset, epochs=90, validation_data=test_dataset, 
                    callbacks=[callback])

### Inference

In [None]:
# Decode one example sentence with the pre-training preprocessing ([start] / [end] added).
sentence = "marriage isn't keeping up with the times. abandon the old thinking and bring something that incorporates all unions not just those with a man and woman."
out = decode_sentence(sentence, preprocess_pretrain)
print(out)

### Save Model

In [None]:
start_pretraining = False  # Added for security against "run all"
if start_pretraining:
  # Overwrites the weights file that the "Load Model" cell reads
  fnet.save_weights("/gdrive/MyDrive/NLP/pretrained_fnet.h5")

## [Bert](https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/classify_text_with_bert.ipynb)

### Model to fine-tune

In [None]:
# TF-Hub handles (encoder + matching preprocessing model) of the quality classifier
bert_model_name_quality = 'bert_en_uncased_L-12_H-768_A-12'

tfhub_handle_encoder_quality = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3'
tfhub_handle_preprocess_quality = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'

# TF-Hub handles (encoder + matching preprocessing model) of the R1 classifier
bert_model_name_r1 = 'albert_en_base'

tfhub_handle_encoder_r1 = 'https://tfhub.dev/tensorflow/albert_en_base/3'
tfhub_handle_preprocess_r1 = 'https://tfhub.dev/tensorflow/albert_en_preprocess/3'

print(f'Model name quality                       : {bert_model_name_quality}')
print(f'BERT model selected quality              : {tfhub_handle_encoder_quality}')
print(f'Preprocess model auto-selected quality   : {tfhub_handle_preprocess_quality}')
print(f'Model name r1                            : {bert_model_name_r1}')
print(f'BERT model selected r1                   : {tfhub_handle_encoder_r1}')
print(f'Preprocess model auto-selected r1        : {tfhub_handle_preprocess_r1}')

In [None]:
def build_classifier_model(dense_size=100):
    """
    Builds the quality classifier: raw strings -> TF-Hub preprocessing -> trainable
    BERT encoder -> dense relu layer -> single sigmoid output.

    :param dense_size: number of units of the hidden dense layer

    :return
        - the (uncompiled) Keras model taking a batch of strings
    """
    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    tokenized = hub.KerasLayer(tfhub_handle_preprocess_quality, name='preprocessing')(raw_text)
    bert_outputs = hub.KerasLayer(tfhub_handle_encoder_quality, trainable=True, name='BERT_encoder')(tokenized)
    # Only the pooled sentence representation feeds the classification head
    hidden = tf.keras.layers.Dense(
        dense_size, activation=keras.activations.relu, name='fc'
    )(bert_outputs['pooled_output'])
    score = tf.keras.layers.Dense(
        1, activation=keras.activations.sigmoid, name='classifier'
    )(hidden)
    return tf.keras.Model(raw_text, score)

In [None]:
def build_classifier_model_r1(dense_size=100):  # model used to compute the score of the topic
    """
    Builds the R1 classifier: raw strings -> TF-Hub preprocessing -> trainable ALBERT
    encoder -> dense relu layer -> 3-way softmax output.

    :param dense_size: number of units of the hidden dense layer

    :return
        - the (uncompiled) Keras model taking a batch of strings
    """
    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    tokenized = hub.KerasLayer(tfhub_handle_preprocess_r1, name='preprocessing')(raw_text)
    albert_outputs = hub.KerasLayer(tfhub_handle_encoder_r1, trainable=True, name='BERT_encoder')(tokenized)
    # Only the pooled sentence representation feeds the classification head
    hidden = tf.keras.layers.Dense(
        dense_size, activation=keras.activations.relu, name='fc_1'
    )(albert_outputs['pooled_output'])
    class_probabilities = tf.keras.layers.Dense(
        3, activation=keras.activations.softmax, name='classifier'
    )(hidden)
    return tf.keras.Model(raw_text, class_probabilities)

In [None]:
# Instantiate both classifiers with the default hidden size; weights are loaded below.
bert_model_quality = build_classifier_model()
bert_model_r1 = build_classifier_model_r1()

In [None]:
# Print the layer overview of the quality classifier.
bert_model_quality.summary()

### Load best model Quality

In [None]:
# Restore the quality classifier weights from the path in `quality_model_weights`.
bert_model_quality.load_weights(quality_model_weights)

### Load best model R1 (topic)

In [None]:
# Restore the R1 classifier weights from the path in `r1_model_weights`.
bert_model_r1.load_weights(r1_model_weights)

## Train

## RL Stuff

### Noise

In [None]:
class OUActionNoise:
    """
    Ornstein-Uhlenbeck noise generator: every call returns a new sample that depends
    on the previous one and is pulled back towards `mean`.

    :param mean: array the process reverts to (its shape is the shape of the noise)
    :param std_deviation: scale of the random term
    :param theta: strength of the pull towards `mean`
    :param dt: time step of the discretized process
    :param x_initial: optional start value; zeros shaped like `mean` when None
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean = mean
        self.std_dev = std_deviation
        self.theta = theta
        self.dt = dt
        self.x_initial = x_initial
        self.reset()

    def __call__(self):
        # Formula taken from https://www.wikipedia.org/wiki/Ornstein-Uhlenbeck_process.
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal(size=self.mean.shape)
        sample = self.x_prev + drift + diffusion
        # Remember the sample: the next noise value depends on the current one
        self.x_prev = sample
        return sample

    def reset(self):
        """Restarts the process from `x_initial` (or from zeros when it is None)."""
        self.x_prev = self.x_initial if self.x_initial is not None else np.zeros_like(self.mean)

### Buffer

In [None]:
class Buffer:
    """Experience replay buffer that also drives the actor-critic updates.

    Transitions are kept in pre-allocated NumPy arrays that are overwritten
    cyclically once `buffer_capacity` records have been stored. `learn()`
    draws mini-batches from the stored transitions and feeds them to
    `update()`, which trains the module-level `actor_model` and `critic_model`
    using `target_actor`, `target_critic`, `actor_optimizer`,
    `critic_optimizer` and `gamma`.

    Args:
        buffer_capacity: maximum number of transitions kept in memory.
        batch_size: number of transitions per gradient step.
        num_epochs: number of passes `learn()` makes over its training subset.
        train_buffer: maximum number of transitions used by one `learn()` call;
            must not exceed `buffer_capacity`.
    """

    def __init__(self, buffer_capacity=100000, batch_size=32, num_epochs=1, 
                 train_buffer=100000):
        # Number of "experiences" to store at max
        self.buffer_capacity = buffer_capacity
        # Number of epochs
        self.num_epochs = num_epochs
        # Num of tuples to train on.
        self.batch_size = batch_size
        # Num of obs to use in epochs
        self.train_buffer = train_buffer
        assert self.train_buffer <= self.buffer_capacity

        # It tells us the number of times record() was called.
        self.buffer_counter = 0

        # It tells us the number of valid records, capped at the capacity.
        self.buffer_used = 0

        # Instead of a list of tuples, as the experience replay concept goes,
        # we use a separate np.array for each tuple element.
        # A state is a pair of sequences: (start sentence, sentence so far).
        self.state_buffer = np.zeros((self.buffer_capacity, 2, max_seq_length))
        self.action_buffer = np.zeros((self.buffer_capacity, VOCAB_SIZE+1))
        self.reward_buffer = np.zeros((self.buffer_capacity, 1))
        self.next_state_buffer = np.zeros((self.buffer_capacity, max_seq_length))
        # Holds `1 - done`, i.e. 0 for terminal transitions and 1 otherwise.
        self.dones = np.zeros((self.buffer_capacity))

    # Takes a (s, a, r, s', done) observation tuple as input
    def record(self, obs_tuple):
        """Store one transition, overwriting the oldest one when full."""
        # Set index to zero if buffer_capacity is exceeded,
        # replacing old records
        index = self.buffer_counter % self.buffer_capacity

        self.state_buffer[index] = obs_tuple[0]
        self.action_buffer[index] = obs_tuple[1]
        self.reward_buffer[index] = obs_tuple[2]
        self.next_state_buffer[index] = obs_tuple[3]
        # Stored inverted so it can directly mask the bootstrapped value.
        self.dones[index] = 1 - obs_tuple[4]

        self.buffer_counter += 1

        if self.buffer_used < len(self.state_buffer):
          self.buffer_used += 1

    # Eager execution is turned on by default in TensorFlow 2. Decorating with tf.function allows
    # TensorFlow to build a static graph out of the logic and computations in our function.
    # This provides a large speed up for blocks of code that contain many small TensorFlow operations such as this one.
    @tf.function
    def update(
        self, state_batch, action_batch, reward_batch, next_new_batch, dones,
    ):
        """Run one actor update followed by one critic update on a batch.

        Args:
            state_batch: batch of states; index 0 on axis 1 is the start
                sentence and index 1 the sentence generated so far.
            action_batch: actions that were actually taken.
            reward_batch: rewards received for those actions.
            next_new_batch: generated sentence after the action.
            dones: mask holding `1 - done` for each transition.

        Returns:
            Tuple `(actor_loss, critic_loss)` for this batch.
        """
        # Training and updating Actor & Critic networks.
        # See Pseudo Code.
        start_batch = state_batch[:, 0, :]
        new_batch = state_batch[:, 1, :]
        with tf.GradientTape() as tape:
            actions_tmp = actor_model([start_batch, new_batch], training=True)
            # Removing dimension 1 from output: keep, for every sample, the
            # actor output at the position of its last non-zero token.
            actions = []
            for i in range(actions_tmp.shape[0]):
              actions.append(actions_tmp[i, tf.math.count_nonzero(new_batch[i])-1, :])
            actions = tf.convert_to_tensor(actions)
            critic_value = critic_model([start_batch, new_batch, actions], training=True)
            # Used `-value` as we want to maximize the value given
            # by the critic for our actions
            actor_loss = -tf.math.reduce_mean(critic_value)

        actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
        actor_optimizer.apply_gradients(
            zip(actor_grad, actor_model.trainable_variables)
        )

        # tf.print("Actor Loss: ", actor_loss)
        
        with tf.GradientTape() as tape:
            target_actions_tmp = target_actor([start_batch, next_new_batch], training=True)
            target_actions = []
            # Removing dimension 1 from output
            # NOTE(review): this index has no `-1`, unlike the actor branch
            # above - confirm the asymmetry is intended.
            for i in range(target_actions_tmp.shape[0]):
              target_actions.append(target_actions_tmp[i, tf.math.count_nonzero(next_new_batch[i]), :])
            target_actions = tf.convert_to_tensor(target_actions)
            # Bootstrapped target; `dones` zeroes the future value of
            # terminal transitions.
            y = reward_batch + dones * gamma * target_critic([start_batch, 
                                                                    next_new_batch, 
                                                                    target_actions], 
                                                                   training=True)
            critic_value = critic_model([start_batch, new_batch, action_batch], training=True)
            critic_loss = tf.math.reduce_mean(tf.math.square(y - critic_value))
        critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
        critic_optimizer.apply_gradients(
            zip(critic_grad, critic_model.trainable_variables)
        )

        # tf.print("Critic Loss: ", critic_loss)

        return actor_loss, critic_loss

    # We compute the loss and update parameters
    def learn(self):
        """Train on a random subset of the stored transitions.

        At most `train_buffer` stored transitions are picked at random and,
        for each of the `num_epochs` epochs, reshuffled and consumed in
        disjoint mini-batches of `batch_size` (an incomplete last batch is
        dropped).

        Returns:
            Tuple `(mean_actor_loss, mean_critic_loss)` averaged over every
            mini-batch processed, or `(nan, nan)` when fewer than `batch_size`
            transitions are available and no update could be run.
        """
        size_memory = self.buffer_used
        n_train = min(size_memory, self.train_buffer)
        n_batches = n_train // self.batch_size
        # Random subset of the valid records used by this call.
        train_indices = np.random.permutation(size_memory)[:n_train]

        actor_loss, critic_loss = [], []
        for _ in range(self.num_epochs):
            # New batch composition at every epoch; consecutive slices of a
            # shuffled array are disjoint random batches.
            np.random.shuffle(train_indices)
            for batch_id in range(n_batches):
                first = batch_id * self.batch_size
                batch_indices = train_indices[first:first + self.batch_size]
                # Convert to tensors
                state_batch = tf.convert_to_tensor(self.state_buffer[batch_indices])
                action_batch = tf.convert_to_tensor(self.action_buffer[batch_indices])
                reward_batch = tf.convert_to_tensor(self.reward_buffer[batch_indices])
                reward_batch = tf.cast(reward_batch, dtype=tf.float32)
                next_state_batch = tf.convert_to_tensor(self.next_state_buffer[batch_indices])
                dones = tf.convert_to_tensor(self.dones[batch_indices])
                dones = tf.cast(dones, dtype=tf.float32)

                actor_loss_tmp, critic_loss_tmp = self.update(state_batch, action_batch, 
                                                              reward_batch, next_state_batch, 
                                                              dones)
                actor_loss.append(actor_loss_tmp)
                critic_loss.append(critic_loss_tmp)

        if not actor_loss:
            # Not enough data for a single batch: nothing was trained.
            return float("nan"), float("nan")
        return np.array(actor_loss).mean(), np.array(critic_loss).mean()


# Soft update of the target network parameters.
# `tau` is much smaller than one, so the targets move slowly.
@tf.function
def update_target(target_weights, weights, tau):
    """Blend `weights` into `target_weights` in place.

    Each target variable becomes `tau * source + (1 - tau) * target`.

    Args:
        target_weights: variables of the target network, updated in place.
        weights: variables of the trained network, paired by position.
        tau: interpolation rate.
    """
    for target_var, source_var in zip(target_weights, weights):
        blended = source_var * tau + target_var * (1 - tau)
        target_var.assign(blended)

### Policy

In [None]:
def policy(state, noise_object, step):
    """Query the actor for an action, optionally perturbed by noise.

    Args:
        state: input passed as-is to the module-level `actor_model`.
        noise_object: callable returning the noise to add to the action.
        step: current step; noise is only added once it reaches
            `BATCH_SIZE_RL`.

    Returns:
        NumPy array holding the action clipped to [0, 1], with a leading
        axis of size one.
    """
    actor_output = tf.squeeze(actor_model(state))

    # Noise is only added from step `BATCH_SIZE_RL` onwards.
    if step >= BATCH_SIZE_RL:
      actor_output = actor_output.numpy() + noise_object()

    # We make sure action is within bounds
    bounded = np.clip(actor_output, 0.0, 1.0)
    return np.array([np.squeeze(bounded)])

In [None]:
def sentences_bert(sentence):
  """Turn a sequence of token ids into the text fed to the BERT evaluators.

  The ids are zero-padded / truncated to `max_seq_length`, mapped back to
  tokens with the module-level `tokenizer` and joined with single spaces.
  """
  n_padding = max_seq_length - len(sentence)
  padded_ids = list(sentence) + [0] * n_padding
  fixed_length_ids = np.array(padded_ids[:max_seq_length])
  tokens = tokenizer.convert_ids_to_tokens([fixed_length_ids])
  return " ".join(tokens)


# Quality scores of the start sentences: computed once with the quality
# classifier, then cached on disk and reloaded on later runs.
if not os.path.exists(start_scores):
  score_predictions = bert_model_quality.predict(
      np.array([sentences_bert(x) for x in x_train_rl]),
      batch_size=256,
      verbose=1,
  )
  with open(start_scores, "wb") as f:
    pickle.dump(score_predictions, f)
else:
  with open(start_scores, "rb") as f:
    score_predictions = pickle.load(f)

### Environment

In [None]:
def r4_reward(sentence):
  """Score a token-id sequence by how few of its distinct tokens repeat.

  Zeros are treated as padding and ignored. The reward is
  `1 - repeated / distinct`, where `distinct` is the number of different
  tokens and `repeated` the number of those occurring more than once.

  Args:
    sentence: sequence (list or array) of integer token ids.

  Returns:
    A float in [0, 1]: 1.0 when no token repeats, 0.0 when every distinct
    token repeats. A sequence without any non-zero token returns 1.0, since
    it contains no repetition.
  """
  tokens = np.asarray(sentence)
  tokens = tokens[tokens != 0]  # drop the padding
  if tokens.size == 0:
    # Avoids dividing by zero on empty / all-padding input.
    return 1.0

  # One pass gives the distinct tokens together with their occurrences.
  _, occurrences = np.unique(tokens, return_counts=True)
  n_repeated_tokens = int(np.count_nonzero(occurrences > 1))

  return 1 - (n_repeated_tokens / len(occurrences))

In [None]:
class Env:
  """Sentence-generation environment used by the RL training loop.

  An episode starts from a randomly chosen training sentence (`reset`), grows
  a new sentence one token per `step`, and scores it with `get_reward`.

  Args:
    tokenizer: object exposing `vocab` (with the "[start]" and "[end]" ids)
      and `convert_ids_to_tokens`.
    max_seq_length: length every state is padded / truncated to.
    train_dataset: sequence of tokenised start sentences.
    train_dataset_topics: topic of each start sentence, aligned by index.
    evaluator_quality: callable scoring the quality of a sentence.
    evaluator_r1: callable scoring a "sentence. topic" string.
    score_predictions: precomputed quality score of each start sentence,
      aligned by index with `train_dataset`.
  """

  def __init__(self, tokenizer, max_seq_length, train_dataset, train_dataset_topics, 
               evaluator_quality, evaluator_r1, score_predictions):
    self.tokenizer = tokenizer
    self.max_seq_length = max_seq_length
    self.start_sentence = random.choice(train_dataset)
    self.prev_sentence = []
    self.evaluator_quality = evaluator_quality
    self.evaluator_r1 = evaluator_r1
    # [0] holds the start sentences, [1] their precomputed quality scores.
    self.train_dataset = [list(train_dataset), score_predictions]
    self.train_dataset_topics = train_dataset_topics
    # Both are set by reset(); defined here so the attributes always exist.
    self.topic = None
    self.score_start_sentence = None
      
  def get_reward(self, new_sentence, done):
    """Compute the reward of the sentence generated so far.

    Args:
      new_sentence: list of token ids; its first and last elements are
        discarded before scoring.
      done: whether the episode is over (only triggers a progress print).

    Returns:
      `0.30 * r_quality + 0.40 * r4 + 0.30 * r1`, or a constant -1.0 tensor
      when nothing but padding is left to score.
    """
    # Padding new sentence
    new_sentence = new_sentence[1:-1]
    sentence_r = new_sentence + [0] * (self.max_seq_length - len(new_sentence))
    sentence_r = np.array(sentence_r[:self.max_seq_length])

    if np.count_nonzero(sentence_r) >= 1:
      bert_sentence = " ".join(self.tokenizer.convert_ids_to_tokens([sentence_r]))
      # Quality is rewarded relative to the start sentence.
      new_score = self.evaluator_quality(np.array([bert_sentence]))
      r_quality = new_score - self.score_start_sentence
      topic_sentence = bert_sentence + ". " + self.topic
      r1_scores = self.evaluator_r1(np.array([topic_sentence]))[0]
      r1 = r1_scores[0] - 1.0 * r1_scores[1]

      r4 = r4_reward(sentence_r)
      if done:
          print(0.30 * r_quality, " ", 0.40 * r4, " ", 0.30 * r1)
      reward = 0.30 * r_quality + 0.40 * r4 + 0.30 * r1
    else:
      # Nothing to evaluate: fixed penalty.
      reward = tf.constant([[-1.0]])

    return reward

  def step(self, action, step):
    """Append the token selected by the action at position `step`.

    Args:
      action: actor output indexed as `action[0, step, :]`.
      step: index of the current step within the episode.

    Returns:
      Tuple `(sentence_r, buff_action, done)`: the padded sentence without
      its first and last elements, the action row that was used, and whether
      the episode ended ("[end]" produced or maximum length reached).
    """
    buff_action = action[0, step, :]
    new_sentence = self.prev_sentence + [tf.argmax(buff_action)]

    done = False
    if new_sentence[-1] == self.tokenizer.vocab["[end]"] or len(new_sentence) >= self.max_seq_length:
      done = True
    # Padding new sentence
    print(new_sentence)
    
    # NOTE(review): this slice also drops the token that was just appended -
    # confirm that is intended for non-terminal steps.
    sentence_r = new_sentence[1:-1]
    print(sentence_r)
    sentence_r = sentence_r + [0] * (self.max_seq_length - len(sentence_r))
    sentence_r = np.array(sentence_r[:self.max_seq_length])
    
    self.prev_sentence = new_sentence

    return sentence_r, buff_action, done

  def reset(self):
    """Start a new episode from a randomly chosen training sentence.

    Returns:
      Tuple `(start_sentence, sentence_r)`: the chosen sentence wrapped in
      "[start]" / "[end]" ids, and the initial state, i.e. "[start]" padded
      with zeros to `max_seq_length`.
    """
    # randrange excludes the upper bound, so the index is always valid
    # (randint would include len(...) and occasionally raise IndexError).
    sentence_id = random.randrange(len(self.train_dataset[0]))
    self.start_sentence = self.train_dataset[0][sentence_id]
    self.topic = self.train_dataset_topics[sentence_id]
    self.score_start_sentence = self.train_dataset[1][sentence_id]
    self.prev_sentence = [self.tokenizer.vocab["[start]"]]

    sentence_r = self.prev_sentence + [0] * (self.max_seq_length - len(self.prev_sentence))
    sentence_r = np.array(sentence_r[:self.max_seq_length])

    start_sentence = [self.tokenizer.vocab["[start]"]] 
    start_sentence.extend(self.start_sentence)
    start_sentence.extend([self.tokenizer.vocab["[end]"]])

    return start_sentence, sentence_r

## Training

In [None]:
# Exploration noise added to the actor output by `policy`.
std_dev = 0.5
ou_noise = OUActionNoise(mean=np.zeros(1), std_deviation=float(std_dev) * np.ones(1))

# The pre-trained seq2seq network acts as the actor.
actor_model = fnet
critic_model = create_critic(max_seq_length)

# Initialise every critic layer that shares its name with an actor layer
# from the actor's weights.
for actor_layer in actor_model.layers:
  for critic_layer in critic_model.layers:
    if actor_layer.name == critic_layer.name:
      critic_layer.set_weights(actor_layer.get_weights())

target_actor = create_actor(max_seq_length)
target_critic = create_critic(max_seq_length)

# Targets start as exact copies of the trained networks
target_actor.set_weights(actor_model.get_weights())
target_critic.set_weights(critic_model.get_weights())

# Learning rates and optimizers of the actor-critic models
critic_lr = 0.003
actor_lr = 0.003

critic_optimizer = tf.keras.optimizers.Adam(learning_rate=critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(learning_rate=actor_lr)

# Schedule, expressed in episodes
train_freq = 3          # train every `train_freq` episodes
plot_freq = 50          # plot every `plot_freq` episodes
total_episodes = 10000  # number of episodes to run

# Discount factor for future rewards
gamma = 0.99
# Rate of the soft update of the target networks
tau = 0.005

buffer = Buffer(
    buffer_capacity=BUFFER_SIZE,
    batch_size=BATCH_SIZE_RL,
    num_epochs=1,
    train_buffer=TRAIN_BUFFER,
)

In [None]:
# RL training loop: every episode generates one sentence token by token,
# stores each transition in the replay buffer and periodically trains the
# actor-critic networks and soft-updates their targets.

# To store reward history of each episode
ep_reward_list = []
# To store average reward history of last few episodes
avg_reward_list = []
# To store average actor loss
avg_actor_list = []
# To store average critic loss
avg_critic_list = []

env = Env(tokenizer, max_seq_length, x_train_rl, topic_list, bert_model_quality, 
          bert_model_r1, score_predictions)

for ep in range(total_episodes):

    start_sentence, prev_sentence = env.reset()
    tf_prev_state_1 = tf.expand_dims(tf.convert_to_tensor(start_sentence), 0)
    tf_prev_state_2 = tf.expand_dims(tf.convert_to_tensor(prev_sentence), 0)

    # `_tr` states are batched tensors fed to the actor,
    # `_sv` states are the raw sequences saved in the replay buffer.
    prev_state_tr = (tf_prev_state_1, tf_prev_state_2)
    prev_state_sv = (start_sentence, prev_sentence)

    step = 0
    ep_reward = 0.0

    while True:
        action = policy(prev_state_tr, ou_noise, step)
        # Receive state from environment.
        new_sentence, buff_action, done = env.step(action, step)
        tf_prev_state_2 = tf.expand_dims(tf.convert_to_tensor(new_sentence), 0)
        state_tr = (tf_prev_state_1, tf_prev_state_2)
        state_sv = (start_sentence, new_sentence)

        # Getting reward
        reward_tmp = env.get_reward(env.prev_sentence, done)
        ep_reward += reward_tmp
        buffer.record((prev_state_sv, buff_action, reward_tmp, state_sv[1], 1 if done else 0))
        
        # End this episode when `done` is True
        if done:
            break

        prev_state_tr = state_tr
        prev_state_sv = state_sv

        step += 1

    # `step` is the index of the last step, so `step + 1` rewards were summed.
    # Dividing by `step` would be off by one and divide by zero whenever the
    # episode ends at its very first step.
    ep_reward /= (step + 1)
    ep_reward_list.append(ep_reward)

    print([tokenizer.tokenizer.index_word[x] for x in start_sentence if x != 0])
    print([tokenizer.tokenizer.index_word[x] for x in new_sentence[1:-1] if x != 0])

    # Mean of last 10 episodes
    avg_reward = np.mean(ep_reward_list[-10:])
    print("Buffer size: ", buffer.buffer_used)
    print("Episode * {} * Avg Reward is ==> {} * Episode Reward is ==> {}".format(ep, avg_reward, ep_reward))
    print()
    avg_reward_list.append(avg_reward)

    # Training: every `train_freq` episodes, once enough transitions
    # have been recorded.
    if ep % train_freq == 0 and ep != 0 and buffer.buffer_counter >= 1.5 * TRAIN_BUFFER:
      print("Training")
      actor_loss_tmp, critic_loss_tmp = buffer.learn()
      avg_actor_list.append(actor_loss_tmp)
      avg_critic_list.append(critic_loss_tmp)
      # Let the target networks slowly track the trained ones.
      update_target(target_actor.variables, actor_model.variables, tau)
      update_target(target_critic.variables, critic_model.variables, tau)

    if ep != 0 and ep % plot_freq == 0:
      # Plotting graph
      # Episodes versus Avg. Rewards
      plt.plot(avg_reward_list)
      plt.xlabel("Episode")
      plt.ylabel("Avg. Epsiodic Reward")
      plt.show()
      # Plotting graph
      # Episodes versus Avg. Actor loss
      plt.plot(avg_actor_list)
      plt.xlabel("Episode")
      plt.ylabel("Avg. Actor Loss")
      plt.show()
      # Plotting graph
      # Episodes versus Avg. Critic loss
      plt.plot(avg_critic_list)
      plt.xlabel("Episode")
      plt.ylabel("Avg. Critic Loss")
      plt.show()

### Save Model

In [None]:
# Safety guard: the assert always fails, so running this cell by accident
# cannot overwrite previously saved weights. Remove or comment out the assert
# to actually save.
assert False  # Make sure to avoid deleting other saved models
fnet.save_weights("/gdrive/MyDrive/NLP/final_fnet.h5")

## Load Model

In [None]:
# Load the weights written by the save cell above into `fnet`.
fnet.load_weights("/gdrive/MyDrive/NLP/final_fnet.h5")

## Inference

In [None]:
# Run the model on one example argument and print the result.
# `decode_sentence` and `preprocess_rl` are defined earlier in the notebook.
sentence = "marriage isn't keeping up with the times. abandon the old thinking and bring something that incorporates all unions not just those with a man and woman."
out = decode_sentence(sentence, preprocess_rl)
print(out)