# Setup

**Package Installations**

In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate
!pip install rouge_score

**Imports**

In [None]:
import datasets
from datasets import load_dataset

import transformers
from transformers import AutoTokenizer, TFT5ForConditionalGeneration

import tensorflow as tf

import evaluate

from tqdm import tqdm

import pickle

**Hyperparameters**

In [None]:
# Prompt prefix placed at the start of every encoder input.
task_prefix = 'generate boolean question: ' # layout in the (commented-out) encode helper: task_prefix + answer + ' context: ' + passage --> boolean question
# Step size handed to the AdamW optimizer.
learning_rate = 3e-4
# Presumably the padded encoder length of the pre-encoded JSON data -- TODO confirm against the encoding run.
encoder_max_len = 500
# Upper bound on generated tokens; passed as max_length to model.generate during inference.
decoder_max_len = 70
# Examples per batch; also used to derive the number of training steps per epoch.
batch_size = 8

**Tokenizer & Optimizer**

In [None]:
# Checkpoint name used for the tokenizer here and for the model in the training section.
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# AdamW driven by the learning rate from the hyperparameters cell.
optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate)

# Dataset

## Loading

In [None]:
# Drive folder holding the pre-encoded BoolQ splits.
save_folder_path = "/content/drive/MyDrive/Datasets/BoolQG"

# Split name -> JSON file, in the form expected by load_dataset(data_files=...).
boolq_data_files = dict(
    train=f"{save_folder_path}/boolq-train-original-encoded.json",
    validation=f"{save_folder_path}/boolq-valid-original-encoded.json",
)

In [None]:
# Load the train split first, then the validation split, from the JSON files declared above.
boolq_train_ds, boolq_valid_ds = (
    load_dataset("json", data_files=boolq_data_files, split=split_name)
    for split_name in ('train', 'validation')
)

In [None]:
print(boolq_train_ds.features)

In [None]:
# Short aliases for the two splits (same objects, no copy).
train_ds, valid_ds = boolq_train_ds, boolq_valid_ds

## Encoding

In [None]:
# def encode(example, encoder_max_len=encoder_max_len, decoder_max_len=decoder_max_len):
#     context = example['passage']
#     answer = str(example['answer'])

#     input = task_prefix + answer + ' ' + 'context: ' + context
#     output = example['question']

#     encoder_inputs = tokenizer(input, truncation=True,
#                                return_tensors='tf', max_length=encoder_max_len,
#                               pad_to_max_length=True)
#     decoder_inputs = tokenizer(output, truncation=True,
#                                return_tensors='tf', max_length=decoder_max_len,
#                               pad_to_max_length=True)

#     # Shapes come from the encoder_max_len and decoder_max_len in hyperparameters section
#     input_ids = encoder_inputs['input_ids'][0] # Shape before flattening: input_ids.shape= (1, 500) [[1,1,3,...]]
#     input_attention = encoder_inputs['attention_mask'][0] # Shape before flattening: attension_mask.shape= (1, 500)
#     target_ids = decoder_inputs['input_ids'][0] # Shape before flattening: target_ids.shape= (1, 70)
#     target_attention = decoder_inputs['attention_mask'][0] # Shape before flattening: target_attention.shape= (1, 70)

#     outputs = {'input_ids':input_ids, 'attention_mask': input_attention,
#                'labels':target_ids, 'decoder_attention_mask':target_attention}
#     return outputs

In [None]:
# boolq_train_ds = boolq_train_ds.map(encode)
# boolq_valid_ds = boolq_valid_ds.map(encode)

**Save encoded datasets to Drive (requires mounting Drive)**

In [None]:
# save_folder_path = "/content/drive/MyDrive/Datasets/BoolQG"

# boolq_train_ds.to_json(save_folder_path + "/" + "boolq-train-original-encoded.json")
# boolq_valid_ds.to_json(save_folder_path + "/" + "boolq-valid-original-encoded.json")

## To Tensorflow PrefetchDataset

**FlatMapDataset**

In [None]:
def to_tf_dataset(dataset):
  """Expose an encoded dataset as a generator-backed ``tf.data.Dataset``.

  Side effect: the dataset's output format is switched IN PLACE to TensorFlow
  for the four model columns (``set_format``), so the object passed in is
  modified as well.

  Args:
    dataset: object providing ``set_format`` and iteration over examples that
      contain 'input_ids', 'attention_mask', 'labels' and
      'decoder_attention_mask'.

  Returns:
    A ``tf.data.Dataset`` whose elements are dicts of those four features,
    each declared as a variable-length 1-D ``tf.int32`` tensor.
  """
  feature_names = ['input_ids', 'attention_mask', 'labels', 'decoder_attention_mask']
  dataset.set_format(type='tensorflow', columns=feature_names)

  # Every feature shares the same declared dtype and shape.
  output_types = {name: tf.int32 for name in feature_names}
  output_shapes = {name: tf.TensorShape([None]) for name in feature_names}

  def iterate_examples():
    # from_generator calls this and iterates over whatever it returns.
    return dataset

  return tf.data.Dataset.from_generator(iterate_examples, output_types, output_shapes)

In [None]:
# Convert the train split first, then the validation split.
tf_train_ds, tf_valid_ds = map(to_tf_dataset, (train_ds, valid_ds))

In [None]:
tf_train_ds

**PrefetchDataset**

In [None]:
def create_dataset(dataset, cache_path=None, batch_size=4,
                   buffer_size= 1000, shuffling=True):
    """Build the input pipeline: cache -> shuffle -> repeat -> batch -> prefetch.

    Shuffling is applied BEFORE ``repeat()`` so every example of one pass is
    emitted before the next pass starts. Shuffling after ``repeat()`` lets the
    buffer mix examples from neighbouring passes, so an "epoch" of
    ``steps_per_epoch`` batches can contain duplicates and miss examples.

    Args:
        dataset: ``tf.data.Dataset`` of individual (unbatched) examples.
        cache_path: file prefix passed to ``Dataset.cache``; ``None`` disables
            caching.
        batch_size: number of examples per batch.
        buffer_size: shuffle buffer size (ignored when ``shuffling`` is False).
        shuffling: whether to shuffle the examples.

    Returns:
        An endlessly repeating, batched and prefetched ``tf.data.Dataset``.
        Because it never ends, callers must bound iteration themselves (e.g.
        ``steps_per_epoch`` in ``fit``).
    """
    if cache_path is not None:
        dataset = dataset.cache(cache_path)
    if shuffling:
        dataset = dataset.shuffle(buffer_size)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    # tf.data.AUTOTUNE is the stable alias of the deprecated tf.data.experimental.AUTOTUNE.
    return dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
# Wrap both datasets in the batching/prefetch pipeline. Only the training data is shuffled.
# NOTE: this cell rebinds tf_train_ds / tf_valid_ds to the wrapped pipelines, so running it
# a second time would wrap them again -- re-run the conversion cell first.
tf_train_ds= create_dataset(tf_train_ds, batch_size=batch_size,
                           shuffling=True, cache_path = None)
tf_valid_ds = create_dataset(tf_valid_ds, batch_size=batch_size,
                           shuffling=False, cache_path = None)

# Training

**Training parameters**

In [None]:
# Epoch bookkeeping: epochs_done is passed to fit() as initial_epoch,
# total_num_of_epochs is the epoch index at which training stops.
total_num_of_epochs = 3
epochs_done = 0

# Whole batches per pass over the training split (a trailing partial batch is not counted).
ntrain = len(train_ds)
steps = ntrain // batch_size

# Validation counterparts, currently disabled:
# nvalid = len(valid_ds)
# valid_steps = nvalid // batch_size

In [None]:
# Keep a model that already exists in this session (e.g. restored from a checkpoint);
# otherwise start from the pretrained checkpoint named by model_name.
if 'model' in globals():
    print("Model was loaded")
else:
    print("WARNING: Model was NOT loaded.")
    model = TFT5ForConditionalGeneration.from_pretrained(model_name) #options: t5-small, t5-base, t5-large, t5-3b, t5-11b

# No loss is passed to compile; only the optimizer is configured here.
model.compile(optimizer=optimizer)

**Callbacks**

In [None]:
import shutil
from tensorflow import keras

# Number used in the directory name of the next checkpoint; advanced by the callback.
epoch_num = epochs_done + 1
saved_model_dir_dest = "/content/drive/MyDrive/BoolQGModels"

class SaveMyModel(keras.callbacks.Callback):
    """Save the model after every epoch and mirror the checkpoint to Drive."""

    def on_epoch_end(self, batch, logs=None):
        """Write a checkpoint named after the current ``epoch_num`` and copy it.

        Args:
            batch: first positional value Keras hands to ``on_epoch_end``
                (the epoch index); not used, the global ``epoch_num`` counter
                names the checkpoint instead.
            logs: metrics dict supplied by Keras; not used. Defaults to
                ``None`` rather than a shared mutable ``{}``.
        """
        global epoch_num
        saved_model_dir_src = f'{model_name}-epochs={epoch_num}-original-paperhp-all'
        # self.model is the model fit() attached this callback to, so the checkpoint
        # is correct even if the global name `model` is rebound later.
        self.model.save_pretrained(saved_model_dir_src)
        # save_pretrained wrote relative to the working directory; copy from that same
        # relative path instead of assuming the working directory is /content.
        shutil.copytree(saved_model_dir_src, saved_model_dir_dest + '/' + saved_model_dir_src, dirs_exist_ok=True)
        epoch_num = epoch_num + 1

saveMyModel = SaveMyModel()

In [None]:
# Train from epoch index `epochs_done` up to `total_num_of_epochs`, running `steps`
# batches per epoch; saveMyModel writes a checkpoint at the end of each epoch.
print("Total Steps: ", steps)
# Variant with validation, kept for reference:
# print("Total Validation Steps: ", valid_steps)
# model.fit(tf_train_ds, epochs=total_num_of_epochs, steps_per_epoch=steps, validation_data=tf_valid_ds, validation_steps=valid_steps, initial_epoch=epochs_done, callbacks=[saveMyModel])
model.fit(tf_train_ds, epochs=total_num_of_epochs, steps_per_epoch=steps, initial_epoch=epochs_done, callbacks=[saveMyModel])

# Saving Model

In [None]:
# Save the final weights into a directory named after the checkpoint and epoch count.
final_model_dir = f'{model_name}-epochs={total_num_of_epochs}'
model.save_pretrained(final_model_dir)

In [None]:
# Destination folder on Drive, and the local directory written by the save cell above.
saved_model_dir_dest = "/content/drive/MyDrive/BoolQGModels"
saved_model_dir_src = f'{model_name}-epochs={total_num_of_epochs}'

In [None]:
!cp -r {saved_model_dir_src} {saved_model_dir_dest}

# Loading Saved Model

In [None]:
# Colab form fields: the checkpoint directory to restore and the base checkpoint name.
# The trailing `#@param` annotations are Colab form markers and must stay on these lines.
saved_model_dir_src = "/content/drive/MyDrive/BoolQGModels/t5-small-epochs=20-original" #@param {type:"string"}
model_name = 't5-small' #@param ["t5-small", "t5-base", "t5-large"]

In [None]:
# Rebind `model` to the weights stored in the selected checkpoint directory.
model = TFT5ForConditionalGeneration.from_pretrained(saved_model_dir_src)

# Inference

In [None]:
# Called only for its side effect: to_tf_dataset switches boolq_valid_ds to the
# TensorFlow output format in place. The tf.data.Dataset it returns is discarded.
to_tf_dataset(boolq_valid_ds)
inference_ds = boolq_valid_ds

In [None]:
# Generate one question per validation example, a fixed number of examples per call.
# NOTE(review): top_p / top_k are sampling controls, but do_sample is not passed here,
# so they may have no effect on decoding -- confirm which decoding strategy is intended.
generation_batch_size = 100
extracted_answers = list()

for start in tqdm(range(0, len(inference_ds), generation_batch_size)):
  # Slice the rows first so only this batch is materialized; indexing the column and
  # then slicing would rebuild the whole column on every iteration.
  batch = inference_ds[start:start + generation_batch_size]
  output_sequences = model.generate(
      input_ids=batch["input_ids"],
      attention_mask=batch["attention_mask"],
      max_length=decoder_max_len,
      top_p=0.95,
      top_k=50,
      repetition_penalty=float(2)
  )
  extracted_answers.extend(tokenizer.batch_decode(output_sequences, skip_special_tokens=True))

In [None]:
# Dump the generated questions as plain text, one per line.
with open('your_file.txt', 'w') as text_out:
    text_out.writelines(f"{line}\n" for line in extracted_answers)

**Inference file name**

In [None]:
inference_file_name = 'boolean_questions_epochs=2.pickle'

**Saving inference**

In [None]:
print(len(extracted_answers))

In [None]:
import pickle

In [None]:
# Serialize the generated questions into the pickle file named above.
with open(inference_file_name, 'wb') as pickle_out:
    pickle_out.write(pickle.dumps(extracted_answers))

In [None]:
import os
# The pickle was written relative to the working directory, so measure it at the same
# relative path instead of assuming the working directory is /content.
size_in_mb = os.path.getsize(inference_file_name) / 10**6
print(f'{inference_file_name} size: {size_in_mb}MB')

In [None]:
!cp {inference_file_name} "/content/drive/MyDrive/AnswerExtractionModels"

**Loading inference**

In [None]:
# Local copy target (working directory) and the Drive location it is restored from.
inference_file_dist = inference_file_name
inference_file_src = f"/content/drive/MyDrive/AnswerExtractionModels/{inference_file_name}"

In [None]:
!cp {inference_file_src} {inference_file_dist}

In [None]:
# Restore the generated questions from the pickle file.
# Only unpickle files produced by this notebook: unpickling can execute arbitrary code.
with open(inference_file_name, 'rb') as pickle_in:
    extracted_answers = pickle.loads(pickle_in.read())

print('extracted_answers is', len(extracted_answers))

# Evaluation

In [None]:
# The two counts must match before scoring: reference questions, then predictions.
for sized in (boolq_valid_ds['question'], extracted_answers):
    print(len(sized))

**BLEU**

In [None]:
bleu = evaluate.load("bleu")

In [None]:
# BLEU at n-gram orders 1 to 4 against the reference questions (column read once).
reference_questions = boolq_valid_ds['question']

bleu1_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=1)
print(bleu1_score)

bleu2_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=2)
print(bleu2_score)

bleu3_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=3)
print(bleu3_score)

bleu4_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=4)
print(bleu4_score)

**ROUGE-L**

In [None]:
# ROUGE of the generated questions against the reference questions.
rouge_l = evaluate.load('rouge')
rogue_scores = rouge_l.compute(
    references=boolq_valid_ds['question'],
    predictions=extracted_answers,
)

In [None]:
print(rogue_scores)

**METEOR**

In [None]:
# METEOR of the generated questions against the reference questions.
meteor = evaluate.load('meteor')
meteor_score = meteor.compute(
    references=boolq_valid_ds['question'],
    predictions=extracted_answers,
)

In [None]:
print(meteor_score)

# {'meteor': 0.45575705972743036} base

# Calculate & Save Metrics

In [None]:
# File that will hold the pickled per-checkpoint metrics.
# NOTE: the name contains spaces and a colon, so it must be quoted wherever it is
# handed to a shell command.
results_file_name = 't5-base-epochs=10:15-lr=3e-4-optimizer=adamW Fix Results'

# Metric implementations used for every checkpoint.
bleu = evaluate.load("bleu")
rouge_l = evaluate.load('rouge')
meteor = evaluate.load('meteor')

# This rebinds the global tokenizer to the t5-base vocabulary for the checkpoints evaluated below.
tokenizer = AutoTokenizer.from_pretrained('t5-base')

# Called only for its side effect: switches boolq_valid_ds to the TensorFlow
# output format in place. The returned tf.data.Dataset is discarded.
to_tf_dataset(boolq_valid_ds)
inference_ds = boolq_valid_ds

In [None]:
# Evaluate the checkpoints of epochs 10..15: generate questions for the whole
# validation split with each one, then score them with BLEU-1..4, ROUGE and METEOR.
# `results` gets one entry per checkpoint:
#   [bleu1, bleu2, bleu3, bleu4, rouge, meteor]   (each the dict returned by compute()).
# NOTE(review): top_p / top_k are sampling controls, but do_sample is not passed to
# generate(), so they may have no effect -- confirm the intended decoding strategy.
results = list()
generation_batch_size = 100
# The references do not change between checkpoints; read the column once.
reference_questions = boolq_valid_ds['question']

for epoch in tqdm(range(10, 15 + 1)):
  saved_model_dir_src = f'/content/drive/MyDrive/BoolQGModels/t5-base-epochs={epoch}-original-paperhp'
  model = TFT5ForConditionalGeneration.from_pretrained(saved_model_dir_src)
  extracted_answers = list()
  # A separate loop variable is used here: the original reused `i` for both loops,
  # clobbering the epoch number inside the loop body.
  for start in tqdm(range(0, len(inference_ds), generation_batch_size)):
    # Slice rows first so only this batch is materialized, not the whole column.
    batch = inference_ds[start:start + generation_batch_size]
    output_sequences = model.generate(
        input_ids=batch["input_ids"],
        attention_mask=batch["attention_mask"],
        max_length=decoder_max_len,
        top_p=0.95,
        top_k=50,
        repetition_penalty=float(2)
    )
    extracted_answers.extend(tokenizer.batch_decode(output_sequences, skip_special_tokens=True))

  bleu1_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=1)
  bleu2_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=2)
  bleu3_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=3)
  bleu4_score = bleu.compute(predictions=extracted_answers, references=reference_questions, max_order=4)
  rogue_scores = rouge_l.compute(predictions=extracted_answers, references=reference_questions)
  meteor_score = meteor.compute(predictions=extracted_answers, references=reference_questions)

  current_epoch_results = [bleu1_score, bleu2_score, bleu3_score, bleu4_score, rogue_scores, meteor_score]
  results.append(current_epoch_results)

In [None]:
import pickle
import shutil

# Persist the per-checkpoint metrics locally, then copy the file to Drive.
with open(results_file_name, 'wb') as f:
    pickle.dump(results, f)
# The file name contains spaces, so an unquoted `!cp {results_file_name} ...` is split
# into several shell arguments and fails; shutil.copy takes the name as one path.
shutil.copy(results_file_name, "/content/drive/MyDrive/BoolQGModels")

In [None]:
# Reload the pickled metrics and print one checkpoint's scores per line.
# Only unpickle files produced by this notebook: unpickling can execute arbitrary code.
with open(results_file_name, 'rb') as results_in:
    results = pickle.load(results_in)

for epoch_metrics in results:
  print(epoch_metrics)

In [None]:
print(extracted_answers)

In [None]:
# Dump the most recent generated questions as plain text, one per line.
with open('your_file.txt', 'w') as text_out:
    text_out.writelines(f"{line}\n" for line in extracted_answers)

# Visualize results

Collect each metric's scores into separate lists.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# One empty list per metric, filled in the next cell.
bleu1_scores = []
bleu2_scores = []
bleu3_scores = []
bleu4_scores = []
roguel_scores = []
meteor_scores = []

In [None]:
# Split `results` (one [bleu1, bleu2, bleu3, bleu4, rouge, meteor] entry per checkpoint)
# into one list per metric. Iterating over `results` itself works for any number of
# checkpoints (a fixed range(0, 20) raises IndexError otherwise), and rebuilding the
# lists keeps this cell idempotent when it is run more than once.
bleu1_scores = [epoch_result[0]['bleu'] for epoch_result in results]
bleu2_scores = [epoch_result[1]['bleu'] for epoch_result in results]
bleu3_scores = [epoch_result[2]['bleu'] for epoch_result in results]
bleu4_scores = [epoch_result[3]['bleu'] for epoch_result in results]
roguel_scores = [epoch_result[4]['rougeL'] for epoch_result in results]
meteor_scores = [epoch_result[5]['meteor'] for epoch_result in results]

In [None]:
import numpy as np

# 1-based position of the highest BLEU-1 score in the list.
best_bleu1_index = np.argmax(bleu1_scores)
print(best_bleu1_index + 1)

In [None]:
sns.scatterplot(bleu1_scores)