# Setup File

In [None]:
# Install the third-party packages this notebook relies on; --quiet keeps pip's output short.
# NOTE(review): no versions are pinned, so a re-run may pull newer, incompatible releases — consider pinning.
!pip install nltk --quiet
!pip install transformers --quiet
!pip install datasets --quiet
!pip install evaluate --quiet
!pip install sentencepiece --quiet
!pip install accelerate --quiet
!pip install rouge_score --quiet
!pip install bert_score --quiet
!pip install torchvision --quiet
!pip install tensorboard --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m87.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m105.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.8/462.8 KB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m213.0/213.0 KB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.0/132.0 KB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 KB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Copy the BARTScore helper module from Google Drive into /content
# (presumably the working directory, so that `import Bartscore` resolves — TODO confirm).
!cp /content/drive/MyDrive/MscThesis/Evaluation_code/Bartscore.py /content

In [None]:
import os
import pickle
import pandas as pd
import numpy as np
import re
import torch 
import nltk

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, pipeline,  EarlyStoppingCallback, DataCollatorForSeq2Seq, Trainer
from transformers import MT5ForConditionalGeneration, T5Tokenizer, T5ForConditionalGeneration, MT5TokenizerFast, is_torch_tpu_available, logging

import datasets
import evaluate
import accelerate

import Bartscore as bartscore 
import gc
import json
from ast import literal_eval

In [None]:
# Fetch the NLTK stopwords corpus.
# NOTE(review): no direct use of it is visible in this notebook — presumably needed by an evaluation helper; confirm.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

# Set Parameters

In [None]:
# Define Parameters
# All tunable settings of the pipeline live in this single dictionary.
FLAGS = {
    # Model and tokenization
    'model_name': "google/mt5-base",
    'max_token_length_preprocessing': 256,
    # Training schedule
    'early_stopping_patience': 3,
    'model_save_total_limit': 4,
    'training_optimizer': "adafactor",
    'batch_size': 16,
    'gradient_accumulation_steps': 2,
    'learning_rate': 5e-05,
    'num_epochs': 200,
    'training_strategy': 'epoch',
    # Generation settings used during evaluation
    'generation_num_beams': 5,
    'generation_max_length': 100,
    # Locations on Google Drive
    'data_location': "/content/drive/MyDrive/MscThesis/Data/Elongated_test_subset/Improved Parent Testing/",
    'drive_path': "/content/drive/MyDrive/MscThesis",
    # fp16 seems to result in no learning (e.g. training loss of 0.000 and validation loss of nan)
    'mixed_precision': False,
    'model_iteration': 'google_mt5-base/',
    'path_model_name': "google_mt5-base/",
    'saved_model_path': '/content/drive/MyDrive/MscThesis/Models/google_mt5-base/Elongated_best_model/',
}

# Full Training and Evaluation Pipeline

In [None]:
############################################################################## Begin Environment Setup ######################################################################################

def ensure_cuda_compatability():
    """
    Print a summary of the Torch / CUDA / cuDNN environment, enable TF32 and free cached GPU memory.

    The device-specific queries (current device, device object, device name) are only
    issued when a CUDA device is available; without one they raise, which would turn
    this diagnostic into a crash instead of a report.

    Returns:
        None. The function only prints and sets global Torch backend flags.
    """
    cuda_available = torch.cuda.is_available()

    print(f'Torch version: {torch.__version__}')
    print(f'Cuda version: {torch.version.cuda}')
    print(f'Cudnn version: {torch.backends.cudnn.version()}')
    print(f'Is cuda available: {cuda_available}')
    print(f'Number of cuda devices: {torch.cuda.device_count()}')
    if cuda_available:
        print(f'Current default device: {torch.cuda.current_device()}')
        print(f'First cuda device: {torch.cuda.device(0)}')
        print(f'Name of the first cuda device: {torch.cuda.get_device_name(0)}\n\n')
    else:
        print('LOGGING: no CUDA device available, skipping device specific checks\n\n')

    # Allow TF32 kernels for matmul and cuDNN operations
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # Ensure we are really working with full GPU capacity
    gc.collect()
    if cuda_available:
        torch.cuda.empty_cache()

############################################################################## End Environment Setup ######################################################################################

############################################################################## Begin Model and Dataset Setup ######################################################################################

def preprocess_model(model_name):
    """
    Load a pre-trained seq2seq model together with its tokenizer.

    Args:
        model_name: identifier (or path) passed to `from_pretrained` for both the model and the tokenizer.

    Returns:
        Tuple `(model, tokenizer)`.
    """
    pretrained_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    pretrained_tokenizer = AutoTokenizer.from_pretrained(model_name)

    print('LOGGING: preprocess_model DONE \n')
    return pretrained_model, pretrained_tokenizer


def load_CACAPO_data():
    """
    Build a dataset from the csv file(s) found in FLAGS['data_location'].

    Only a "test" split (read from Test.csv) is requested.
    """
    split_files = {"test": "Test.csv"}
    data_directory = FLAGS['data_location']
    return datasets.load_dataset(data_directory, data_files=split_files)


def preprocess_data(data):
    """
    Tokenize one batch of examples.

    Relies on the module-level `tokenizer` (set in `fine_tune_model`) and on
    FLAGS['max_token_length_preprocessing'] for the padded/truncated length.

    Args:
        data: batch mapping with an "input" column (RDF lists stored as strings)
            and an "output" column (target texts).

    Returns:
        The tokenizer output for the inputs, extended with a "labels" entry that
        holds the token ids of the target texts.
    """
    max_length = FLAGS['max_token_length_preprocessing']

    ## When converting a pandas df to csv (used for loading dataset), a list of lists can transform to a long string
    ## Here we convert it back with literal_eval. A new list is built so the incoming batch is not modified in place.
    rdf_word_lists = [literal_eval(rdf) for rdf in data["input"]]

    model_inputs = tokenizer(rdf_word_lists, truncation=True, padding='max_length', return_tensors='pt',
                             max_length=max_length, is_split_into_words=True)

    # `text_target` replaces the deprecated `tokenizer.as_target_tokenizer()` context manager
    target_encoding = tokenizer(text_target=data["output"], padding='max_length', truncation=True,
                                return_tensors='pt', max_length=max_length)

    model_inputs["labels"] = target_encoding.input_ids

    return model_inputs


def transform_datasets(dataset):
    """
    Tokenize the splits of the loaded dataset and expose them as torch tensors.

    The "train" and "dev" splits are optional: when a split is absent from `dataset`
    (e.g. only a test file was loaded) None is returned in its place instead of
    raising a KeyError. The "test" split is required.

    Args:
        dataset: mapping from split name to dataset, with "input" and "output" columns.

    Returns:
        Tuple `(train_ds, val_ds, test_ds, true_articles_test, test_rdf_input)` where
        the first three are the tokenized splits (None when absent),
        `true_articles_test` is the untokenized "output" column of the test split and
        `test_rdf_input` is the untokenized "input" column of the test split.
    """
    model_columns = ['input_ids', 'attention_mask', 'labels']

    def _tokenize_split(split_name):
        # Tokenize one split in batches and switch it to torch format; None when the split is missing.
        if split_name not in dataset:
            return None
        split = dataset[split_name]
        tokenized = split.map(preprocess_data, batched=True, remove_columns=split.column_names)
        # transform the dataset into torch tensors, as the model will expect this format
        tokenized.set_format(type='torch', columns=model_columns)
        return tokenized

    raw_test = dataset["test"]
    # to use the actual articles for evaluation
    true_articles_test = raw_test['output']
    # The Parent Metric requires the original RDFs
    test_rdf_input = raw_test['input']

    train_ds = _tokenize_split("train")
    val_ds = _tokenize_split("dev")
    test_ds = _tokenize_split("test")

    print('LOGGING: transform_datasets DONE \n')

    return train_ds, val_ds, test_ds, true_articles_test, test_rdf_input

############################################################################## End Model and Dataset Setup ######################################################################################

############################################################################## Begin Evaluation Setup######################################################################################


def load_eval_metrics():
    """
    Instantiate every metric used for evaluation in one place.

    Returns:
        Tuple `(bleu, rouge, meteor, perplexity, bertscore, bart_scorer)`.
        The BART scorer is created on the 'cuda' device with the 'facebook/bart-base' checkpoint.
    """
    loaded_metrics = (
        datasets.load_metric("bleu"),
        evaluate.load('rouge'),
        evaluate.load('meteor'),
        evaluate.load("perplexity", module_type="metric"),
        evaluate.load("bertscore"),
        bartscore.BARTScorer(device = 'cuda', checkpoint='facebook/bart-base'),
    )

    print('LOGGING: load_eval_metrics DONE \n')

    return loaded_metrics


def postprocess_text(preds, labels):
    """
    Whitespace-tokenize predictions and labels.

    Returns:
        Tuple `(preds, labels)`: each prediction becomes a list of tokens, each label
        becomes a one-element list holding its list of tokens.
    """
    tokenized_preds = []
    for prediction in preds:
        tokenized_preds.append(prediction.split())

    tokenized_labels = []
    for reference in labels:
        tokenized_labels.append([reference.split()])

    return tokenized_preds, tokenized_labels



def decode_text(predictions, labels):
    """
    Turn predicted and reference token ids back into strings (helper for compute_metrics).

    Uses the module-level `tokenizer`.

    Returns:
        Tuple `(decoded_preds, decoded_labels)`.
    """
    # When predictions arrive as a tuple, only its first element is decoded
    predicted_ids = predictions[0] if isinstance(predictions, tuple) else predictions
    decoded_preds = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)

    # -100 cannot be decoded, so it is swapped for the pad token id first
    decodable_labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(decodable_labels, skip_special_tokens=True)

    return decoded_preds, decoded_labels

############################################################################## End Evaluation Setup######################################################################################

############################################################################## Begin Evaluation######################################################################################

def evaluate_texts(decoded_preds, decoded_labels):
    """
    Calculates metrics given a list of decoded predictions and decoded labels.

    The metric objects are loaded on the first call only and cached on the function
    object, so repeated evaluations (e.g. once per epoch through compute_metrics)
    do not reload every metric and the BARTScore model each time.

    Args:
        decoded_preds: list of generated texts.
        decoded_labels: list of reference texts, aligned with `decoded_preds`.

    Returns:
        Tuple `(bleu_output, rouge_output, meteor_output, perp_output,
        bertscore_output, bart_scores_output)`.
    """
    # post_process for BLEU, which is fed whitespace-split tokens
    blue_preds, blue_labels = postprocess_text(decoded_preds, decoded_labels)

    # setup metrics for use (loaded once, reused afterwards)
    cached_metrics = getattr(evaluate_texts, "_cached_metrics", None)
    if cached_metrics is None:
        cached_metrics = load_eval_metrics()
        evaluate_texts._cached_metrics = cached_metrics
    bleu, rouge, meteor, perplexity, bertscore, bart_scorer = cached_metrics

    # Calculate the metrics
    print(f'\n LOGGING: Calculating Blue')
    bleu_output = bleu.compute(predictions=blue_preds, references=blue_labels)
    print(f'\n LOGGING: Calculating Rouge')
    rouge_output = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    print(f'\n LOGGING: Calculating Meteor')
    meteor_output = meteor.compute(predictions=decoded_preds, references=decoded_labels)
    print(f'\n LOGGING: Calculating Perplexity')
    perp_output = perplexity.compute(predictions=decoded_preds, model_id='gpt2')
    print(f'\n LOGGING: Calculating Bertscore')
    bertscore_output = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    print(f'\n LOGGING: Calculating Bartscore')
    bart_scores_output = bart_scorer.score(srcs=decoded_preds, tgts=decoded_labels, batch_size=FLAGS['batch_size'])

    print(f'\n LOGGING: Done calculations')

    return bleu_output, rouge_output, meteor_output, perp_output, bertscore_output, bart_scores_output


def compute_metrics(pred):
    """
    Metrics to be evaluated during training and validation.
    Metrics used: BLEU, ROUGE, METEOR, Perplexity, Bertscore, BARTScore

    All results are appended to
    `<drive_path>/GraphMetricLogging/<path_model_name>_metrics.txt` (one dict per line);
    only BLEU, ROUGE and METEOR are handed back to the trainer.

    Args:
        pred: pair `(predictions, labels)` of token ids as passed in by the trainer.

    Returns:
        Dict with the keys "blue_output", "rouge_output" and "meteor_results".
    """
    # decode the predictions and labels for eval
    predictions, labels = pred
    decoded_preds, decoded_labels = decode_text(predictions, labels)

    bleu_output, rouge_output, meteor_output, perp_output, bertscore_output, bart_scores_output = evaluate_texts(decoded_preds, decoded_labels)

    ## Huggingsface trainer requires a dict if multiple metrics are used
    evaluation_results = {"blue_output": bleu_output, "rouge_output": rouge_output, "meteor_results": meteor_output, "perp_output": perp_output,
                          "bertscore_output": bertscore_output, "bart_scores_output": bart_scores_output}

    # Tensorboard doesn't like the dict format of our calculated methods, so we write them to a file so that we can create our own figures later on.
    # os.path.join supplies the separator between drive_path and the sub-directory (drive_path has no trailing slash).
    logging_directory = os.path.join(FLAGS['drive_path'], "GraphMetricLogging")
    os.makedirs(logging_directory, exist_ok=True)
    logging_for_graphs_path = os.path.join(logging_directory, f"{path_model_name}_metrics.txt")

    # Append mode creates the file on first use and never overwrites earlier evaluations
    with open(logging_for_graphs_path, 'a', encoding='utf-8') as metrics_log:
        metrics_log.write(f'{evaluation_results} \n')

    # During training we can see the intermediary results, however Bartscore, Bertscore and Perplexity make it far more difficult to read. Tensorboard also ignores these outputs.
    # Therefore we only give bleu, rouge and meteor back to the trainer for logging. We do not lose any results, as we store the total results in a text file
    return {"blue_output": bleu_output, "rouge_output": rouge_output, "meteor_results": meteor_output}


############################################################################## End Evaluation Section######################################################################################

############################################################################## Begin Huggingface Trainer Setup ######################################################################################

def set_training_args(model_name, learning_rate, num_train_epochs, evaluation_strategy, generation_num_beams, generation_max_length,
                      gradient_accumulation_steps, per_device_train_batch_size, per_device_eval_batch_size):
    """
    Assemble the Seq2SeqTrainingArguments used for fine-tuning.

    Results are written to `<drive_path>/Results/<model_name>`. Evaluation, checkpointing
    and logging all follow `evaluation_strategy`. The remaining settings
    (checkpoint limit, mixed precision, optimizer) are read from FLAGS.

    Returns:
        The configured Seq2SeqTrainingArguments instance.
    """
    # Evaluation / checkpoint / logging schedule
    schedule_options = dict(
        do_eval=True,
        do_predict=True,
        evaluation_strategy=evaluation_strategy,
        save_strategy=evaluation_strategy,
        logging_strategy=evaluation_strategy,
        save_total_limit=FLAGS['model_save_total_limit'],  # maximum number of checkpoints to keep
        load_best_model_at_end=True,  # required for the early stopping callback
        report_to="tensorboard",
        # Only relevant when training is resumed from a checkpoint in the same output dir:
        # do not skip the batches that were already seen, since new data is shown.
        ignore_data_skip=True,
    )

    # Optimisation settings
    optimisation_options = dict(
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        gradient_checkpointing=True,
        fp16=FLAGS['mixed_precision'],
        gradient_accumulation_steps=gradient_accumulation_steps,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        optim=FLAGS['training_optimizer'],  # adamw_hf, adamw_torch, adamw_apex_fused, or adafactor
    )

    # Generation settings used in the evaluation loop
    generation_options = dict(
        predict_with_generate=True,  # use generate() so generative metrics (ROUGE, BLEU) can be computed
        generation_num_beams=generation_num_beams,
        generation_max_length=generation_max_length,
    )

    training_args = Seq2SeqTrainingArguments(
        output_dir=f"{FLAGS['drive_path']}/Results/{model_name}",
        **schedule_options,
        **optimisation_options,
        **generation_options,
    )

    print('LOGGING: set_training_args DONE \n')

    return training_args


def get_clean_model(model_name):
    """
    Load a fresh copy of the pre-trained model so fine-tuning starts from the published weights.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)

     

def set_trainer(model_name, training_args, train_ds, val_ds, tokenizer):
    """
    Build the Seq2SeqTrainer used for fine-tuning.

    Args:
        model_name: name of the pre-trained model that is freshly loaded for training.
        training_args: training arguments for the trainer.
        train_ds: training dataset.
        val_ds: validation dataset.
        tokenizer: tokenizer handed to both the data collator and the trainer.

    Returns:
        Trainer instance with `compute_metrics` and an early-stopping callback attached.
    """
    fresh_model = get_clean_model(model_name)
    seq2seq_collator = DataCollatorForSeq2Seq(tokenizer)

    # Early stopping monitors the validation loss by default
    early_stopping = EarlyStoppingCallback(early_stopping_patience=FLAGS['early_stopping_patience'])

    trainer = Seq2SeqTrainer(
        model=fresh_model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        compute_metrics=compute_metrics,
        data_collator=seq2seq_collator,
        tokenizer=tokenizer,
        callbacks=[early_stopping],
    )

    print('LOGGING: set_trainer DONE \n')

    return trainer
############################################################################## End Huggingface Trainer Setup ######################################################################################

############################################################################## Begin Train and Save ######################################################################################


def train_and_save(trainer, path_model_name):
    """
    Run training and store the resulting model under `<drive_path>/Models/<path_model_name>`.
    """
    logging.set_verbosity_info()
    trainer.train()

    model_output_dir = f"{FLAGS['drive_path']}/Models/{path_model_name}"
    trainer.save_model(model_output_dir)

    print('LOGGING: train_and_save DONE \n')

############################################################################## End Train and Save ######################################################################################


############################################################################## Begin Evaluation Process ######################################################################################

def get_saved_model(path_model_name):
    """
    Load the fine-tuned model and its tokenizer from FLAGS['saved_model_path'].

    Only local files are used. `path_model_name` is accepted but not used for the lookup.

    Returns:
        Tuple `(saved_model, tokenizer)`.
    """
    checkpoint_dir = FLAGS['saved_model_path']
    local_only = {"local_files_only": True}

    saved_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_dir, **local_only)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir, add_prefix_space=True, **local_only)

    return saved_model, tokenizer



def generate_predictions(saved_model, test_set):
    """
    Generate output token ids for every example of the tokenized test set.

    The "labels" column is dropped and the remaining columns are fed to
    `saved_model.generate` in batches of FLAGS['batch_size'].

    Returns:
        Flat list with one generated sequence per test example.
    """
    model_inputs = test_set.remove_columns("labels")

    # the dataloader serves the tokenized test dataset batch by batch
    batches = torch.utils.data.DataLoader(model_inputs, batch_size=FLAGS['batch_size'])

    generation_settings = dict(max_new_tokens=100, do_sample=True, num_beams=5, top_p=0.7, repetition_penalty=1.3)

    # collect the sequences of every batch straight into one flat list
    all_predictions_flattened = []
    for batch in batches:
        generated = saved_model.generate(**batch, **generation_settings)
        all_predictions_flattened.extend(generated)

    print('LOGGING: generate_predictions DONE \n')
    return all_predictions_flattened


def decode_predictions(predictions, tokenizer):
    """
    Convert each generated token sequence into text, dropping special tokens.

    Returns:
        List of decoded strings, in the order of `predictions`.
    """
    decoded_predictions = [
        tokenizer.decode(token_ids, skip_special_tokens=True)
        for token_ids in predictions
    ]

    print('LOGGING: decode_predictions DONE \n')

    return decoded_predictions



def evaluate_test_set(path_model_name, test_set, true_articles_test, test_rdf_input):
    """
    Evaluate the saved model on the test set.

    Loads the saved model, generates and decodes predictions, computes all metrics,
    writes them to the results log and finally starts the PARENT evaluation.

    Returns:
        Dict with the output of every metric.
    """
    saved_model, saved_tokenizer = get_saved_model(path_model_name)

    # generate and decode the predictions in preparation of evaluation
    generated_ids = generate_predictions(saved_model, test_set)
    decoded_test_predictions = decode_predictions(generated_ids, saved_tokenizer)

    # calculate the evaluation metrics on the predictions
    metric_outputs = evaluate_texts(decoded_test_predictions, true_articles_test)
    bleu_output, rouge_output, meteor_output, perp_output, bertscore_output, bart_scores_output = metric_outputs

    evaluation_results = {
        "blue_output": bleu_output,
        "rouge_output": rouge_output,
        "meteor_results": meteor_output,
        "perp_output": perp_output,
        "bertscore_output": bertscore_output,
        "bart_scores_output": bart_scores_output,
    }

    log_results(path_model_name, evaluation_results)

    ## Additional PARENT evaluation: generations, references and the original RDF tables
    parent_attempt(path_model_name, decoded_test_predictions, true_articles_test, test_rdf_input)

    return evaluation_results


def write_to_text_parent(path_model_name, decoded_predictions, true_articles, rdfs):
    """
    Write the three text files that the Parent script reads.

    Files are created under `<drive_path>/Parent_test/` with `path_model_name` as prefix;
    every entry is written on its own line, followed by a space.
    """
    file_prefix = f"{FLAGS['drive_path']}/Parent_test/{path_model_name}"

    # (file suffix, entries) in the order the files are written
    outputs = (
        ("_true_articles.txt", true_articles),
        ("_decode_predictions.txt", decoded_predictions),
        ("_rdfs.txt", rdfs),
    )

    for suffix, entries in outputs:
        with open(f"{file_prefix}{suffix}", 'w', encoding='utf-8') as f:
            for entry in entries:
                f.write(f'{entry} \n')


def prepare_inputs_parent(RDFs):
    """
    Convert RDF strings into the nested structure consumed by the parent module.

    Every RDF string is split on ", " into "Attribute | Value" pairs; square brackets
    are removed, underscores become spaces and each pair is split on " | ".

    Returns:
        A list of lists containing tuples --> [ [ (Attribute, Value), (Attribute, Value), ...] ...]
    """
    def _clean_pair(pair_text):
        # strip list brackets and turn underscores into spaces
        return pair_text.replace('[', '').replace(']', '').replace('_', ' ')

    return [
        [tuple(_clean_pair(pair_text).split(' | ')) for pair_text in rdf_text.split(", ")]
        for rdf_text in RDFs
    ]


def parent_attempt(path_model_name, generations, references, rdfs):
    """
    Run the PARENT evaluation script on the generated texts.

    The Parent metric needs special treatment, as it only accepts specific inputs and
    file types: the RDFs are converted with `prepare_inputs_parent`, all inputs are
    written to text files with `write_to_text_parent`, and `Evaluation_code/Parent.py`
    is then executed on those files.

    The script is started through `subprocess` with an argument list, so the paths are
    formatted by Python before they reach the command line. It runs without `-i`, so
    no interactive prompt is left waiting for input.

    Args:
        path_model_name: prefix of the text files inside `<drive_path>/Parent_test/`.
        generations: decoded model predictions.
        references: reference texts.
        rdfs: raw RDF input strings.

    Returns:
        None. The output of the script is printed.
    """
    import subprocess
    import sys

    prepared_rdfs = prepare_inputs_parent(rdfs)
    write_to_text_parent(path_model_name, generations, references, prepared_rdfs)

    parent_files_prefix = f"{FLAGS['drive_path']}/Parent_test/{path_model_name}"
    command = [
        sys.executable,
        f"{FLAGS['drive_path']}/Evaluation_code/Parent.py",
        "--references", f"{parent_files_prefix}_true_articles.txt",
        "--generations", f"{parent_files_prefix}_decode_predictions.txt",
        "--tables", f"{parent_files_prefix}_rdfs.txt",
    ]

    # Capture the output and print it, so it shows up in the notebook cell
    completed = subprocess.run(command, capture_output=True, text=True, stdin=subprocess.DEVNULL)
    print(completed.stdout)
    print(completed.stderr)
    if completed.returncode != 0:
        print(f'LOGGING: Parent.py exited with return code {completed.returncode} \n')

def log_results(path_model_name, results):
    """
    Store the evaluation results as JSON in `<drive_path>/Logging_TestSet_Results/<path_model_name>_logResults.json`.
    """
    results_path = f"{FLAGS['drive_path']}/Logging_TestSet_Results/{path_model_name}_logResults.json"
    with open(results_path, 'w') as results_file:
        serialized_results = json.dumps(results)
        results_file.write(serialized_results)

############################################################################## End Evaluation Process ######################################################################################

############################################################################## Begin Full fine-tune setup######################################################################################

def fine_tune_model(model_name):
    """
    Run the pipeline for `model_name`: optional fine-tuning followed by test set evaluation.

    Fine-tuning is only performed when both a training and a validation set are
    available (i.e. `train_ds` and `val_ds` are not None); otherwise the saved model is
    evaluated directly.

    Side effects: sets the module-level `tokenizer` (used while tokenizing the data and
    decoding predictions) and `path_model_name` (used for the metric log file name).

    Args:
        model_name: name of the pre-trained model, e.g. "google/mt5-base".

    Returns:
        Dict with the evaluation results on the test set.
    """
    # ensure cuda compatability
    ensure_cuda_compatability()

    # I instantiate the tokenizer as a global variable, as the .map function in transform_datasets was not working properly.
    # This should not be an issue, as the tokenizer remains consistent during training and evaluation.
    global tokenizer
    global path_model_name

    # Both mt5 and T5-dutch have / in their name, which makes pathing more chaotic
    if '/' in model_name:
        path_model_name = model_name.replace('/', '_')
    elif '-' in model_name:
        path_model_name = model_name.replace('-', '_')
    else:
        # nothing to replace; keep the name so path_model_name is always defined
        path_model_name = model_name

    # Only the tokenizer is needed here; evaluate_test_set loads the saved model itself
    _, tokenizer = get_saved_model(FLAGS['path_model_name'])

    entire_dataset = load_CACAPO_data()

    train_ds, val_ds, test_ds, true_articles_test, test_rdf_inputs = transform_datasets(entire_dataset)

    if train_ds is not None and val_ds is not None:
        training_args = set_training_args(model_name=model_name, learning_rate = FLAGS['learning_rate'],
                                          num_train_epochs = FLAGS['num_epochs'], evaluation_strategy = FLAGS['training_strategy'],
                                          generation_num_beams=FLAGS['generation_num_beams'],
                                          generation_max_length = FLAGS['generation_max_length'],
                                          gradient_accumulation_steps = FLAGS['gradient_accumulation_steps'],
                                          per_device_train_batch_size= FLAGS['batch_size'], per_device_eval_batch_size= FLAGS['batch_size'])

        trainer = set_trainer(model_name, training_args, train_ds, val_ds, tokenizer)

        ## Finally fine-tune the model and save it
        train_and_save(trainer, path_model_name)
    else:
        print('LOGGING: no train/dev data available, skipping fine-tuning and evaluating the saved model \n')

    testset_evaluation_results = evaluate_test_set(path_model_name, test_ds, true_articles_test, test_rdf_inputs)

    return testset_evaluation_results


############################################################################## End Full fine-tune setup######################################################################################


# Start training processes
def main(flags):
    """
    Entry point: install `flags` as the module-level FLAGS and run the pipeline for FLAGS['model_name'].
    """
    global FLAGS

    FLAGS = flags
    fine_tune_model(flags['model_name'])

In [None]:
# Run the whole pipeline with the parameters defined in the "Set Parameters" section.
main(FLAGS)

Torch version: 1.13.1+cu116
Cuda version: 11.6
Cudnn version: 8302
Is cuda available: True
Number of cuda devices: 1
Current default device: 0
First cuda device: <torch.cuda.device object at 0x7ff3c8f32f10>
Name of the first cuda device: NVIDIA A100-SXM4-40GB






  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?ba/s]



LOGGING: transform_datasets DONE 

LOGGING: generate_predictions DONE 

LOGGING: decode_predictions DONE 



  bleu = datasets.load_metric("bleu")
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


LOGGING: load_eval_metrics DONE 


 LOGGING: Calculating Blue

 LOGGING: Calculating Rouge

 LOGGING: Calculating Meteor

 LOGGING: Calculating Perplexity


Using pad_token, but it is not set yet.


  0%|          | 0/24 [00:00<?, ?it/s]


 LOGGING: Calculating Bertscore

 LOGGING: Calculating Bartscore

 LOGGING: Done calculations
python3: can't open file 'f/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py': [Errno 2] No such file or directory
>>> 

KeyboardInterrupt
>>> ^C


# PARENT Value generations

#### Base good

In [None]:
# PARENT evaluation for the "Base good" setup: run Parent.py on the stored references, generations and RDF tables.
# NOTE(review): `-i` keeps the interpreter open after the script finishes — presumably not needed in a notebook; confirm.
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Improved_Parent_test_google_mt5-base_true_articles.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Improved_Parent_test_google_mt5-base_decode_predictions.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Improved_Parent_test_google_mt5-base_rdfs.txt"

2023-02-16 14:04:52.622920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 14:04:52.623025: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  339.3934664047637    len(precisions  380)
sum(recalls):  119.87687802221485    len(recalls) 380)
sum(all_f_scores) :  149.59087501001386    len(all_f_scores)  380)
I0216 14:04:55.769519 140309656155968 Parent.py:571] Evaluated 380 examples.
I0216 14:04:55.769849 140309656155968 Parent.py:572] Precision = 0.8931 Recall = 0.3155 F-score = 0.3937
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThe

#### Base poor

In [None]:
# PARENT evaluation for the "Base poor" setup: run Parent.py on the stored references, generations and RDF tables.
# NOTE(review): `-i` keeps the interpreter open after the script finishes — presumably not needed in a notebook; confirm.
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Base model/Poor/Base_model_poor_google_mt5-base_true_articles.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Base model/Poor/Base_model_poor_google_mt5-base_decode_predictions.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Base model/Poor/Base_model_poor_google_mt5-base_rdfs.txt"

2023-02-16 14:05:54.391824: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 14:05:54.391928: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  248.0893484059181    len(precisions  380)
sum(recalls):  22.9782212815476    len(recalls) 380)
sum(all_f_scores) :  28.73009005219199    len(all_f_scores)  380)
I0216 14:05:56.850828 140386567505728 Parent.py:571] Evaluated 380 examples.
I0216 14:05:56.851159 140386567505728 Parent.py:572] Precision = 0.6529 Recall = 0.0605 F-score = 0.0756
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThesis

#### Augmented good

In [None]:
# PARENT evaluation for the "Augmented good" setup: run Parent.py on the stored references, generations and RDF tables.
# NOTE(review): the --tables file uses the "Improved_Parent_test_" prefix while the other two files use "Aug_improved_" — confirm this is the intended file.
# NOTE(review): `-i` keeps the interpreter open after the script finishes — presumably not needed in a notebook; confirm.
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Aug_improved_google_mt5-base_true_articles.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Aug_improved_google_mt5-base_decode_predictions.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Improved_Parent_test_google_mt5-base_rdfs.txt"

2023-02-16 14:02:44.739368: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 14:02:44.739480: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  328.1906655177836    len(precisions  380)
sum(recalls):  109.25244749898475    len(recalls) 380)
sum(all_f_scores) :  136.49295113985542    len(all_f_scores)  380)
I0216 14:02:48.712801 139956158285632 Parent.py:571] Evaluated 380 examples.
I0216 14:02:48.713135 139956158285632 Parent.py:572] Precision = 0.8637 Recall = 0.2875 F-score = 0.3592
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThe

#### Augmented poor


In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Augmented_model/Poor/Augmented_poor_test_google_mt5-base_true_articles.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Augmented_model/Poor/Augmented_poor_test_google_mt5-base_decode_predictions.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Augmented_model/Poor/Augmented_poor_test_google_mt5-base_rdfs.txt"

2023-02-16 14:01:51.509110: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 14:01:51.509208: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  245.35758638302747    len(precisions  380)
sum(recalls):  20.963219653283073    len(recalls) 380)
sum(all_f_scores) :  26.198779139869437    len(all_f_scores)  380)
I0216 14:01:55.082578 139755701761856 Parent.py:571] Evaluated 380 examples.
I0216 14:01:55.082922 139755701761856 Parent.py:572] Precision = 0.6457 Recall = 0.0552 F-score = 0.0689
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscTh

#### Elongated good

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated_Improved_google_mt5-base_true_articles.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated_Improved_google_mt5-base_decode_predictions.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Improved_Parent_test_google_mt5-base_rdfs.txt"

2023-02-16 14:00:52.426004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 14:00:52.426119: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  347.41226204618    len(precisions  380)
sum(recalls):  125.28811934754238    len(recalls) 380)
sum(all_f_scores) :  154.61747811621265    len(all_f_scores)  380)
I0216 14:00:56.123008 140385501448000 Parent.py:571] Evaluated 380 examples.
I0216 14:00:56.123364 140385501448000 Parent.py:572] Precision = 0.9142 Recall = 0.3297 F-score = 0.4069
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThesi

#### Elongated poor

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Elongated_Model/Poor/google_mt5-base_true_articles.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Elongated_Model/Poor/google_mt5-base_decode_predictions.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Elongated_Model/Poor/google_mt5-base_rdfs.txt"

2023-02-16 13:59:30.777004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 13:59:30.777115: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  251.08540937985896    len(precisions  380)
sum(recalls):  19.359979556410078    len(recalls) 380)
sum(all_f_scores) :  24.25166169109174    len(all_f_scores)  380)
I0216 13:59:34.504665 139663418181440 Parent.py:571] Evaluated 380 examples.
I0216 13:59:34.505154 139663418181440 Parent.py:572] Precision = 0.6608 Recall = 0.0509 F-score = 0.0638
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThe

#### Differences between Dutch and English PARENT scores

##### Elongated Subset

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated Language Split/NL/True_articles_improved_NL.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated Language Split/NL/Generations_improved_NL.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated Language Split/NL/RDF_Improved_NL.txt"

2023-02-16 21:35:09.000237: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 21:35:09.000346: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  181.34627213984146    len(precisions  196)
sum(recalls):  67.90919578304629    len(recalls) 196)
sum(all_f_scores) :  82.74577243960334    len(all_f_scores)  196)
I0216 21:35:12.711636 140382747481920 Parent.py:571] Evaluated 196 examples.
I0216 21:35:12.711983 140382747481920 Parent.py:572] Precision = 0.9252 Recall = 0.3465 F-score = 0.4222
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThes

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated Language Split/Engels/True_articles_improved_ENG.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated Language Split/Engels/Generations_improved_ENG.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Elongated/Elongated Language Split/Engels/RDF_improved_ENG.txt"

2023-02-16 21:34:28.658749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 21:34:28.658960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  166.0659899063384    len(precisions  184)
sum(recalls):  57.378923564496084    len(recalls) 184)
sum(all_f_scores) :  71.87170567660935    len(all_f_scores)  184)
I0216 21:34:34.091015 139884549666624 Parent.py:571] Evaluated 184 examples.
I0216 21:34:34.091451 139884549666624 Parent.py:572] Precision = 0.9025 Recall = 0.3118 F-score = 0.3906
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThes

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Language_change/Base_reference_parent_ENG.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Language_change/Base_generations_parent_ENG.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Language_change/Base_rdf_parent_ENG.txt"

2023-02-16 22:16:28.444988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 22:16:28.445105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  162.4236033574828    len(precisions  184)
sum(recalls):  56.86800907902294    len(recalls) 184)
sum(all_f_scores) :  71.83224363806865    len(all_f_scores)  184)
I0216 22:16:33.818626 139724901484352 Parent.py:571] Evaluated 184 examples.
I0216 22:16:33.818982 139724901484352 Parent.py:572] Precision = 0.8827 Recall = 0.3091 F-score = 0.3904
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThesi

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Language_change/Base_reference_parent_NL.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Language_change/Base_generations_parent_NL.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Base/Language_change/Base_rdf_parent_NL.txt"

2023-02-16 22:16:39.602112: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 22:16:39.602255: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  176.9698630472812    len(precisions  196)
sum(recalls):  63.008868943191956    len(recalls) 196)
sum(all_f_scores) :  77.75863137194513    len(all_f_scores)  196)
I0216 22:16:44.972071 140159369832256 Parent.py:571] Evaluated 196 examples.
I0216 22:16:44.972404 140159369832256 Parent.py:572] Precision = 0.9029 Recall = 0.3215 F-score = 0.3967
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThes

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Language split/Aug_reference_parent_Eng.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Language split/Aug_generations_parent_Eng.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Language split/Aug_good_parent_ENG.txt"

2023-02-16 22:16:51.007870: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 22:16:51.007994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  152.77040166989386    len(precisions  184)
sum(recalls):  50.36284367263732    len(recalls) 184)
sum(all_f_scores) :  64.00356986638603    len(all_f_scores)  184)
I0216 22:16:55.958149 140229007349568 Parent.py:571] Evaluated 184 examples.
I0216 22:16:55.958490 140229007349568 Parent.py:572] Precision = 0.8303 Recall = 0.2737 F-score = 0.3478
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThes

In [None]:
!python -i "/content/drive/MyDrive/MscThesis/Evaluation_code/Parent.py" \
            --references "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Language split/Aug_reference_NL.txt" \
            --generations "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Language split/Aug_generations_parent_Nl.txt"  \
            --tables "/content/drive/MyDrive/MscThesis/Parent_test/Experiment_Elongated_Poor_v_Good/Improved testing/Augmented/Language split/Aug_good_parent_NL.txt"

2023-02-16 22:17:01.779703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-02-16 22:17:01.779834: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
sum(precisions):  175.42026384789006    len(precisions  196)
sum(recalls):  58.889603826347454    len(recalls) 196)
sum(all_f_scores) :  72.48938127346942    len(all_f_scores)  196)
I0216 22:17:06.579260 140143403939648 Parent.py:571] Evaluated 196 examples.
I0216 22:17:06.579537 140143403939648 Parent.py:572] Precision = 0.8950 Recall = 0.3005 F-score = 0.3698
Traceback (most recent call last):
  File "/content/drive/MyDrive/MscThe