In [1]:
!pip install nltk --quiet
!pip install transformers --quiet
!pip install datasets --quiet
!pip install evaluate --quiet
!pip install sentencepiece --quiet
!pip install accelerate --quiet
!pip install rouge_score --quiet
!pip install bert_score --quiet
!pip install torchvision --quiet
!pip install tensorboard --quiet

#%pip install tensorboardX --quiet
#!pip install transformers[deepspeed] --quiet
#!pip install deepspeed --quiet


[K     |████████████████████████████████| 5.8 MB 8.3 MB/s 
[K     |████████████████████████████████| 182 kB 93.7 MB/s 
[K     |████████████████████████████████| 7.6 MB 82.9 MB/s 
[K     |████████████████████████████████| 451 kB 6.8 MB/s 
[K     |████████████████████████████████| 212 kB 90.0 MB/s 
[K     |████████████████████████████████| 132 kB 89.9 MB/s 
[K     |████████████████████████████████| 127 kB 100.2 MB/s 
[K     |████████████████████████████████| 72 kB 1.4 MB/s 
[K     |████████████████████████████████| 1.3 MB 8.5 MB/s 
[K     |████████████████████████████████| 191 kB 7.7 MB/s 
[?25h  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 60 kB 5.6 MB/s 
[?25h

In [2]:
!cp /content/drive/MyDrive/MscThesis/Evaluation_code/Bartscore.py /content

In [3]:
import os
import pickle
import pandas as pd
import numpy as np
import re
import torch 
import nltk

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, pipeline,  EarlyStoppingCallback, DataCollatorForSeq2Seq, Trainer
from transformers import MT5ForConditionalGeneration, T5Tokenizer, T5ForConditionalGeneration, MT5TokenizerFast, is_torch_tpu_available, logging

import datasets
import evaluate
import accelerate
#import deepspeed

import Bartscore as bartscore ## code for Bartscore
import gc
import json
from ast import literal_eval

In [4]:
# Define Parameters
FLAGS = {}
FLAGS['model_name'] = 'google/mt5-base' 
FLAGS['max_token_length_preprocessing'] = 256
FLAGS['early_stopping_patience'] = 3
FLAGS['model_save_total_limit'] = 4
FLAGS['training_optimizer'] = "adafactor"
FLAGS['batch_size'] = 16
FLAGS['gradient_accumulation_steps'] = 2
FLAGS['learning_rate'] = 5e-05
FLAGS['num_epochs'] = 200
FLAGS['training_strategy'] = 'epoch'
FLAGS['generation_num_beams'] = 5
FLAGS['generation_max_length'] = 100
FLAGS['data_location'] = "/content/drive/MyDrive/MscThesis/Data/Cleaned_Continuous/Stocks/"
FLAGS['drive_path'] = "/content/drive/MyDrive/MscThesis/"

In [5]:
############################################################################## Begin Environment Setup ######################################################################################

def ensure_cuda_compatability():
    print(f'Torch version: {torch.__version__}')
    print(f'Cuda version: {torch.version.cuda}')
    print(f'Cudnn version: {torch.backends.cudnn.version()}')
    print(f'Is cuda available: {torch.cuda.is_available()}')
    print(f'Number of cuda devices: {torch.cuda.device_count()}')
    print(f'Current default device: {torch.cuda.current_device()}')
    print(f'First cuda device: {torch.cuda.device(0)}')
    print(f'Name of the first cuda device: {torch.cuda.get_device_name(0)}\n\n')
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    #Ensure we are really working with full GPU capacity
    gc.collect() 
    torch.cuda.empty_cache()

############################################################################## End Environment Setup ######################################################################################

############################################################################## Begin Model and Dataset Setup ######################################################################################

def preprocess_model(model_name):
    """
    Setup the model and tokenizer for preprocessing. This will be a pre-trained model collected from huggingface
    """
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    #model.cuda()
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    print('LOGGING: preprocess_model DONE \n')
    return model, tokenizer


def load_CACAPO_data():
    """
    This function retrieves the csv files and creates a dataset
    """
    #print('LOGGING: load_CACAPO_data DONE \n')
    return datasets.load_dataset(FLAGS['data_location'], data_files={"train": "Train.csv", "dev": "Dev.csv", "test": "Test.csv"})


def preprocess_data(data):
    """
    Tokenize the data
    """
    max_length = FLAGS['max_token_length_preprocessing']
    RDFs = data["input"]
    texts = data["output"]

    ## When converting a pandas df to csv (used for loading dataset), a list of lists can transform to a long string
    ## Here we convert it back with literal_eval

    for rdf_iteration, rdf in enumerate(RDFs):
        RDFs[rdf_iteration] = literal_eval(rdf)

    #This creates a dataset object of model_inputs{input_ids, attention_mask}
    model_inputs = tokenizer(RDFs, truncation=True, padding='max_length', return_tensors='pt',  max_length=max_length, is_split_into_words=True)
    
    # specially for seq2seq tokenizer, "Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to sequence-to-sequence models that need a slightly different processing for the labels."
    with tokenizer.as_target_tokenizer():
        target_texts = tokenizer(texts, padding='max_length', truncation=True, return_tensors='pt',  max_length=max_length).input_ids

    #This adds the tokenized target texts as a new column in the dataset object, thus becoming model_inputs{input_ids, attention_mask, labels}
    model_inputs["labels"] = target_texts

    return model_inputs


def transform_datasets(dataset):
    """
    After loading in and creating the initial dataset, the text data is transformed, by tokenizing the input and output texts. The initial dataset is also split into train,val,test for training use.
    NOTE That the test set will not be preprocessed here yet, this will be done in a different function
    """

    train_ds = dataset["train"]
    val_ds = dataset["dev"]
    test_ds = dataset["test"]

    # to use the actual articles for evaluation
    true_articles_test = test_ds['output']
    # The Parent Metric requires the original RDFs
    test_rdf_input = test_ds['input']


    ## Process the data in batches
    train_ds = train_ds.map(preprocess_data, batched=True, remove_columns=dataset["train"].column_names)
    val_ds = val_ds.map(preprocess_data, batched=True, remove_columns=dataset["dev"].column_names)
    test_ds = test_ds.map(preprocess_data, batched=True, remove_columns=dataset["test"].column_names)

    # transform the datasets into torch sensors, as the model will expect this format
    train_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels']) #, device= "cuda")
    val_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels']) #, device= "cuda")
    test_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels']) # , device= "cuda")

    print('LOGGING: transform_datasets DONE \n')

    return train_ds, val_ds, test_ds, true_articles_test, test_rdf_input

############################################################################## End Model and Dataset Setup ######################################################################################

############################################################################## Begin Evaluation Setup######################################################################################


def load_eval_metrics():
    """
    Loads in all metrics that will be used later on during evaluation. This is seperated to not load in the metrics a dozen of times during training.
    """
    bleu = datasets.load_metric("bleu")
    rouge = evaluate.load('rouge')
    meteor = evaluate.load('meteor')
    perplexity = evaluate.load("perplexity", module_type="metric")
    bertscore = evaluate.load("bertscore")
    bart_scorer = bartscore.BARTScorer(device = 'cuda', checkpoint='facebook/bart-base') 

    print('LOGGING: load_eval_metrics DONE \n')

    return bleu, rouge, meteor, perplexity, bertscore, bart_scorer


def postprocess_text(preds, labels):
    """
    Supplementary Method called in decode_text.

    Returns list of split decoded labels and predictions for evaluation
    """
    preds = [pred.split() for pred in preds]
    labels = [[label.split()] for label in labels]
    return preds, labels



def decode_text(predictions, labels):
    """
    Supplementary Method called in compute_metrics.

    Returns decoded labels and predictions for evaluation
    """
    if isinstance(predictions, tuple):
            predictions = predictions[0]

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)

    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    
    # with open("../GraphMetricLogging/decoded_preds.txt", 'w', encoding='utf-8') as tester:
    #     for pairs in decoded_preds:
    #         tester.write(f'{pairs} \n')
    # with open("../GraphMetricLogging/decoded_labels.txt", 'w', encoding='utf-8') as test:
    #     for labels in decoded_labels:
    #         test.write(f'{labels} \n')
    # with open("../GraphMetricLogging/preds.txt", 'w', encoding='utf-8') as a:
    #     for pred_pairs in predictions:
    #         a.write(f'{pred_pairs} \n')
    # with open("../GraphMetricLogging/labels.txt", 'w', encoding='utf-8') as b:
    #     for pred_labels in decoded_preds:
    #         b.write(f'{pred_labels} \n')
    
    return decoded_preds, decoded_labels

############################################################################## End Evaluation Setup######################################################################################

############################################################################## Begin Evaluation######################################################################################

def evaluate_texts(decoded_preds, decoded_labels):
    """
    Calculates metrics given a list of decoded predictions and decoded labels
    """
    #post_process for BLEU
    blue_preds, blue_labels = postprocess_text(decoded_preds,  decoded_labels)

    # setup metrics for use
    bleu, rouge, meteor,perplexity, bertscore, bart_scorer = load_eval_metrics()

    #Calculate the metrics
    print(f'\n LOGGING: Calculating Blue')
    bleu_output = bleu.compute(predictions=blue_preds, references=blue_labels)
    print(f'\n LOGGING: Calculating Rouge')
    rouge_output = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    print(f'\n LOGGING: Calculating Meteor')
    meteor_output = meteor.compute(predictions=decoded_preds, references=decoded_labels)
    #print(f'\n LOGGING: Calculating Perplexity')
    #perp_output = perplexity.compute(predictions=decoded_preds, model_id='gpt2')
    print(f'\n LOGGING: Calculating Bertscore')
    bertscore_output = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    print(f'\n LOGGING: Calculating Bartscore')
    bart_scores_output = bart_scorer.score(srcs=decoded_preds, tgts=decoded_labels, batch_size=FLAGS['batch_size'])

    print(f'\n LOGGING: Done calculations')

    #return bleu_output, rouge_output, meteor_output, perp_output, bertscore_output, bart_scores_output
    return bleu_output, rouge_output, meteor_output, bertscore_output, bart_scores_output



def compute_metrics(pred):
    """"
    Metrics to be evaluated during training and validation
    Metrics used: BLEU, ROUGE, METEOR, Bertscore, BARTScore
    """
    # decode the predictions and labels for eval
    predictions, labels = pred
    decoded_preds, decoded_labels = decode_text(predictions, labels)

    bleu_output, rouge_output, meteor_output, bertscore_output, bart_scores_output = evaluate_texts(decoded_preds, decoded_labels)
    
    ## Huggingsface trainer requires a dict if multiple metrics are used
    evaluation_results = {"blue_output": bleu_output, "rouge_output": rouge_output, "meteor_results": meteor_output, 
                          "bertscore_output": bertscore_output, "bart_scores_output": bart_scores_output}  
    
    # Tensorboard doesn't like the dict format of our calculated methods, so we write them to a file so that we can create our own figures later on.
    logging_for_graphs_path = f"{FLAGS['drive_path']}GraphMetricLogging/{path_model_name}_metrics.txt"
    
    # if the file doesn't exist yet, create it and write first evaluation results to it
    if not os.path.exists(logging_for_graphs_path):
        with open(logging_for_graphs_path, 'w', encoding='utf-8') as logging_creation:
            logging_creation.write(f'{evaluation_results} \n')
    # metric file already exists, so now we merely append to the existing file. We need a seperate opener, as otherwise we would overwrite the file
    else:
        with open(logging_for_graphs_path, 'a', encoding='utf-8') as logging_appending:
            logging_appending.write(f'{evaluation_results} \n')

    #During training we can see the intermediary results, however Bartscore, Bertscore and Perplexity, make it far mor difficult to read. Tensorboard also ignores these outputs.
    #Therefore we only give bleu, rouge and meteor back to the trainer for logging. We do not lose any results, as we store the total results in a text file                
    return {"blue_output": bleu_output, "rouge_output": rouge_output, "meteor_results": meteor_output}


############################################################################## End Evaluation Section######################################################################################

############################################################################## Begin Huggingface Trainer Setup ######################################################################################

def set_training_args(model_name, learning_rate, num_train_epochs, evaluation_strategy, generation_num_beams, generation_max_length,
                      gradient_accumulation_steps, per_device_train_batch_size, per_device_eval_batch_size):
    """
    Setup the training arguments that will be used during training.
    """
    model_dir = f"{FLAGS['drive_path']}/Results/{model_name}"

    training_args = Seq2SeqTrainingArguments(
                output_dir=model_dir,
                learning_rate=learning_rate,
                do_eval=True, # will be set to true if evaluation strategy is set
                do_predict=True, #Whether to run predictions on the test set or not.
                num_train_epochs=num_train_epochs,
                evaluation_strategy= evaluation_strategy,
                save_strategy=evaluation_strategy,
                logging_strategy = evaluation_strategy,
                save_total_limit= FLAGS['model_save_total_limit'], # the maximum number of models to keep before deleting the oldest one
                predict_with_generate=True, # Whether to use generate to calculate generative metrics (ROUGE, BLEU).
                generation_num_beams=generation_num_beams,  #The num_beams to use on each evaluation loop when predict_with_generate=True. Will default to the num_beams value of the model configuration
                gradient_checkpointing=True, #
                generation_max_length=generation_max_length,
                gradient_accumulation_steps=gradient_accumulation_steps, #Number of updates steps to accumulate the gradients for, before performing a backward/update pass
                per_device_train_batch_size=per_device_train_batch_size, #The batch size per GPU/TPU core/CPU for training.
                per_device_eval_batch_size=per_device_eval_batch_size, #The batch size per GPU/TPU core/CPU for evaluation.
                optim= FLAGS['training_optimizer'], #The optimizer to use: adamw_hf, adamw_torch, adamw_apex_fused, or adafactor.
                report_to="tensorboard",
                load_best_model_at_end = True, #required for early stopping callback 
       )

    print('LOGGING: set_training_args DONE \n')

    return training_args


def get_clean_model(model_name):
    """
    Ensures that a new, fresh model is used for finetuning
    """
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    #model.to('cuda')
    return model

     

def set_trainer(model_name, training_args, train_ds, val_ds, tokenizer):
    """
    Initializes a trainer
    Takes in: Model name, training arguments, training dataset, validation dataset, and tokenizer
    Returns: Trainer instance
    """
    #clean_model = get_clean_model(model_name)
    continued_model = AutoModelForSeq2SeqLM.from_pretrained("/content/drive/MyDrive/MscThesis/Models/mT5_Sport_First_1of4/")

    data_collator = DataCollatorForSeq2Seq(tokenizer)
    trainer = Seq2SeqTrainer(
                model=continued_model,
                args=training_args,
                train_dataset=train_ds,
                eval_dataset=val_ds,
                compute_metrics=compute_metrics,
                data_collator = data_collator,
                tokenizer=tokenizer,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=FLAGS['early_stopping_patience'])], #Earlystopping metric is by default the validation loss
                )

    print('LOGGING: set_trainer DONE \n')

    return trainer
############################################################################## End Huggingface Trainer Setup ######################################################################################

############################################################################## Begin Train and Save ######################################################################################


def train_and_save(trainer, path_model_name):
    logging.set_verbosity_info()
    #trainer.train()
    
    #Continue training on a previous checkpoint
    trainer.train("/content/drive/MyDrive/MscThesis/Models/mT5_Sport_First_1of4/")

    trainer.save_model(f"{FLAGS['drive_path']}/Models/{path_model_name}")

    print('LOGGING: train_and_save DONE \n')

############################################################################## End Train and Save ######################################################################################


############################################################################## Begin Evaluation Process ######################################################################################

def get_saved_model(path_model_name):
    """"
    Retrieves the best model that was saved after fine-tuning
    """
    saved_model_path = f"{FLAGS['drive_path']}/Models/{path_model_name}"

    saved_model = AutoModelForSeq2SeqLM.from_pretrained(saved_model_path, local_files_only=True)
    #saved_model.cuda()
    tokenizer = AutoTokenizer.from_pretrained(saved_model_path ,local_files_only=True)

    return saved_model, tokenizer



def generate_predictions(saved_model, test_set):
    """
    Generates predictions based on the test set, returns a list of predictions and the corresponding "true" articles
    """
    encoded_inputs = test_set.remove_columns("labels")

    # set-up a dataloader to load in the tokenized test dataset
    dataloader = torch.utils.data.DataLoader(encoded_inputs,  batch_size=FLAGS['batch_size']) 

    # generate text for each batch
    all_predictions = []
    for i,batch in enumerate(dataloader):
        predictions = saved_model.generate(**batch, max_new_tokens = 100, do_sample=True, num_beams = 5, top_p=0.7, repetition_penalty = 1.3) 
        all_predictions.append(predictions)

    # flatten predictions
    all_predictions_flattened = [pred for preds in all_predictions for pred in preds]

    print('LOGGING: generate_predictions DONE \n')
    return all_predictions_flattened


def decode_predictions(predictions, tokenizer):
    """
    Decode the predictions made by the model
    """
    decoded_predictions = []

    for iteration, prediction in enumerate(predictions):
        # single_decoded_prediction = (tokenizer.decode(prediction,skip_special_tokens=True))
        # if len(single_decoded_prediction) != 0:
        decoded_predictions.append((tokenizer.decode(prediction,skip_special_tokens=True)))

    print('LOGGING: decode_predictions DONE \n')

    return decoded_predictions



def evaluate_test_set(path_model_name, test_set, true_articles_test, test_rdf_input):
    """
    Transforms test set, retrieves predictions, and evaluates these predictions
    """
    saved_model, saved_tokenizer = get_saved_model(path_model_name)

    predictions = generate_predictions(saved_model, test_set)

    #decode the predictions in preperation of evaluation
    decoded_test_predictions = decode_predictions(predictions, saved_tokenizer)

    #calculate the evaluation metrics on the predictions
    #bleu_output, rouge_output, meteor_output, perp_output,  bertscore_output, bart_scores_output = evaluate_texts(decoded_test_predictions, true_articles_test)
    bleu_output, rouge_output, meteor_output,  bertscore_output, bart_scores_output = evaluate_texts(decoded_test_predictions, true_articles_test)

    ## Huggingsface trainer requires a dict if multiple metrics are used
    evaluation_results = {"blue_output": bleu_output, "rouge_output": rouge_output, "meteor_results": meteor_output, 
                           "bertscore_output": bertscore_output, "bart_scores_output": bart_scores_output}

    log_results(path_model_name, evaluation_results)

    ##Additional PARENT evaluation
    tables = test_rdf_input
    references = true_articles_test
    generations = decoded_test_predictions
    parent_attempt(path_model_name, generations, references, tables)
    
    return evaluation_results


def write_to_text_parent(path_model_name, decoded_predictions, true_articles, rdfs):
    """
    Parent script requires text files, so we create them here
    """

    with open(f"{FLAGS['drive_path']}/Parent_test/{path_model_name}_true_articles.txt", 'w', encoding='utf-8') as f:
        for articles in true_articles:
            f.write(f'{articles} \n')

    with open(f"{FLAGS['drive_path']}/Parent_test/{path_model_name}_decode_predictions.txt", 'w', encoding='utf-8') as f:
        for predictions in decoded_predictions:
            f.write(f'{predictions} \n')

    with open(f"{FLAGS['drive_path']}/Parent_test/{path_model_name}_rdfs.txt", 'w', encoding='utf-8') as f:
        for pairs in rdfs:
            f.write(f'{pairs} \n')


def prepare_inputs_parent(RDFs):
    """
    Cleans the RDF pairs and transforms them in the proper format so that the parent module can calculate with it.
    Input: RDF pairs of format "Attribute | Value"
    Returns a list of lists containing tuples --> [ [ (Attribute, Value), (Attribute, Value), (Attribute, Value)] ...]
    """

    attribute_value_pairs = []

    for iteration, inputRDF in enumerate(RDFs):
        split_RDF = inputRDF.split(", ")
        entry=[]
        for connected_pair in split_RDF:
            if '[' in connected_pair:
                connected_pair = connected_pair.replace('[', '')
            if ']' in connected_pair:
                connected_pair = connected_pair.replace(']', '')
            if '_' in connected_pair:
                connected_pair = connected_pair.replace('_', ' ')
            split_pair = tuple(connected_pair.split(' | '))
            entry.append((split_pair))
        attribute_value_pairs.append(entry)
    return attribute_value_pairs


def parent_attempt(path_model_name, generations, references, rdfs):
    """
    The Parent metric needs special treatment, as it only accepts specific inputs and file types.
    """
    prepared_rdfs = prepare_inputs_parent(rdfs)
    write_to_text_parent(path_model_name, generations, references, prepared_rdfs)

    !python -i "Evaluation_code/Parent.py" --references f"{FLAGS['drive_path']}/Parent_test/{path_model_name}_true_articles.txt" \
                                                     --generations f"{FLAGS['drive_path']}/Parent_test/{path_model_name}_decode_predictions.txt"  \
                                                     --tables f"{FLAGS['drive_path']}/Parent_test/{path_model_name}_rdfs.txt"

def log_results(path_model_name, results):
    with open(f"{FLAGS['drive_path']}/Logging_TestSet_Results/{path_model_name}_logResults.json", 'w') as convert_file:
        convert_file.write(json.dumps(results))

############################################################################## End Evaluation Process ######################################################################################

############################################################################## Begin Full fine-tune setup######################################################################################

def fine_tune_model(model_name):
    # ensure cuda compatability
    ensure_cuda_compatability()

    # I instantiate the tokenizer as a global variable, as the .map function in transform_datasets was not working properly. 
    # This should not be an issue, as the tokenizer remains consistent during training and evaluation.
    global tokenizer
    global path_model_name

    ## retrieve model and tokenizer from huggingface to prepare dataset
    #tokenizer = preprocess_model(model_name)
    model, tokenizer = preprocess_model(model_name)

    ### retrieve the unprocessed data from the csv files
    entire_dataset = load_CACAPO_data()
    
    # ## process the dataset and split it into its natural train, val, test split
    train_ds, val_ds, test_ds, true_articles_test, test_rdf_inputs =  transform_datasets(entire_dataset)


    # ### setup the training arguments 
    training_args = set_training_args(model_name=model_name, learning_rate = FLAGS['learning_rate'], 
                                      num_train_epochs = FLAGS['num_epochs'], evaluation_strategy = FLAGS['training_strategy'], generation_num_beams=FLAGS['generation_num_beams'], 
                                      generation_max_length = FLAGS['generation_max_length'], gradient_accumulation_steps = FLAGS['gradient_accumulation_steps'], 
                                      per_device_train_batch_size= FLAGS['batch_size'] , per_device_eval_batch_size= FLAGS['batch_size'] )

    # ###create a trainer instance 
    trainer = set_trainer(model_name, training_args, train_ds, val_ds, tokenizer)

    # Both mt5 and T5-dutch have / in their name, which makes pathing more chaotic
    if '/' in model_name:
        path_model_name = model_name.replace('/', '_')
    elif '-' in model_name:
        path_model_name = model_name.replace('-', '_')

    ## Finally fine-tune the model and save it
    train_and_save(trainer, path_model_name)

    #testset_evaluation_results = evaluate_test_set( path_model_name, test_ds, true_articles_test, test_rdf_inputs)

    #return testset_evaluation_results


############################################################################## End Full fine-tune setup######################################################################################


# Start training processes
def main(flags):
    global FLAGS
    global model_name
    
    FLAGS = flags
    results = fine_tune_model(FLAGS['model_name'])


In [6]:
main(FLAGS)

Torch version: 1.12.1+cu113
Cuda version: 11.3
Cudnn version: 8302
Is cuda available: True
Number of cuda devices: 1
Current default device: 0
First cuda device: <torch.cuda.device object at 0x7fcaa925d430>
Name of the first cuda device: A100-SXM4-40GB




Downloading:   0%|          | 0.00/702 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]



LOGGING: preprocess_model DONE 

Downloading and preparing dataset csv/Stocks to /root/.cache/huggingface/datasets/csv/Stocks-e71bd095404fe121/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

   

Extracting data files #0:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #1:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #2:   0%|          | 0/1 [00:00<?, ?obj/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating dev split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/Stocks-e71bd095404fe121/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?ba/s]



  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

LOGGING: transform_datasets DONE 

LOGGING: set_training_args DONE 



Loading model from /content/drive/MyDrive/MscThesis/Models/mT5_Sport_First_1of4/.


LOGGING: set_trainer DONE 



***** Running training *****
  Num examples = 3941
  Num Epochs = 200
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 2
  Total optimization steps = 24600
  Number of trainable parameters = 582401280
Didn't find an RNG file, if you are resuming a training that was launched in a distributed fashion, reproducibility is not guaranteed.
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss,Blue Output,Rouge Output,Meteor Results
0,0.3998,0.24857,"{'bleu': 0.13033255692434015, 'precisions': [0.40137307434695246, 0.21663019693654267, 0.12189751801441152, 0.07209405501330968], 'brevity_penalty': 0.7839033655564815, 'length_ratio': 0.8042014543495826, 'translation_length': 5972, 'reference_length': 7426}","{'rouge1': 0.5240501503478875, 'rouge2': 0.3441612393456942, 'rougeL': 0.4649846126790713, 'rougeLsum': 0.46416776674019034}",{'meteor': 0.44257615639997777}
1,0.328,0.226891,"{'bleu': 0.17249569634565007, 'precisions': [0.4255555555555556, 0.24913971094287682, 0.1590013140604468, 0.10734929810074319], 'brevity_penalty': 0.8363315457176264, 'length_ratio': 0.8483705898195529, 'translation_length': 6300, 'reference_length': 7426}","{'rouge1': 0.5492156924875282, 'rouge2': 0.3729554396460315, 'rougeL': 0.49468547010333613, 'rougeLsum': 0.4943014254626097}",{'meteor': 0.47761022838948985}
2,1.9453,2.917617,"{'bleu': 0.165274536950144, 'precisions': [0.5069936769496072, 0.3043294614572334, 0.1960554120685607, 0.13387905994190652], 'brevity_penalty': 0.6551585929807452, 'length_ratio': 0.7028009695663884, 'translation_length': 5219, 'reference_length': 7426}","{'rouge1': 0.5621426185232341, 'rouge2': 0.3833186142086102, 'rougeL': 0.507708651154811, 'rougeLsum': 0.5067598971167693}",{'meteor': 0.4831665795683685}
3,2.1333,0.35771,"{'bleu': 0.17227962471287392, 'precisions': [0.5016635859519408, 0.3008939455505892, 0.19383160738406124, 0.13366960907944514], 'brevity_penalty': 0.6889109557659353, 'length_ratio': 0.7285214112577431, 'translation_length': 5410, 'reference_length': 7426}","{'rouge1': 0.5715765764208608, 'rouge2': 0.3925469405601145, 'rougeL': 0.5193746409099027, 'rougeLsum': 0.5180544026706151}",{'meteor': 0.4919413421377468}
4,0.5113,0.273386,"{'bleu': 0.17780940729325873, 'precisions': [0.44069952305246424, 0.25809786354238456, 0.16325380424572608, 0.11085879438480595], 'brevity_penalty': 0.8347657490538197, 'length_ratio': 0.8470239698357124, 'translation_length': 6290, 'reference_length': 7426}","{'rouge1': 0.558124941796313, 'rouge2': 0.38111929851499526, 'rougeL': 0.5047917144807409, 'rougeLsum': 0.5040840629429457}",{'meteor': 0.4905770123582041}


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

Downloading builder script:   0%|          | 0.00/2.48k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.81k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Downloading builder script:   0%|          | 0.00/8.41k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

loading file vocab.json from cache at /root/.cache/huggingface/hub/models--facebook--bart-base/snapshots/aadd2ab0ae0c8268c7c9693540e9904811f36177/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--facebook--bart-base/snapshots/aadd2ab0ae0c8268c7c9693540e9904811f36177/merges.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at None


Downloading:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--facebook--bart-base/snapshots/aadd2ab0ae0c8268c7c9693540e9904811f36177/config.json
Model config BartConfig {
  "_name_or_path": "facebook/bart-base",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartModel"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2":

Downloading:   0%|          | 0.00/558M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--facebook--bart-base/snapshots/aadd2ab0ae0c8268c7c9693540e9904811f36177/pytorch_model.bin
All model checkpoint weights were used when initializing BartForConditionalGeneration.

All the weights of BartForConditionalGeneration were initialized from the model checkpoint at facebook/bart-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BartForConditionalGeneration for predictions without further training.


LOGGING: load_eval_metrics DONE 


 LOGGING: Calculating Blue

 LOGGING: Calculating Rouge

 LOGGING: Calculating Meteor

 LOGGING: Calculating Bertscore


Could not locate the tokenizer configuration file, will try to use the model config instead.


Downloading:   0%|          | 0.00/482 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}



Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

loading file vocab.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/merges.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at None
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_

Downloading:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/pytorch_model.bin
All the weights of RobertaModel were initialized from the model checkpoint at roberta-large.
If your task is similar to the task the model of the checkpoint was trained on, you can already use RobertaModel for predictions without further training.



 LOGGING: Calculating Bartscore


Trainer is attempting to log a value of "{'bleu': 0.13033255692434015, 'precisions': [0.40137307434695246, 0.21663019693654267, 0.12189751801441152, 0.07209405501330968], 'brevity_penalty': 0.7839033655564815, 'length_ratio': 0.8042014543495826, 'translation_length': 5972, 'reference_length': 7426}" of type <class 'dict'> for key "eval/blue_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'rouge1': 0.5240501503478875, 'rouge2': 0.3441612393456942, 'rougeL': 0.4649846126790713, 'rougeLsum': 0.46416776674019034}" of type <class 'dict'> for key "eval/rouge_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'meteor': 0.44257615639997777}" of type <class 'dict'> for key "eval/meteor_results" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we d


 LOGGING: Done calculations


Saving model checkpoint to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-123
Configuration saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-123/config.json
Model weights saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-123/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-123/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-123/special_tokens_map.json
Copy vocab file to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-123/spiece.model
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient c

LOGGING: load_eval_metrics DONE 


 LOGGING: Calculating Blue

 LOGGING: Calculating Rouge

 LOGGING: Calculating Meteor

 LOGGING: Calculating Bertscore


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /root/.cache/huggingface/hub


 LOGGING: Calculating Bartscore


Trainer is attempting to log a value of "{'bleu': 0.17249569634565007, 'precisions': [0.4255555555555556, 0.24913971094287682, 0.1590013140604468, 0.10734929810074319], 'brevity_penalty': 0.8363315457176264, 'length_ratio': 0.8483705898195529, 'translation_length': 6300, 'reference_length': 7426}" of type <class 'dict'> for key "eval/blue_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'rouge1': 0.5492156924875282, 'rouge2': 0.3729554396460315, 'rougeL': 0.49468547010333613, 'rougeLsum': 0.4943014254626097}" of type <class 'dict'> for key "eval/rouge_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'meteor': 0.47761022838948985}" of type <class 'dict'> for key "eval/meteor_results" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dro


 LOGGING: Done calculations


Saving model checkpoint to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246
Configuration saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246/config.json
Model weights saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246/special_tokens_map.json
Copy vocab file to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246/spiece.model
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient c

LOGGING: load_eval_metrics DONE 


 LOGGING: Calculating Blue

 LOGGING: Calculating Rouge

 LOGGING: Calculating Meteor

 LOGGING: Calculating Bertscore


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /root/.cache/huggingface/hub


 LOGGING: Calculating Bartscore


Trainer is attempting to log a value of "{'bleu': 0.165274536950144, 'precisions': [0.5069936769496072, 0.3043294614572334, 0.1960554120685607, 0.13387905994190652], 'brevity_penalty': 0.6551585929807452, 'length_ratio': 0.7028009695663884, 'translation_length': 5219, 'reference_length': 7426}" of type <class 'dict'> for key "eval/blue_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'rouge1': 0.5621426185232341, 'rouge2': 0.3833186142086102, 'rougeL': 0.507708651154811, 'rougeLsum': 0.5067598971167693}" of type <class 'dict'> for key "eval/rouge_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'meteor': 0.4831665795683685}" of type <class 'dict'> for key "eval/meteor_results" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped t


 LOGGING: Done calculations


Saving model checkpoint to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-369
Configuration saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-369/config.json
Model weights saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-369/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-369/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-369/special_tokens_map.json
Copy vocab file to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-369/spiece.model
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient c

LOGGING: load_eval_metrics DONE 


 LOGGING: Calculating Blue

 LOGGING: Calculating Rouge

 LOGGING: Calculating Meteor

 LOGGING: Calculating Bertscore


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /root/.cache/huggingface/hub


 LOGGING: Calculating Bartscore


Trainer is attempting to log a value of "{'bleu': 0.17227962471287392, 'precisions': [0.5016635859519408, 0.3008939455505892, 0.19383160738406124, 0.13366960907944514], 'brevity_penalty': 0.6889109557659353, 'length_ratio': 0.7285214112577431, 'translation_length': 5410, 'reference_length': 7426}" of type <class 'dict'> for key "eval/blue_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'rouge1': 0.5715765764208608, 'rouge2': 0.3925469405601145, 'rougeL': 0.5193746409099027, 'rougeLsum': 0.5180544026706151}" of type <class 'dict'> for key "eval/rouge_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'meteor': 0.4919413421377468}" of type <class 'dict'> for key "eval/meteor_results" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropp


 LOGGING: Done calculations


Saving model checkpoint to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-492
Configuration saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-492/config.json
Model weights saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-492/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-492/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-492/special_tokens_map.json
Copy vocab file to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-492/spiece.model
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient c

LOGGING: load_eval_metrics DONE 


 LOGGING: Calculating Blue

 LOGGING: Calculating Rouge

 LOGGING: Calculating Meteor

 LOGGING: Calculating Bertscore


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-large/snapshots/5069d8a2a32a7df4c69ef9b56348be04152a2341/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /root/.cache/huggingface/hub


 LOGGING: Calculating Bartscore


Trainer is attempting to log a value of "{'bleu': 0.17780940729325873, 'precisions': [0.44069952305246424, 0.25809786354238456, 0.16325380424572608, 0.11085879438480595], 'brevity_penalty': 0.8347657490538197, 'length_ratio': 0.8470239698357124, 'translation_length': 6290, 'reference_length': 7426}" of type <class 'dict'> for key "eval/blue_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'rouge1': 0.558124941796313, 'rouge2': 0.38111929851499526, 'rougeL': 0.5047917144807409, 'rougeLsum': 0.5040840629429457}" of type <class 'dict'> for key "eval/rouge_output" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'meteor': 0.4905770123582041}" of type <class 'dict'> for key "eval/meteor_results" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dro


 LOGGING: Done calculations


Saving model checkpoint to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-615
Configuration saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-615/config.json
Model weights saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-615/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-615/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-615/special_tokens_map.json
Copy vocab file to /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-615/spiece.model


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/MscThesis//Results/google/mt5-base/checkpoint-246 (score: 0.22689081728458405).


KeyboardInterrupt: ignored