In [1]:
import transformers
from datasets import load_dataset, load_metric
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
news = load_dataset("csv", data_files="./data/news_collection_new.csv")

Using custom data configuration default-e640655710a3e75d
Found cached dataset csv (C:/Users/love4/.cache/huggingface/datasets/csv/default-e640655710a3e75d/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
100%|██████████| 1/1 [00:00<00:00, 427.03it/s]


In [3]:
datasets_train_validation = news["train"].train_test_split(test_size=1000)

news["train"] = datasets_train_validation["train"].shuffle()
news["validation"] = datasets_train_validation["test"].shuffle()

In [4]:
import nltk
nltk.download('punkt')
import string

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("yihsuan/mt5_chinese_small")

model = AutoModelForSeq2SeqLM.from_pretrained("yihsuan/mt5_chinese_small")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\love4\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
prefix = "summarize: "
max_input_length = 512
max_target_length = 64


def preprocess_data(examples):
  
  inputs = [prefix + text for text in examples["desc"]]
  model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

  # Setup the tokenizer for targets
  with tokenizer.as_target_tokenizer():
    labels = tokenizer(examples["title"], max_length=max_target_length, truncation=True)

  model_inputs["labels"] = labels["input_ids"]
  return model_inputs

In [6]:
tokenized_datasets = news.map(preprocess_data, batched=True)

 98%|█████████▊| 46/47 [00:02<00:00, 15.80ba/s]
  0%|          | 0/1 [00:00<?, ?ba/s]


In [7]:
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

In [8]:
batch_size = 4
model_name = "mt5-base-news-title-generation"
model_dir = f"./Models/{model_name}"

args = Seq2SeqTrainingArguments(
    model_dir,
    evaluation_strategy="steps",
    eval_steps = 500,
    logging_strategy="steps",
    logging_steps=500,
    save_strategy="steps",
    save_steps=1000,
    learning_rate=4e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=1,
    predict_with_generate=True,
    fp16=True,
    load_best_model_at_end=True,
    metric_for_best_model="rouge1",
    report_to="tensorboard"
)

In [9]:
data_collator = DataCollatorForSeq2Seq(tokenizer)

In [10]:
import evaluate
metric = evaluate.load("rouge")

In [11]:
import numpy as np

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # Rouge expects a newline after each sentence
    decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip()))
                      for pred in decoded_preds]
    decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) 
                      for label in decoded_labels]
    
    # Compute ROUGE scores
    result = metric.compute(predictions=decoded_preds, references=decoded_labels,
                            use_stemmer=True)

    # Extract ROUGE f1 scores
    result = {key: value * 100 for key, value in result.items()}
    
    # Add mean generated length to metrics
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id)
                      for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)
    
    return {k: round(v, 4) for k, v in result.items()}

In [12]:
# Function that returns an untrained model to be trained
def model_init():
    return AutoModelForSeq2SeqLM.from_pretrained("yihsuan/mt5_chinese_small")

trainer = Seq2SeqTrainer(
    model_init=model_init,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

loading configuration file config.json from cache at C:\Users\love4/.cache\huggingface\hub\models--yihsuan--mt5_chinese_small\snapshots\338c78a9a4e6ff81d4e613a7b742a99b864b8f28\config.json
Model config MT5Config {
  "_name_or_path": "yihsuan/mt5_chinese_small",
  "architectures": [
    "MT5ForConditionalGeneration"
  ],
  "d_ff": 1024,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "mt5",
  "num_decoder_layers": 8,
  "num_heads": 6,
  "num_layers": 8,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "tokenizer_class": "T5Tokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.24.0",
  "use_cache": true,
  "vocab_size": 250100
}

loadi

In [13]:
%load_ext tensorboard
%tensorboard --logdir '{model_dir}'/runs

Reusing TensorBoard on port 6006 (pid 9564), started 4:07:42 ago. (Use '!kill 9564' to kill it.)

In [14]:
trainer.train()

loading configuration file config.json from cache at C:\Users\love4/.cache\huggingface\hub\models--yihsuan--mt5_chinese_small\snapshots\338c78a9a4e6ff81d4e613a7b742a99b864b8f28\config.json
Model config MT5Config {
  "_name_or_path": "yihsuan/mt5_chinese_small",
  "architectures": [
    "MT5ForConditionalGeneration"
  ],
  "d_ff": 1024,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "mt5",
  "num_decoder_layers": 8,
  "num_heads": 6,
  "num_layers": 8,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "tokenizer_class": "T5Tokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.24.0",
  "use_cache": true,
  "vocab_size": 250100
}

loadi

{'loss': 3.3158, 'learning_rate': 0.00038842588581824484, 'epoch': 0.04}


                                                   
  4%|▍         | 501/11543 [01:26<23:49:36,  7.77s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 43.5651, 'eval_samples_per_second': 22.954, 'eval_steps_per_second': 5.739, 'epoch': 0.04}


  9%|▊         | 1000/11543 [02:04<13:34, 12.94it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0003710993675820844, 'epoch': 0.09}


                                                    
  9%|▊         | 1000/11543 [02:49<13:34, 12.94it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-1000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-1000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 44.1777, 'eval_samples_per_second': 22.636, 'eval_steps_per_second': 5.659, 'epoch': 0.09}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-1000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-1000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-1000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-800] due to args.save_total_limit
 13%|█▎        | 1500/11543 [03:33<13:02, 12.84it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0003537728493459239, 'epoch': 0.13}


                                                    
 13%|█▎        | 1501/11543 [04:19<19:40:31,  7.05s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 46.4992, 'eval_samples_per_second': 21.506, 'eval_steps_per_second': 5.376, 'epoch': 0.13}


 17%|█▋        | 2000/11543 [04:58<12:26, 12.78it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00033644633110976353, 'epoch': 0.17}


                                                    
 17%|█▋        | 2000/11543 [05:47<12:26, 12.78it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-2000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-2000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 48.1925, 'eval_samples_per_second': 20.75, 'eval_steps_per_second': 5.188, 'epoch': 0.17}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-2000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-2000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-2000\spiece.model
 22%|██▏       | 2500/11543 [06:30<11:55, 12.64it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0003191198128736031, 'epoch': 0.22}


                                                    
 22%|██▏       | 2501/11543 [07:17<17:36:27,  7.01s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 46.1984, 'eval_samples_per_second': 21.646, 'eval_steps_per_second': 5.411, 'epoch': 0.22}


 26%|██▌       | 3000/11543 [07:56<11:35, 12.29it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0003017932946374426, 'epoch': 0.26}


                                                    
 26%|██▌       | 3000/11543 [08:40<11:35, 12.29it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-3000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-3000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 43.5754, 'eval_samples_per_second': 22.949, 'eval_steps_per_second': 5.737, 'epoch': 0.26}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-3000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-3000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-3000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-3000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-2000] due to args.save_total_limit
 30%|███       | 3500/11543 [09:23<10:19, 12.98it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00028446677640128217, 'epoch': 0.3}


                                                    
 30%|███       | 3501/11543 [10:06<14:33:42,  6.52s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 42.9277, 'eval_samples_per_second': 23.295, 'eval_steps_per_second': 5.824, 'epoch': 0.3}


 35%|███▍      | 4000/11543 [10:43<09:08, 13.76it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00026714025816512173, 'epoch': 0.35}


                                                    
 35%|███▍      | 4000/11543 [11:25<09:08, 13.76it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-4000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-4000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 41.9048, 'eval_samples_per_second': 23.864, 'eval_steps_per_second': 5.966, 'epoch': 0.35}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-4000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-4000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-4000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-4000\spiece.model
 39%|███▉      | 4500/11543 [12:07<08:43, 13.46it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0002498137399289613, 'epoch': 0.39}


                                                    
 39%|███▉      | 4503/11543 [12:50<9:00:09,  4.60s/it] 

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 43.158, 'eval_samples_per_second': 23.171, 'eval_steps_per_second': 5.793, 'epoch': 0.39}


 43%|████▎     | 5000/11543 [13:26<08:13, 13.25it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00023248722169280086, 'epoch': 0.43}


                                                    
 43%|████▎     | 5000/11543 [14:08<08:13, 13.25it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-5000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-5000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 41.756, 'eval_samples_per_second': 23.949, 'eval_steps_per_second': 5.987, 'epoch': 0.43}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-5000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-5000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-5000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-5000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-4000] due to args.save_total_limit
 48%|████▊     | 5500/11543 [14:49<07:28, 13.47it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0002151607034566404, 'epoch': 0.48}


                                                    
 48%|████▊     | 5501/11543 [15:31<12:29:01,  7.44s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 41.7109, 'eval_samples_per_second': 23.975, 'eval_steps_per_second': 5.994, 'epoch': 0.48}


 52%|█████▏    | 6000/11543 [16:08<06:51, 13.47it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00019783418522047996, 'epoch': 0.52}


                                                    
 52%|█████▏    | 6000/11543 [16:50<06:51, 13.47it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-6000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-6000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 41.8787, 'eval_samples_per_second': 23.879, 'eval_steps_per_second': 5.97, 'epoch': 0.52}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-6000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-6000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-6000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-6000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-3000] due to args.save_total_limit
 56%|█████▋    | 6500/11543 [17:32<06:30, 12.92it/s]   The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00018050766698431953, 'epoch': 0.56}


                                                    
 56%|█████▋    | 6501/11543 [18:15<9:06:43,  6.51s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 42.8446, 'eval_samples_per_second': 23.34, 'eval_steps_per_second': 5.835, 'epoch': 0.56}


 61%|██████    | 7000/11543 [18:52<05:42, 13.25it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00016318114874815906, 'epoch': 0.61}


                                                    
 61%|██████    | 7000/11543 [19:35<05:42, 13.25it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-7000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-7000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 42.9908, 'eval_samples_per_second': 23.261, 'eval_steps_per_second': 5.815, 'epoch': 0.61}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-7000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-7000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-7000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-7000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-5000] due to args.save_total_limit
 65%|██████▍   | 7500/11543 [20:16<04:51, 13.89it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00014585463051199863, 'epoch': 0.65}


                                                    
 65%|██████▍   | 7501/11543 [20:58<7:08:52,  6.37s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 41.9392, 'eval_samples_per_second': 23.844, 'eval_steps_per_second': 5.961, 'epoch': 0.65}


 69%|██████▉   | 8000/11543 [21:35<04:19, 13.64it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.0001285281122758382, 'epoch': 0.69}


                                                    
 69%|██████▉   | 8000/11543 [22:17<04:19, 13.64it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-8000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-8000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 41.9494, 'eval_samples_per_second': 23.838, 'eval_steps_per_second': 5.96, 'epoch': 0.69}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-8000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-8000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-8000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-8000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-6000] due to args.save_total_limit
 74%|███████▎  | 8500/11543 [22:58<03:45, 13.48it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 0.00011120159403967774, 'epoch': 0.74}


                                                    
 74%|███████▎  | 8501/11543 [23:42<5:33:02,  6.57s/it]

{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 43.29, 'eval_samples_per_second': 23.1, 'eval_steps_per_second': 5.775, 'epoch': 0.74}


 78%|███████▊  | 9000/11543 [24:19<03:08, 13.52it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 9.387507580351729e-05, 'epoch': 0.78}


                                                    
 78%|███████▊  | 9000/11543 [25:05<03:08, 13.52it/s]Saving model checkpoint to ./Models/mt5-base-news-title-generation\checkpoint-9000
Configuration saved in ./Models/mt5-base-news-title-generation\checkpoint-9000\config.json


{'eval_loss': nan, 'eval_rouge1': 0.0, 'eval_rouge2': 0.0, 'eval_rougeL': 0.0, 'eval_rougeLsum': 0.0, 'eval_gen_len': 0.0, 'eval_runtime': 46.4022, 'eval_samples_per_second': 21.551, 'eval_steps_per_second': 5.388, 'epoch': 0.78}


Model weights saved in ./Models/mt5-base-news-title-generation\checkpoint-9000\pytorch_model.bin
tokenizer config file saved in ./Models/mt5-base-news-title-generation\checkpoint-9000\tokenizer_config.json
Special tokens file saved in ./Models/mt5-base-news-title-generation\checkpoint-9000\special_tokens_map.json
Copy vocab file to ./Models/mt5-base-news-title-generation\checkpoint-9000\spiece.model
Deleting older checkpoint [Models\mt5-base-news-title-generation\checkpoint-7000] due to args.save_total_limit
 82%|████████▏ | 9500/11543 [25:50<02:35, 13.10it/s]  The following columns in the evaluation set don't have a corresponding argument in `MT5ForConditionalGeneration.forward` and have been ignored: url, image, title, date, source, desc. If url, image, title, date, source, desc are not expected by `MT5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


{'loss': 0.0, 'learning_rate': 7.654855756735684e-05, 'epoch': 0.82}


 82%|████████▏ | 9500/11543 [26:05<02:35, 13.10it/s]

KeyboardInterrupt: 