In [1]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments
import random
import torch
import evaluate
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# Load dataset, model and tokenizer of model
dataset_name = "ccdv/pubmed-summarization"
model_name = "google/flan-t5-small"

# Load the dataset and select the first 10,000 records from the 'train' split
dataset = load_dataset(dataset_name)
first_10k_train = dataset['train'].select(range(400))

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
random.seed(42)

# Define a function to preprocess the dataset
def preprocess_function(batch):
    # Assuming 'article' and 'abstract' are lists of strings for each example in the batch
    inputs = [f"summarize: {article}" for article in batch["article"]]
    model_inputs = tokenizer(inputs, max_length=512, padding="max_length", truncation=True)
    labels = tokenizer(batch["abstract"], max_length=128, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # Check if predictions is a tuple
    if isinstance(predictions, tuple):
        # If predictions is a tuple, we assume the first element is the logits
        predictions = predictions[0]
    # Convert logits to token IDs
    predictions = np.argmax(predictions, axis=-1)
    # Ensure predictions is a 2D list
    predictions = predictions.tolist()
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)

    # Multiply each value in the result dictionary by 100
    result = {key: round(value * 100, 4) for key, value in result.items()}

    return result

# Tokenize and preprocess the dataset
tokenized_dataset_train = first_10k_train.map(preprocess_function, batched=True)
tokenized_dataset_validate = dataset["validation"].select(range(400)).map(preprocess_function, batched=True)

# Define training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=1000,
    evaluation_strategy="epoch",
    eval_steps=1000,
    load_best_model_at_end=True,
    metric_for_best_model="rouge1",
    greater_is_better=True,
    save_strategy="epoch",
    save_total_limit=2,
    learning_rate=2e-5,
)

# Create a data collator
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Initialize the trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset_train,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset_validate,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# Train the model
print("Starting to train...")
trainer.train()

# Save the model and tokenizer
trainer.save_model("./saved_model")
tokenizer.save_pretrained("./saved_tokenizer")


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Starting to train...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[34m[1mwandb[0m: Currently logged in as: [33mojaswini-kohale[0m ([33mopnp[0m). Use [1m`wandb login --relogin`[0m to force relogin
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.9939,3.499626,48.1709,20.1864,41.0238,41.0696
2,3.0679,3.398567,48.3886,20.7911,41.5267,41.5816
3,3.4999,3.303023,48.526,21.2316,41.8768,41.9367
4,3.3123,3.195276,48.5438,21.4996,41.9662,42.0008
5,2.8156,3.089887,48.6207,21.6846,42.0428,42.0937


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


('./saved_tokenizer/tokenizer_config.json',
 './saved_tokenizer/special_tokens_map.json',
 './saved_tokenizer/spiece.model',
 './saved_tokenizer/added_tokens.json',
 './saved_tokenizer/tokenizer.json')

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments
import random
import torch
import evaluate
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

batch_size = 400

total_batches = len(dataset['train'])

tokenizer = AutoTokenizer.from_pretrained("saved_tokenizer")
model = AutoModelForSeq2SeqLM.from_pretrained("saved_model")
random.seed(42)

# Define a function to preprocess the dataset
def preprocess_function(batch):
    # Assuming 'article' and 'abstract' are lists of strings for each example in the batch
    inputs = [f"summarize: {article}" for article in batch["article"]]
    model_inputs = tokenizer(inputs, max_length=512, padding="max_length", truncation=True)
    labels = tokenizer(batch["abstract"], max_length=128, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # Check if predictions is a tuple
    if isinstance(predictions, tuple):
        # If predictions is a tuple, we assume the first element is the logits
        predictions = predictions[0]
    # Convert logits to token IDs
    predictions = np.argmax(predictions, axis=-1)
    # Ensure predictions is a 2D list
    predictions = predictions.tolist()
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)

    # Multiply each value in the result dictionary by 100
    result = {key: round(value * 100, 4) for key, value in result.items()}

    return result

for batch_num in range(total_batches):
    start_idx = batch_num * batch_size
    end_idx = (batch_num + 1) * batch_size
    current_batch = dataset['train'].select(range(start_idx, end_idx))
    tokenized_dataset_train = current_batch.map(preprocess_function, batched=True)
    tokenized_dataset_validate = dataset["validation"].select(range(400)).map(preprocess_function, batched=True)
    
    # Define training arguments
    training_args = Seq2SeqTrainingArguments(
        output_dir="./results",
        num_train_epochs=5,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        gradient_accumulation_steps=2,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=10,
        save_steps=1000,
        evaluation_strategy="epoch",
        eval_steps=1000,
        load_best_model_at_end=True,
        metric_for_best_model="rouge1",
        greater_is_better=True,
        save_strategy="epoch",
        save_total_limit=2,
        learning_rate=2e-5,
    )
    
    # Create a data collator
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

    # Initialize the trainer
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset_train,
        data_collator=data_collator,
        eval_dataset=tokenized_dataset_validate,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
    )

    # Train the model
    print("Starting to train...")
    trainer.train()

    # Save the model and tokenizer
    trainer.save_model("./saved_model")
    tokenizer.save_pretrained("./saved_tokenizer")


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6852,3.082449,48.6567,21.6892,42.1106,42.1168
2,2.8028,3.045423,48.5491,21.6123,41.9722,41.9808
3,3.0775,2.975374,48.4632,21.6194,41.9658,41.9927
4,3.0217,2.867026,48.351,21.6752,41.9134,41.9405
5,2.6681,2.749565,48.3581,21.7,41.9105,41.9456


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,3.0824,3.058832,48.6275,21.627,42.0617,42.0589
2,3.1573,3.000024,48.5129,21.6231,42.0544,42.0749
3,3.2885,2.899332,48.2464,21.5848,41.8629,41.895
4,2.851,2.739056,48.1588,21.5472,41.8949,41.9303
5,3.2748,2.564379,48.5552,21.7316,42.2129,42.2706


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.954,3.040035,48.5402,21.5996,42.0254,42.0306
2,3.3975,2.989563,48.4396,21.5994,42.0153,42.0157
3,2.8211,2.879349,48.2757,21.6152,41.8998,41.9153
4,3.3248,2.746248,48.3408,21.6798,41.9809,42.0051
5,3.114,2.59676,48.466,21.647,42.0541,42.0686


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,3.209,3.017359,48.4706,21.546,41.9905,42.0041
2,3.2326,2.949923,48.3191,21.4884,41.928,41.9439
3,3.4174,2.840793,48.1536,21.5233,41.8638,41.8894
4,3.24,2.668914,48.2355,21.6596,41.9525,41.9938
5,2.7744,2.536709,48.6237,21.7921,42.3349,42.3563


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7992,2.532663,48.6021,21.7848,42.3486,42.3688
2,2.8142,2.522796,48.8127,21.8955,42.4751,42.5228
3,2.8243,2.506587,48.9247,21.9579,42.5657,42.6171
4,3.0055,2.483614,49.0463,22.0628,42.6649,42.7087
5,2.8741,2.460248,49.061,22.1101,42.7003,42.7473


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.9649,2.459218,49.0417,22.0945,42.6275,42.6863
2,2.8961,2.453524,49.0608,22.1754,42.69,42.7458
3,2.6292,2.438977,49.0994,22.2893,42.7183,42.7826
4,2.6996,2.422747,48.9338,22.3526,42.6676,42.7285
5,2.7942,2.407043,48.854,22.3483,42.6695,42.7191


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6827,2.437879,49.0475,22.3165,42.6944,42.7414
2,2.6735,2.430482,49.0057,22.331,42.6651,42.6957
3,2.7043,2.418124,48.9042,22.309,42.5838,42.6408
4,2.9876,2.404763,48.8658,22.2971,42.6126,42.67
5,2.8093,2.391888,49.0699,22.368,42.8017,42.8396


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7163,2.386132,49.1972,22.4214,42.8557,42.9117
2,2.7074,2.381633,49.2926,22.4867,42.9944,43.0599
3,2.6371,2.371536,49.4838,22.5428,43.132,43.187
4,2.7366,2.362223,49.6498,22.6949,43.3449,43.393
5,2.5604,2.355034,49.7547,22.7522,43.4207,43.4695


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7303,2.352927,49.7365,22.698,43.373,43.4332
2,2.5444,2.346572,49.6881,22.7334,43.3891,43.4247
3,2.6471,2.338596,49.6893,22.7945,43.3942,43.4413
4,2.6755,2.332058,49.6182,22.715,43.3674,43.3991
5,2.5465,2.318274,49.736,22.7956,43.4634,43.507


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7912,2.347133,49.7531,22.7384,43.4525,43.5027
2,2.8024,2.340133,49.7782,22.6793,43.4559,43.4985
3,2.9125,2.331784,49.7098,22.6168,43.4194,43.4673
4,2.7988,2.326471,49.6643,22.662,43.5012,43.5399
5,2.9215,2.312638,49.7073,22.6359,43.4616,43.5063


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7247,2.339813,49.8047,22.6697,43.4302,43.4825
2,2.6708,2.337721,49.7631,22.707,43.4084,43.4563
3,2.5149,2.331973,49.7226,22.6227,43.3472,43.3875
4,2.7345,2.322347,49.6805,22.6265,43.3328,43.3865
5,2.5855,2.313953,49.69,22.6303,43.2896,43.3356


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5991,2.338517,49.7941,22.7159,43.4285,43.4716
2,2.5394,2.333833,49.8385,22.7933,43.4687,43.5174
3,2.607,2.325297,49.7882,22.8068,43.463,43.5041
4,2.4131,2.320098,49.7959,22.752,43.4473,43.4881
5,2.6266,2.312217,49.808,22.7671,43.4342,43.4888


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6837,2.332826,49.8346,22.7953,43.4833,43.5327
2,2.5701,2.330916,49.7571,22.7397,43.4771,43.5191
3,2.6474,2.324357,49.7239,22.6717,43.4237,43.4677
4,2.5816,2.317266,49.7495,22.7354,43.4551,43.5105
5,2.4521,2.316016,49.7992,22.8416,43.4304,43.4801


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6749,2.332792,49.7977,22.7643,43.4892,43.5341
2,2.5271,2.331418,49.7236,22.7038,43.4565,43.4992
3,2.7129,2.327162,49.6961,22.6714,43.4834,43.5323
4,2.7788,2.316385,49.8471,22.7118,43.5345,43.5769
5,2.7066,2.308206,49.7907,22.7235,43.5302,43.5797


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7034,2.315104,49.7808,22.6908,43.5178,43.5622
2,2.6257,2.311564,49.8053,22.7005,43.5293,43.5642
3,2.5274,2.309109,49.8778,22.7804,43.5498,43.5872
4,2.6356,2.304212,49.8961,22.8246,43.5596,43.6142
5,2.5297,2.300656,50.0168,22.935,43.6164,43.6661


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.615,2.295846,49.9606,22.8937,43.6165,43.6664
2,2.5925,2.290088,49.9776,22.8692,43.6309,43.6589
3,2.6482,2.283553,49.9096,22.8806,43.6699,43.6988
4,2.3736,2.2767,49.9409,22.8588,43.6914,43.7409
5,2.5992,2.27406,50.0201,22.8288,43.7023,43.7428


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5831,2.273248,49.9488,22.8687,43.6613,43.7018
2,2.467,2.267289,49.9497,22.8448,43.7265,43.7653
3,2.5333,2.264308,49.9444,22.9394,43.7398,43.7745
4,2.5046,2.262973,50.0629,23.0035,43.7741,43.8157
5,2.5876,2.25324,50.1156,23.0528,43.8388,43.8875


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5364,2.252917,50.1291,23.0467,43.8185,43.875
2,2.618,2.254496,50.0878,23.0063,43.7984,43.8488
3,2.8118,2.25274,50.1318,22.9684,43.8202,43.8799
4,2.6985,2.250446,50.0733,22.9526,43.7164,43.7645
5,2.4776,2.247545,50.1527,22.9977,43.7619,43.8063


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6139,2.245939,50.1181,22.9885,43.7603,43.8075
2,2.4657,2.246108,50.1316,22.9777,43.7693,43.818
3,2.5484,2.24521,50.2004,23.0186,43.8374,43.8849
4,2.6624,2.240954,50.1823,23.01,43.8449,43.8915
5,2.4204,2.233713,50.2079,23.0525,43.9238,43.9769


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6938,2.231818,50.2091,23.0802,43.9015,43.9499
2,2.6798,2.23041,50.1944,23.0865,43.9064,43.9623
3,2.6467,2.226249,50.0839,23.0402,43.8187,43.8771
4,2.496,2.224616,50.1736,23.0736,43.9261,43.9638
5,2.5306,2.219777,50.1475,23.0714,43.9469,43.9898


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4297,2.232059,50.2365,23.1129,43.9264,43.9802
2,2.8531,2.232969,50.264,23.1621,43.9661,44.0243
3,2.6708,2.23008,50.2601,23.197,44.0331,44.0874
4,2.3352,2.226654,50.2726,23.1957,44.0287,44.0886
5,2.4481,2.224192,50.2758,23.1711,43.9791,44.0448


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4136,2.223139,50.3016,23.157,43.987,44.0588
2,2.464,2.222459,50.301,23.1536,44.0026,44.0663
3,2.4743,2.218837,50.2939,23.1552,43.9848,44.0501
4,2.5443,2.21527,50.3193,23.1563,44.0497,44.1036
5,2.5145,2.217705,50.4596,23.2035,44.0674,44.1205


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5552,2.214157,50.3718,23.1678,44.0734,44.1108
2,2.6618,2.211889,50.3595,23.201,44.052,44.1108
3,2.5662,2.211586,50.3387,23.1914,44.0084,44.0494
4,2.5328,2.211461,50.473,23.3156,44.1133,44.1627
5,2.5202,2.209672,50.3236,23.1947,43.975,44.0144


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6084,2.211837,50.4151,23.3053,44.0598,44.1112
2,2.6226,2.21067,50.4207,23.3125,44.0704,44.1232
3,2.4656,2.210363,50.3753,23.3166,44.0892,44.1346
4,2.5265,2.212231,50.3435,23.2468,44.0516,44.0954
5,2.5845,2.212514,50.3921,23.3293,44.1701,44.2069


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4915,2.209754,50.4349,23.3167,44.0767,44.12
2,2.7444,2.209586,50.3922,23.259,44.0488,44.1093
3,2.5171,2.206594,50.3308,23.2258,44.0138,44.0932
4,2.4665,2.208465,50.391,23.2627,44.0621,44.1232
5,2.4919,2.206998,50.4454,23.2501,44.0629,44.1069


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4583,2.206342,50.3786,23.2579,44.0534,44.1048
2,2.6198,2.205295,50.3176,23.2165,44.0342,44.0827
3,2.4173,2.202697,50.3175,23.1823,44.0335,44.0833
4,2.6283,2.200039,50.3051,23.1865,44.0508,44.114
5,2.3735,2.201828,50.2129,23.1298,43.9914,44.0295


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.3108,2.204472,50.3947,23.2205,44.0704,44.1177
2,2.4451,2.2009,50.3613,23.1991,44.0472,44.1068
3,2.7651,2.200309,50.3362,23.1688,43.9638,44.0171
4,2.6749,2.196006,50.2936,23.115,44.0,44.0609
5,2.4926,2.198673,50.303,23.0944,44.0372,44.0881


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6289,2.203037,50.3803,23.1981,44.0443,44.1008
2,2.401,2.204628,50.4334,23.1858,44.0509,44.0918
3,2.3165,2.204743,50.36,23.218,44.0418,44.1015
4,2.3794,2.201761,50.4026,23.2103,44.0994,44.1509
5,2.4802,2.205856,50.3735,23.2424,44.0708,44.1238


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5835,2.203101,50.401,23.2282,44.0843,44.1268
2,2.4695,2.199082,50.3415,23.2025,44.0967,44.1615
3,2.727,2.200282,50.3215,23.2765,44.0854,44.1461
4,2.4906,2.198108,50.3119,23.3189,44.1138,44.1661
5,2.5883,2.195189,50.3311,23.2936,44.1222,44.1682


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5938,2.201916,50.3675,23.2084,44.0673,44.1084
2,2.5315,2.200969,50.3197,23.1907,44.035,44.07
3,2.5299,2.195612,50.3903,23.1876,44.069,44.1242
4,2.689,2.200398,50.4001,23.2529,44.0838,44.1304
5,2.7594,2.194813,50.4755,23.2897,44.1818,44.2304


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.3088,2.195447,50.4808,23.3015,44.1717,44.2174
2,2.5364,2.197783,50.4014,23.2889,44.1352,44.1737
3,2.4474,2.193294,50.4095,23.2519,44.1189,44.1562
4,2.4292,2.194147,50.3375,23.2751,44.0502,44.1049
5,2.4818,2.199145,50.4441,23.3556,44.1374,44.1885


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.3394,2.196649,50.4695,23.2956,44.1609,44.2107
2,2.6779,2.194869,50.4801,23.2923,44.1867,44.2239
3,2.5713,2.195465,50.5179,23.3447,44.2253,44.2828
4,2.6859,2.194751,50.5956,23.3555,44.2684,44.3166
5,2.5401,2.19185,50.4338,23.2162,44.1195,44.161


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6586,2.194019,50.6057,23.3701,44.2781,44.3291
2,2.6583,2.190624,50.5864,23.3008,44.2254,44.272
3,2.4454,2.192255,50.4962,23.324,44.2215,44.2598
4,2.4699,2.191461,50.5039,23.2452,44.1994,44.2429
5,2.4833,2.190543,50.4184,23.2016,44.1488,44.1939


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4622,2.194503,50.5632,23.3515,44.2105,44.2626
2,2.3373,2.193844,50.5844,23.4021,44.2448,44.2873
3,2.4042,2.192962,50.5121,23.3504,44.2199,44.2578
4,2.4782,2.193358,50.5599,23.3559,44.2719,44.3049
5,2.2866,2.191984,50.5036,23.3754,44.1702,44.2264


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5854,2.192518,50.6338,23.4261,44.2964,44.3228
2,2.468,2.191923,50.5822,23.4131,44.2785,44.3201
3,2.5172,2.192542,50.5278,23.4432,44.2954,44.3347
4,2.658,2.193531,50.5063,23.4794,44.2756,44.319
5,2.6016,2.19167,50.4445,23.4805,44.2956,44.3567


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4181,2.192765,50.5851,23.3877,44.2392,44.2761
2,2.496,2.191402,50.5755,23.3579,44.2332,44.2584
3,2.4577,2.191694,50.5415,23.3824,44.2731,44.3079
4,2.3673,2.192086,50.5517,23.4502,44.3054,44.359
5,2.3854,2.191733,50.5203,23.2591,44.2093,44.238


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4633,2.191892,50.5659,23.3405,44.2479,44.2839
2,2.3209,2.190737,50.5526,23.3369,44.2605,44.3138
3,2.3875,2.187369,50.5587,23.3853,44.2656,44.316
4,2.4056,2.189673,50.4848,23.314,44.228,44.2829
5,2.4283,2.184686,50.5618,23.3311,44.2532,44.2922


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.425,2.191098,50.5693,23.3356,44.2335,44.2714
2,2.5362,2.191742,50.6371,23.3906,44.3441,44.3878
3,2.4276,2.193449,50.5251,23.325,44.2358,44.2716
4,2.4945,2.196103,50.4608,23.2891,44.2091,44.2453
5,2.3974,2.194736,50.5118,23.3167,44.2088,44.2595


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7064,2.19109,50.6416,23.3771,44.3296,44.3825
2,2.469,2.188476,50.6125,23.3544,44.3216,44.3486
3,2.4558,2.192871,50.5842,23.3891,44.2967,44.3335
4,2.2827,2.19021,50.54,23.4233,44.2996,44.3389
5,2.534,2.188748,50.5985,23.4676,44.3178,44.3899


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7777,2.191103,50.6143,23.4161,44.3154,44.3646
2,2.6256,2.189939,50.5321,23.348,44.2706,44.3204
3,2.6395,2.189355,50.4497,23.2922,44.214,44.2666
4,2.3835,2.189022,50.473,23.2096,44.1847,44.2327
5,2.5525,2.189164,50.3065,23.2194,44.0991,44.1398


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5294,2.191242,50.6023,23.3895,44.3106,44.3562
2,2.5124,2.193135,50.5435,23.4126,44.2592,44.314
3,2.4534,2.191843,50.5549,23.3354,44.2127,44.2551
4,2.3897,2.190506,50.533,23.2863,44.1876,44.251
5,2.2703,2.190925,50.486,23.2544,44.1471,44.2086


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6564,2.190877,50.59,23.418,44.3183,44.364
2,2.5396,2.190489,50.5436,23.3929,44.2834,44.3284
3,2.662,2.188749,50.451,23.3994,44.2321,44.2883
4,2.5807,2.186695,50.4569,23.3228,44.2178,44.2758
5,2.344,2.191549,50.424,23.3254,44.1545,44.2142


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4706,2.191368,50.5545,23.4326,44.3143,44.3639
2,2.6593,2.1894,50.565,23.4185,44.3247,44.3777
3,2.4449,2.187589,50.551,23.4264,44.2953,44.3469
4,2.599,2.194378,50.5747,23.4358,44.288,44.3392
5,2.5159,2.191543,50.5855,23.4988,44.2424,44.3119


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5849,2.189603,50.5865,23.4952,44.2632,44.33
2,2.6733,2.190641,50.5755,23.4363,44.2311,44.2957
3,2.5399,2.188645,50.4947,23.3482,44.2037,44.2548
4,2.4525,2.189487,50.5321,23.3738,44.2364,44.2766
5,2.4249,2.187188,50.5284,23.2805,44.1815,44.2304


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4071,2.1889,50.5666,23.4938,44.2701,44.3486
2,2.3645,2.187008,50.631,23.4952,44.3338,44.3924
3,2.3858,2.189658,50.6213,23.5377,44.3336,44.3834
4,2.5773,2.184812,50.6093,23.4973,44.3126,44.3624
5,2.715,2.189981,50.5212,23.4905,44.3152,44.3591


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5443,2.18655,50.6481,23.5065,44.3622,44.4201
2,2.7401,2.185719,50.6688,23.5397,44.3874,44.4411
3,2.433,2.183048,50.6257,23.4872,44.3946,44.455
4,2.5861,2.186252,50.6565,23.4534,44.3901,44.432
5,2.3778,2.18471,50.5856,23.3848,44.3257,44.3775


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5968,2.187037,50.6465,23.5233,44.3423,44.4019
2,2.6615,2.186176,50.6502,23.5443,44.3742,44.4396
3,2.3972,2.185239,50.6801,23.555,44.3982,44.4486
4,2.6008,2.183765,50.7072,23.5776,44.3682,44.4092
5,2.3237,2.182413,50.6256,23.4582,44.323,44.3619


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4178,2.18343,50.6371,23.5303,44.3404,44.3993
2,2.5517,2.18444,50.5759,23.4808,44.2466,44.2887
3,2.2809,2.182858,50.5492,23.4566,44.2516,44.3101
4,2.3224,2.181702,50.4833,23.3651,44.1627,44.2214
5,2.4231,2.182311,50.4805,23.4101,44.1732,44.2387


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.41,2.183616,50.6547,23.5224,44.3309,44.3909
2,2.487,2.185639,50.6604,23.5234,44.3039,44.3762
3,2.5452,2.183595,50.6921,23.5351,44.3391,44.4054
4,2.6829,2.184803,50.604,23.4876,44.3146,44.3751
5,2.4299,2.190398,50.4565,23.3805,44.1657,44.2108


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4146,2.184816,50.6631,23.5315,44.2744,44.3397
2,2.3986,2.187181,50.6347,23.5254,44.2796,44.329
3,2.5133,2.184016,50.5402,23.4715,44.2345,44.3092
4,2.4735,2.186479,50.5816,23.5012,44.2688,44.3217
5,2.6823,2.184124,50.5206,23.3958,44.2235,44.2779


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4473,2.182933,50.6404,23.5322,44.3247,44.3935
2,2.69,2.183613,50.5844,23.4848,44.3012,44.3587
3,2.5152,2.183785,50.4948,23.4281,44.2412,44.311
4,2.4042,2.181426,50.5578,23.4737,44.3553,44.4177
5,2.742,2.181714,50.4818,23.4261,44.2635,44.3417


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4437,2.182296,50.6676,23.5559,44.3702,44.4244
2,2.581,2.179843,50.6667,23.532,44.3862,44.433
3,2.785,2.178096,50.6144,23.4718,44.3504,44.3952
4,2.4896,2.181329,50.5594,23.4554,44.3128,44.366
5,2.388,2.181144,50.5917,23.4445,44.2921,44.3299


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4636,2.182483,50.646,23.5505,44.3585,44.4153
2,2.4606,2.182542,50.5292,23.4048,44.2739,44.3393
3,2.7202,2.183105,50.5126,23.3908,44.2541,44.3248
4,2.4626,2.184437,50.4645,23.356,44.2424,44.3074
5,2.6468,2.185165,50.4391,23.3235,44.1829,44.2285


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4729,2.18412,50.6618,23.521,44.3257,44.3866
2,2.2813,2.184432,50.5962,23.4938,44.2854,44.3371
3,2.5235,2.186279,50.5962,23.4608,44.2717,44.3098
4,2.4301,2.186565,50.6419,23.4906,44.3174,44.3509
5,2.4067,2.184662,50.6267,23.4513,44.2825,44.3328


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4282,2.182747,50.6483,23.5006,44.3365,44.3987
2,2.1499,2.182719,50.6217,23.4466,44.2935,44.3535
3,2.6715,2.179765,50.5358,23.38,44.2315,44.291
4,2.3308,2.183464,50.5423,23.3883,44.2027,44.2431
5,2.3278,2.179035,50.4289,23.29,44.1987,44.2419


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5245,2.181573,50.6189,23.4895,44.3641,44.4172
2,2.7529,2.183518,50.5822,23.4791,44.2987,44.353
3,2.452,2.180545,50.5487,23.455,44.2579,44.3124
4,2.6318,2.181392,50.4846,23.4216,44.2589,44.3081
5,2.5421,2.183661,50.5208,23.4156,44.2343,44.2712


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4478,2.181164,50.6065,23.47,44.3347,44.3936
2,2.4395,2.183078,50.5732,23.4331,44.2506,44.3058
3,2.4566,2.183154,50.5408,23.375,44.2567,44.3082
4,2.3078,2.180449,50.6402,23.3968,44.2646,44.3218
5,2.3658,2.184301,50.4712,23.3996,44.1863,44.2536


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5752,2.180457,50.5772,23.3297,44.2405,44.3028
2,2.4036,2.180492,50.5561,23.3398,44.2568,44.3084
3,2.4837,2.183816,50.5779,23.3642,44.2137,44.2751
4,2.3926,2.181274,50.5777,23.3603,44.2029,44.2539
5,2.32,2.179489,50.6034,23.3905,44.2383,44.3017


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.3644,2.178308,50.6162,23.3857,44.2336,44.2851
2,2.6727,2.178564,50.6706,23.4192,44.2659,44.3152
3,2.3505,2.17648,50.6705,23.3886,44.2899,44.3367
4,2.4127,2.17744,50.6202,23.3891,44.2293,44.2894
5,2.4016,2.172351,50.5543,23.402,44.2659,44.3123


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6579,2.178324,50.657,23.3873,44.2651,44.3173
2,2.4904,2.175939,50.6721,23.4219,44.3151,44.3679
3,2.3706,2.177112,50.6595,23.3487,44.2738,44.3123
4,2.5818,2.176691,50.5637,23.4362,44.2746,44.3402
5,2.4926,2.174582,50.4554,23.4223,44.2725,44.3133


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.3576,2.176378,50.6153,23.3641,44.2634,44.3263
2,2.4373,2.180027,50.5663,23.3636,44.2133,44.2639
3,2.4385,2.181102,50.5075,23.3276,44.1932,44.2436
4,2.5192,2.17853,50.4964,23.2883,44.1936,44.2418
5,2.3772,2.17644,50.5666,23.3591,44.2434,44.3048


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5739,2.176669,50.6702,23.3981,44.2916,44.3534
2,2.595,2.175307,50.6191,23.3715,44.2336,44.2913
3,2.4791,2.174142,50.7142,23.4894,44.3487,44.3934
4,2.5056,2.171541,50.7568,23.4784,44.3369,44.4144
5,2.3727,2.172462,50.6153,23.4442,44.3046,44.3704


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4735,2.173397,50.6973,23.4689,44.343,44.4051
2,2.5768,2.173939,50.6581,23.4225,44.281,44.3371
3,2.5459,2.174642,50.6125,23.4532,44.2721,44.3274
4,2.3068,2.170533,50.6103,23.3239,44.2393,44.2885
5,2.4406,2.170146,50.6566,23.3556,44.2499,44.3084


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6119,2.174543,50.6969,23.4649,44.321,44.3703
2,2.5756,2.17476,50.679,23.4671,44.3489,44.4016
3,2.7091,2.172343,50.6594,23.4178,44.3541,44.4156
4,2.4005,2.175891,50.6448,23.4263,44.3125,44.3577
5,2.7755,2.173581,50.6215,23.3783,44.3428,44.388


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.2822,2.175114,50.6728,23.4618,44.3392,44.3947
2,2.3214,2.17328,50.6534,23.4947,44.3643,44.4266
3,2.397,2.171822,50.5685,23.4282,44.3054,44.3668
4,2.2686,2.170452,50.6159,23.4756,44.431,44.4891
5,2.6318,2.170126,50.646,23.4552,44.4142,44.4716


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5997,2.175025,50.6435,23.4755,44.3284,44.3783
2,2.418,2.172312,50.5895,23.4293,44.3246,44.3725
3,2.5085,2.17452,50.5457,23.4357,44.3137,44.3706
4,2.6139,2.173107,50.5405,23.4182,44.3736,44.4199
5,2.7951,2.168467,50.5221,23.379,44.3819,44.4419


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5537,2.175032,50.6768,23.4633,44.343,44.3974
2,2.6073,2.175243,50.636,23.4857,44.349,44.4093
3,2.3072,2.174222,50.5928,23.4924,44.3813,44.4395
4,2.5098,2.173833,50.5563,23.3975,44.337,44.3813
5,2.5864,2.171013,50.4847,23.3783,44.3388,44.3867


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4529,2.175438,50.6464,23.4894,44.3262,44.379
2,2.44,2.173767,50.6763,23.5253,44.3541,44.3929
3,2.3628,2.175987,50.5865,23.467,44.2241,44.2708
4,2.5235,2.176734,50.5776,23.5081,44.2235,44.2815
5,2.5113,2.172209,50.6491,23.4678,44.2816,44.3466


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.563,2.173801,50.6117,23.4995,44.3312,44.3802
2,2.5272,2.174219,50.6269,23.4496,44.3396,44.4081
3,2.3123,2.172396,50.6337,23.4139,44.2923,44.3431
4,2.4397,2.169363,50.6843,23.3898,44.293,44.3467
5,2.4652,2.171923,50.6829,23.4181,44.2846,44.3339


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5124,2.169614,50.6463,23.3698,44.2872,44.3417
2,2.3968,2.171207,50.6095,23.4555,44.3026,44.359
3,2.4648,2.170249,50.6602,23.438,44.3087,44.3703
4,2.2018,2.167066,50.6804,23.4314,44.3312,44.3859
5,2.6278,2.172925,50.627,23.4718,44.321,44.3824


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.7907,2.167509,50.6432,23.4418,44.317,44.3607
2,2.3565,2.167825,50.6094,23.4197,44.3019,44.353
3,2.5448,2.168058,50.5752,23.3954,44.323,44.3859
4,2.4806,2.16815,50.6773,23.474,44.3529,44.3995
5,2.3096,2.169355,50.599,23.4416,44.3447,44.401


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.446,2.169021,50.6333,23.4161,44.3123,44.3687
2,2.3549,2.171721,50.5115,23.3854,44.2947,44.3559
3,2.3982,2.169763,50.5227,23.2962,44.2845,44.3422
4,2.4498,2.169478,50.4905,23.3021,44.3292,44.3744
5,2.2212,2.172383,50.5388,23.4246,44.3606,44.4153


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4855,2.169772,50.5708,23.4286,44.3054,44.3557
2,2.454,2.167104,50.6011,23.4503,44.3205,44.3723
3,2.3651,2.170713,50.555,23.4142,44.2226,44.2706
4,2.4939,2.170369,50.6061,23.4487,44.2647,44.3273
5,2.3381,2.172742,50.6309,23.5146,44.2588,44.3106


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6556,2.17062,50.6382,23.5415,44.2753,44.3262
2,2.671,2.167296,50.6052,23.5284,44.3371,44.3997
3,2.5433,2.166692,50.5909,23.5691,44.3487,44.4105
4,2.4337,2.16905,50.5136,23.5647,44.2951,44.353
5,2.4237,2.166565,50.5606,23.5097,44.4027,44.4372


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4323,2.168255,50.6647,23.554,44.3144,44.3707
2,2.417,2.164742,50.6401,23.5052,44.3535,44.3999
3,2.3925,2.165158,50.7047,23.5632,44.4043,44.4581
4,2.357,2.164055,50.6574,23.5551,44.3964,44.4405
5,2.349,2.162483,50.7198,23.5136,44.3944,44.4418


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6017,2.162698,50.672,23.5334,44.4029,44.45
2,2.7075,2.166886,50.7198,23.574,44.3678,44.4114
3,2.5424,2.164251,50.6457,23.5679,44.3546,44.405
4,2.5059,2.166905,50.697,23.5829,44.406,44.4564
5,2.371,2.170372,50.6434,23.5829,44.32,44.3702


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4821,2.165129,50.7161,23.5685,44.3826,44.4316
2,2.3743,2.162434,50.7428,23.6104,44.4509,44.5038
3,2.3956,2.161526,50.7071,23.6411,44.4371,44.4835
4,2.3181,2.167741,50.7809,23.6892,44.4725,44.5207
5,2.4788,2.162851,50.6947,23.5777,44.3974,44.4591


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.1691,2.16607,50.7795,23.6877,44.4677,44.5178
2,2.3892,2.163028,50.7772,23.7095,44.4534,44.5233
3,2.4739,2.162875,50.7906,23.6437,44.4301,44.5041
4,2.4766,2.1631,50.7706,23.6584,44.4388,44.4993
5,2.4248,2.166287,50.6798,23.6472,44.4469,44.4986


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4307,2.162001,50.8266,23.6625,44.4591,44.5298
2,2.4742,2.161994,50.7852,23.6184,44.4738,44.5472
3,2.4575,2.162318,50.6923,23.5913,44.4112,44.4673
4,2.5369,2.161194,50.7024,23.6173,44.4086,44.4854
5,2.8167,2.162822,50.6826,23.5975,44.4006,44.4605


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.3813,2.162819,50.7887,23.63,44.446,44.5178
2,2.6286,2.16203,50.7203,23.5875,44.4358,44.4844
3,2.4314,2.161266,50.7258,23.5951,44.4196,44.482
4,2.5906,2.16364,50.6843,23.6114,44.4477,44.4972
5,2.3192,2.167739,50.7123,23.7158,44.5455,44.5945


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5133,2.161617,50.8124,23.6125,44.4604,44.5271
2,2.5889,2.160277,50.784,23.6075,44.4876,44.5548
3,2.612,2.160046,50.7348,23.6301,44.4695,44.5158
4,2.1481,2.162002,50.7534,23.6633,44.4896,44.5497
5,2.2507,2.162384,50.7153,23.6788,44.4437,44.5134


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.4697,2.161235,50.7444,23.5833,44.4342,44.4989
2,2.5581,2.158787,50.706,23.596,44.47,44.5313
3,2.3498,2.163183,50.6775,23.6633,44.485,44.5213
4,2.3525,2.159598,50.7204,23.5658,44.468,44.5382
5,2.5424,2.161039,50.7254,23.6283,44.4948,44.5445


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.447,2.161,50.7684,23.5879,44.4787,44.5463
2,2.4774,2.161415,50.7826,23.5905,44.4968,44.5366
3,2.5522,2.159206,50.7411,23.5634,44.5027,44.5476
4,2.6142,2.159254,50.8043,23.5877,44.5063,44.5423
5,2.4085,2.165402,50.7939,23.7382,44.5622,44.6068


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.6333,2.159049,50.7747,23.6216,44.4899,44.5392
2,2.5615,2.159728,50.7658,23.6217,44.4858,44.5413
3,2.4119,2.158898,50.7304,23.5587,44.4043,44.463
4,2.4656,2.159063,50.6659,23.5695,44.3764,44.4391
5,2.2786,2.159664,50.6067,23.5191,44.3225,44.3854


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Starting to train...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.5556,2.160083,50.785,23.6437,44.5257,44.5671
2,2.5759,2.159645,50.7948,23.6384,44.5485,44.585
3,2.3715,2.159388,50.7272,23.5947,44.464,44.5225
4,2.5969,2.161475,50.6846,23.596,44.4032,44.4506


Checkpoint destination directory ./results/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [None]:
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments
import random
import evaluate
import nltk
import torch
import numpy as np
from nltk.tokenize import sent_tokenize
nltk.download("punkt")


# Load dataset, model and tokenizer of model
dataset_name = "ccdv/pubmed-summarization"
model_name = "google/flan-t5-large"
dataset = load_dataset(dataset_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, torch_dtype=torch.float16)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)
random.seed(42)
# Unused columns for fine-tuning
remove_columns=["article", "abstract"]

first_10k_train = dataset['train'].select(range(10000))

# Get the maximum total input sequence length after tokenization.
# Sequences longer than this will be truncated, sequences shorter will be padded.
def get_max_length(dataset, column_key, tokenizer):
    tokenized_data = concatenate_datasets([dataset["train"], dataset["test"]]).map(
        lambda x: tokenizer(x[column_key], truncation=True), 
        batched=True, 
        remove_columns=remove_columns
    )
    max_length = max([len(x) for x in tokenized_data["input_ids"]])
    return max_length

max_source_length = get_max_length(dataset, "article", tokenizer)
print(f"Max source sequence length: {max_source_length}")

max_target_length = get_max_length(dataset, "abstract", tokenizer)
print(f"Max target sequence length: {max_target_length}")

prefix = "summarize: "

def preprocess_function(examples, padding="max_length"):
    
    # Add prefix to the input for Flan-T5
    inputs = [prefix + example for example in examples["article"]]

    # Tokenize inputs
    model_inputs = tokenizer(inputs, max_length=max_source_length, padding=padding, truncation=True)

    # Tokenize targets with the `text_target` keyword argument
    labels = tokenizer(text_target=examples["abstract"], max_length=max_target_length, padding=padding, truncation=True)

    # Replace all tokenizer.pad_token_id in the labels by -100 because we want to ignore padding in the loss
    if padding == "max_length":
        labels["input_ids"] = [
            [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
        ]

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=remove_columns)
print(f"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}")

# Load ROUGE metric
metric = evaluate.load("rouge")

# Function for post-processing text
def postprocess_text(predictions, labels):
    # Remove spaces at the beginning and at the end of texts
    predictions = [prediction.strip() for prediction in predictions]
    labels = [label.strip() for label in labels]

    # Operate texts at the level of sentences
    predictions = ["\n".join(sent_tokenize(prediction)) for prediction in predictions]
    labels = ["\n".join(sent_tokenize(label)) for label in labels]

    return predictions, labels


def compute_metrics(eval_preds):
    predictions, labels = eval_preds
    if isinstance(predictions, tuple):
        predictions = predictions[0]
        
    decoded_predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_predictions, decoded_labels = postprocess_text(decoded_predictions, decoded_labels)

    result = metric.compute(predictions=decoded_predictions, references=decoded_labels, use_stemmer=True)
    result = {k: round(v * 100, 4) for k, v in result.items()}
    prediction_lens = [np.count_nonzero(prediction != tokenizer.pad_token_id) for prediction in predictions]
    result["gen_len"] = np.mean(prediction_lens)
    return result


# Define data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    label_pad_token_id=-100,
    #pad_to_multiple_of=8
)

# Define training args
training_args = Seq2SeqTrainingArguments(
    output_dir="sft-flan-t5",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    predict_with_generate=True,
    fp16=True,
    learning_rate=2e-5,
    num_train_epochs=2,
    logging_dir="sft-flan-t5/logs",
    logging_strategy="steps",
    logging_steps=500,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
)

# Create Seq2SeqTrainer instance
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
)

# Training model
print("Starting to train...")
trainer.train()
trainer.save_model("saved_model")
tokenizer.save_pretrained("saved_tokenizer")



In [None]:
model = AutoModelForSeq2SeqLM.from_pretrained("saved_model")
tokenizer = AutoTokenizer.from_pretrained("saved_tokenizer")
validation_dataset = load_dataset(dataset_name)["validation"]
validation_dataset = validation_dataset.map(preprocess_function, batched=True)

In [None]:
!pip install tqdm
from tqdm import tqdm

model.eval()
generated_summaries = []
for batch in tqdm(validation_dataset, desc="Generating summaries"):
    with torch.no_grad():
        inputs = tokenizer(batch["article"], return_tensors="pt", padding=True, truncation=True)
        outputs = model.generate(**inputs)
        summaries = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        generated_summaries.extend(summaries)

In [None]:
from evaluate import load

# Load the ROUGE metric
rouge = load("rouge")

# Assuming generated_summaries and references are already defined
results = rouge.compute(predictions=generated_summaries, references=references, use_stemmer=True)

# Adjusted to directly use the numpy.float64 values
results = {key: value * 100 for key, value in results.items()}
print(results)


In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_path = "saved_model"
tokenizer_path = "saved_tokenizer"
max_source_length =512
max_target_length = 512
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

text = "A 47-year-old female patient presents with a history of abdominal pain that began three days ago, localized to the lower right quadrant of the abdomen, and characterized by sharp, stabbing sensations that worsen with movement and after meals. The pain is relieved by rest and warmth but aggravates with movement and eating. The patient reports that the pain radiates to the lower back and is worse in the morning and after meals, rating the severity as a 7 out of 10. Vital signs are within normal limits, and the physical examination reveals tenderness in the lower right quadrant of the abdomen. Laboratory data, including a complete blood count (CBC) and electrolyte panel, are pending, and an abdominal ultrasound is scheduled for tomorrow. The patient's problem list includes abdominal pain, tenderness in the lower right quadrant, pain worsening with movement and after meals, and radiating pain to the lower back. The plan involves diagnostic testing, including a complete blood count (CBC) and electrolyte panel to rule out anemia and electrolyte imbalances, and an abdominal ultrasound to identify the cause of the pain. Therapy includes pain management with over-the-counter analgesics and instructions to avoid strenuous activity and to rest. A consultation with a gastroenterologist is recommended for further evaluation. The patient is educated on the importance of adhering to the prescribed medication regimen and the benefits of dietary changes, such as avoiding spicy foods and drinking plenty of water."
inputs = tokenizer(text, return_tensors="pt", max_length=max_source_length, truncation=True)

summary_ids = model.generate(inputs["input_ids"], max_length=max_target_length, num_beams=4, early_stopping=True)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary)

In [None]:














`