In [1]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=3

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=3


In [2]:
from datetime import datetime
import copy

from tqdm.notebook import tqdm
import numpy as np
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from datasets import load_dataset
import transformers
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq, AutoTokenizer, pipeline
import evaluate
from evaluate import evaluator

In [3]:
# load dataset
cnn_test = load_dataset("cnn_dailymail", '3.0.0', split="test")

checkpoint = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)


def preprocess_function(examples):
    inputs = examples["article"]
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True)

    labels = tokenizer(text_target=examples["highlights"], max_length=142, 
                       truncation=True, padding=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs


tokenized_test = cnn_test.map(preprocess_function, batched=True)

Found cached dataset cnn_dailymail (/home/markintosh/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
Loading cached processed dataset at /home/markintosh/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de/cache-e6a8566197def2bc.arrow


In [4]:
# evaluation setup
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [16]:
# model
model_ckpt = "facebook/bart-large-cnn"
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test.select(range(1000)))
metrics

{'eval_loss': 8.2366304397583,
 'eval_rouge1': 0.3439,
 'eval_rouge2': 0.1441,
 'eval_rougeL': 0.2463,
 'eval_rougeLsum': 0.2855,
 'eval_gen_len': 66.694,
 'eval_runtime': 364.4356,
 'eval_samples_per_second': 2.744,
 'eval_steps_per_second': 0.274}

In [5]:
# model
model_ckpt = './checkpoints2023-05-06T13:00:32.263295'
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test.select(range(1000)))
metrics

You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 7.847792148590088,
 'eval_rouge1': 0.3722,
 'eval_rouge2': 0.1564,
 'eval_rougeL': 0.2622,
 'eval_rougeLsum': 0.3047,
 'eval_gen_len': 68.436,
 'eval_runtime': 403.6445,
 'eval_samples_per_second': 2.477,
 'eval_steps_per_second': 0.248}

In [6]:
# model
model_ckpt = './checkpoints2023-05-06T16:43:22.416287/'
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test.select(range(1000)))
metrics

{'eval_loss': 8.471336364746094,
 'eval_rouge1': 0.3672,
 'eval_rouge2': 0.1566,
 'eval_rougeL': 0.2573,
 'eval_rougeLsum': 0.2988,
 'eval_gen_len': 69.594,
 'eval_runtime': 316.1945,
 'eval_samples_per_second': 3.163,
 'eval_steps_per_second': 0.316}

In [6]:
# model
model_ckpt = './checkpoints2023-05-06T13:00:32.263295'
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test)
metrics

{'eval_loss': 6.863325595855713,
 'eval_rouge1': 0.4618,
 'eval_rouge2': 0.2235,
 'eval_rougeL': 0.32,
 'eval_rougeLsum': 0.3912,
 'eval_gen_len': 74.1923,
 'eval_runtime': 4150.6028,
 'eval_samples_per_second': 2.768,
 'eval_steps_per_second': 0.277}

In [5]:
# model
model_ckpt = './checkpoints2023-05-12T20:18:24.824529/'
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test)
metrics

You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 7.3013129234313965,
 'eval_rouge1': 0.4576,
 'eval_rouge2': 0.2227,
 'eval_rougeL': 0.3167,
 'eval_rougeLsum': 0.3871,
 'eval_gen_len': 77.4565,
 'eval_runtime': 4409.3738,
 'eval_samples_per_second': 2.606,
 'eval_steps_per_second': 0.261}

In [5]:
# model
model_ckpt = './checkpoints2023-05-14T18:15:05.361400/'
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test)
metrics

You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 7.935605525970459,
 'eval_rouge1': 0.4604,
 'eval_rouge2': 0.2261,
 'eval_rougeL': 0.3164,
 'eval_rougeLsum': 0.4293,
 'eval_gen_len': 87.524,
 'eval_runtime': 4951.5594,
 'eval_samples_per_second': 2.32,
 'eval_steps_per_second': 0.232}

In [6]:
# model
model_ckpt = './checkpoints2023-05-14T19:42:34.263202/'
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

training_args = Seq2SeqTrainingArguments(
    output_dir="BART",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=4,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

metrics = trainer.evaluate(tokenized_test)
metrics

{'eval_loss': 8.115837097167969,
 'eval_rouge1': 0.4597,
 'eval_rouge2': 0.2255,
 'eval_rougeL': 0.3175,
 'eval_rougeLsum': 0.4295,
 'eval_gen_len': 86.2607,
 'eval_runtime': 4619.5407,
 'eval_samples_per_second': 2.487,
 'eval_steps_per_second': 0.249}