In [None]:
! pip install -q peft transformers datasets
! pip install -q evaluate
! pip install -q huggingface_hub
! pip install -q bert-score

In [2]:
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    default_data_collator,
    get_linear_schedule_with_warmup,
)
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    PrefixTuningConfig,
    TaskType,
)
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

device = "cuda:0"
model_name_or_path = "t5-large"
tokenizer_name_or_path = "t5-large"

text_column = "sentence"
label_column = "text_label"
max_length = 512
lr = 1e-2
num_epochs = 5
batch_size = 2
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-large automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [3]:
from huggingface_hub import notebook_login

In [4]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [12]:
peft_config = PrefixTuningConfig(
    task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, num_virtual_tokens=20
)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)

In [6]:
model.print_trainable_parameters()

trainable params: 983,040 || all params: 738,651,136 || trainable%: 0.13308583065659835


In [7]:
dataset = load_dataset("minh21/cpgQA-v1.0-unique-context-for-flan-t5")
train_dataset = dataset["train"]
test_dataset = dataset["test"]

Downloading readme:   0%|          | 0.00/767 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/29.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/145k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating test split:   0%|          | 0/144 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/860 [00:00<?, ? examples/s]

In [8]:
def qa_prompt_preprocess(ds):
    input = f"question: {ds['question']} " f"context: {ds['context']}"
    model_outputs = {}
    input_tokenized = tokenizer(
        input,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    output_tokenized = tokenizer(
        ds["answer_text"], padding="max_length", truncation=True, max_length=max_length
    )
    model_outputs["input_ids"] = input_tokenized["input_ids"].squeeze(0)
    model_outputs["attention_mask"] = input_tokenized["attention_mask"]
    labels = output_tokenized["input_ids"]
    labels[labels == 0] = -100
    model_outputs["labels"] = labels
    model_outputs["decoder_attention_mask"] = output_tokenized["attention_mask"]
    return model_outputs


seq2seq_dataset = {
    "train": dataset["train"].map(
        qa_prompt_preprocess,
        batched=False,
        remove_columns=dataset["train"].column_names,
    ),
    "test": dataset["test"].map(
        qa_prompt_preprocess,
        batched=False,
        remove_columns=dataset["train"].column_names,
    ),
}

Map:   0%|          | 0/860 [00:00<?, ? examples/s]

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

In [None]:
seq2seq_dataset

{'train': Dataset({
     features: ['input_ids', 'attention_mask', 'labels', 'decoder_attention_mask'],
     num_rows: 860
 }),
 'test': Dataset({
     features: ['input_ids', 'attention_mask', 'labels', 'decoder_attention_mask'],
     num_rows: 144
 })}

In [9]:
train_dataset = seq2seq_dataset["train"]
eval_dataset = seq2seq_dataset["test"]

train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    collate_fn=default_data_collator,
    batch_size=batch_size,
    pin_memory=True,
)
eval_dataloader = DataLoader(
    eval_dataset,
    collate_fn=default_data_collator,
    batch_size=batch_size,
    pin_memory=True,
)

#Mannual Training


In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * num_epochs),
)

In [None]:
model = model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    eval_loss = 0
    eval_preds = []
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        eval_loss += loss.detach().float()
        eval_preds.extend(
            tokenizer.batch_decode(
                torch.argmax(outputs.logits, -1).detach().cpu().numpy(),
                skip_special_tokens=True,
            )
        )

    eval_epoch_loss = eval_loss / len(eval_dataloader)
    eval_ppl = torch.exp(eval_epoch_loss)
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

100%|██████████| 430/430 [01:36<00:00,  4.44it/s]
100%|██████████| 72/72 [00:09<00:00,  7.38it/s]


epoch=0: train_ppl=tensor(3.5676e+09, device='cuda:0') train_epoch_loss=tensor(21.9952, device='cuda:0') eval_ppl=tensor(1.7241e+10, device='cuda:0') eval_epoch_loss=tensor(23.5706, device='cuda:0')


100%|██████████| 430/430 [01:36<00:00,  4.44it/s]
100%|██████████| 72/72 [00:09<00:00,  7.39it/s]


epoch=1: train_ppl=tensor(3.6868e+09, device='cuda:0') train_epoch_loss=tensor(22.0280, device='cuda:0') eval_ppl=tensor(1.7241e+10, device='cuda:0') eval_epoch_loss=tensor(23.5706, device='cuda:0')


100%|██████████| 430/430 [01:36<00:00,  4.44it/s]
100%|██████████| 72/72 [00:09<00:00,  7.38it/s]


epoch=2: train_ppl=tensor(3.6969e+09, device='cuda:0') train_epoch_loss=tensor(22.0308, device='cuda:0') eval_ppl=tensor(1.7241e+10, device='cuda:0') eval_epoch_loss=tensor(23.5706, device='cuda:0')


100%|██████████| 430/430 [01:36<00:00,  4.44it/s]
100%|██████████| 72/72 [00:09<00:00,  7.38it/s]


epoch=3: train_ppl=tensor(3.6034e+09, device='cuda:0') train_epoch_loss=tensor(22.0051, device='cuda:0') eval_ppl=tensor(1.7241e+10, device='cuda:0') eval_epoch_loss=tensor(23.5706, device='cuda:0')


100%|██████████| 430/430 [01:36<00:00,  4.44it/s]
100%|██████████| 72/72 [00:09<00:00,  7.40it/s]

epoch=4: train_ppl=tensor(3.6665e+09, device='cuda:0') train_epoch_loss=tensor(22.0225, device='cuda:0') eval_ppl=tensor(1.7241e+10, device='cuda:0') eval_epoch_loss=tensor(23.5706, device='cuda:0')





#Train with trainer


In [13]:
import transformers

bs = 4
eval_bs = 4
epochs = 5
lr = 2e-2
step = 200
training_args = transformers.Seq2SeqTrainingArguments(
    output_dir="./cpgqa-prefix-results",
    eval_steps=step,
    logging_steps=step,
    evaluation_strategy="steps",
    learning_rate=lr,
    per_device_train_batch_size=bs,
    per_device_eval_batch_size=bs,
    # gradient_accumulation_steps=bs,
    num_train_epochs=epochs,
    weight_decay=0.01,
    bf16=True,
    warmup_ratio=0.03,
    save_steps=step,
    lr_scheduler_type="linear",
    load_best_model_at_end=True,
    # metric_for_best_model="bleu_bleu",
)

from transformers import default_data_collator

data_collator = default_data_collator

trainer = transformers.Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
)
trainer.train()

Step,Training Loss,Validation Loss
200,2.5215,0.039875
400,0.0289,0.033514
600,0.0291,0.024777
800,0.0204,0.02393
1000,0.0208,0.02304


TrainOutput(global_step=1075, training_loss=0.4889429238785145, metrics={'train_runtime': 223.4665, 'train_samples_per_second': 19.242, 'train_steps_per_second': 4.811, 'total_flos': 9309703372800000.0, 'train_loss': 0.4889429238785145, 'epoch': 5.0})

In [19]:
model.push_to_hub("flan-t5-prefix")

adapter_model.bin:   0%|          | 0.00/3.93M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/checkiejan/flan-t5-prefix-1/commit/c6d560504edfb96940768018e03d87aa109f6221', commit_message='Upload model', commit_description='', oid='c6d560504edfb96940768018e03d87aa109f6221', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
trainer.push_to_hub()

'https://huggingface.co/checkiejan/cpgqa-prefix-results/tree/main/'

#Evaluate

In [24]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# peft_model_id = "checkiejan/cpgqa-prefix-results"
peft_model_id = "checkiejan/flan-t5-prefix"
config = PeftConfig.from_pretrained(peft_model_id)

model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

model = PeftModel.from_pretrained(model, peft_model_id)

Downloading (…)/adapter_config.json:   0%|          | 0.00/370 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/3.93M [00:00<?, ?B/s]

In [14]:
model.to(device)
predictions_for_squad = []
predictions_for_squad_v2 = []
predictions_for_bert_score = []
references_for_bert_score = []
for data in test_dataset:
    context = data["context"]
    question = data["question"]
    answer = data["answer_text"]
    id = data["id"]
    input = f"""
    "{context}\nTry to answer this question if possible (otherwise reply "
        "\"unanswerable\"): {question}"
    """
    model_inputs = tokenizer(
        input,
        padding="max_length",
        max_length=512,
        truncation=True,
        return_tensors="pt",
    ).to(torch.device(device))

    model_output = tokenizer.decode(
        model.generate(
            input_ids=model_inputs["input_ids"],
            attention_mask=model_inputs["attention_mask"],
        )[0],
        skip_special_tokens=True,
    )

    predictions_for_squad.append(
        {
            "prediction_text": model_output,
            "id": str(id),
        }
    )

    predictions_for_squad_v2.append(
        {
            "prediction_text": model_output,
            "no_answer_probability": 0,
            "id": str(id),
        }
    )

    predictions_for_bert_score.append(model_output)
    references_for_bert_score.append(answer)
    # predictions.extend(predicted_texts)



In [15]:
references_for_squad_v2 = [
    {
        "answers": {"answer_start": [ds["answer_start"]], "text": [ds["answer_text"]]},
        "id": str(ds["id"]),
    }
    for id, ds in enumerate(test_dataset)
]

In [16]:
from evaluate import load

results = dict()
squad_metric = load("squad_v2")
results["squad_v2"] = squad_metric.compute(
    predictions=predictions_for_squad_v2, references=references_for_squad_v2
)

squad_metric = load("squad")
results["squad"] = squad_metric.compute(
    predictions=predictions_for_squad, references=references_for_squad_v2
)

bleu_metrics = load("bleu")
prediction = ["hello there general kenobi", "foo bar foobar"]
reference = [["hello there general kenobi"], ["foo bar foobar"]]
results["bleu"] = bleu_metrics.compute(
    predictions=predictions_for_bert_score, references=references_for_bert_score
)

bertscore_metric = load("bertscore")
berscore = bertscore_metric.compute(
    predictions=predictions_for_bert_score,
    references=references_for_bert_score,
    lang="en",
)

results["bertscore"] = {
    "precision": sum(berscore["precision"]) / len(berscore["precision"]),
    "recall": sum(berscore["recall"]) / len(berscore["recall"]),
    "f1": sum(berscore["f1"]) / len(berscore["f1"]),
}
results

Downloading builder script:   0%|          | 0.00/6.47k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/4.53k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.32k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'squad_v2': {'exact': 0.6944444444444444,
  'f1': 2.1353430317246107,
  'total': 144,
  'HasAns_exact': 0.6944444444444444,
  'HasAns_f1': 2.1353430317246107,
  'HasAns_total': 144,
  'best_exact': 0.6944444444444444,
  'best_exact_thresh': 0.0,
  'best_f1': 2.1353430317246107,
  'best_f1_thresh': 0.0},
 'squad': {'exact_match': 0.6944444444444444, 'f1': 2.1353430317246107},
 'bleu': {'bleu': 0.018207794096735407,
  'precisions': [0.034067085953878404,
   0.015306122448979591,
   0.014814814814814815,
   0.014227642276422764],
  'brevity_penalty': 1.0,
  'length_ratio': 1.1932457786116322,
  'translation_length': 1908,
  'reference_length': 1599},
 'bertscore': {'precision': 0.7843345072534349,
  'recall': 0.8016920209758811,
  'f1': 0.7926918959452046}}