# Set up

In [None]:
# Install the libraries this notebook imports. No versions are pinned, so a
# fresh run picks up whatever is current at install time.
# NOTE(review): `transformers` is installed twice below (plain and with the
# `[torch]` extra) — presumably only the second line is needed; confirm.
! pip install -q datasets
! pip install -q transformers -U
! pip install -q transformers[torch] -U
! pip install -q sentencepiece
! pip install -q evaluate
! pip install -q bert_score
! pip install -q -U accelerate
! pip install -q peft
! pip install -q huggingface-hub

In [None]:
from huggingface_hub import notebook_login

# Ask for Hugging Face Hub credentials up front: later cells load the dataset
# with authentication and push the trained model and tokenizer to the Hub.
notebook_login()

# CpgQA

In [None]:
from datasets import load_dataset

dataset_name = "minh21/cpgQA-v1.0-unique-context-test-10-percent-validation-10-percent"
# `token=True` reuses the credentials stored by `notebook_login()`. It replaces
# the deprecated `use_auth_token` argument, which current `datasets` releases
# (the install above is unpinned) no longer accept.
# The unused "title"/"id" columns are dropped and "answer_text" is renamed to
# "answer", the column name the rest of the notebook reads.
dataset = (
    load_dataset(dataset_name, token=True)
    .remove_columns(["title", "id"])
    .rename_column("answer_text", "answer")
)

# Model

In [4]:
# Base model loaded for both the prompting baseline and the IA3 fine-tuning.
model_nm = "google/flan-t5-large"
# Token budget for the tokenized input (inputs are truncated to this length).
max_length = 512
# Token budget for tokenized answers and for generation (`max_new_tokens`).
max_target_length = 200
# Name of the local output directory and of the Hub repository.
checkpoint = "flan-t5-large-ia3-cpgQA"
# Hub namespace; combined with `checkpoint` when reloading the trained model.
username = "legacy107"

In [5]:
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` whose items carry
            ``numel()`` and ``requires_grad``.

    The counts and the trainable percentage are written to stdout; nothing is
    returned.
    """
    # Collect (size, is_trainable) once per parameter, then aggregate.
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, trainable in sizes if trainable)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
import evaluate


def evaluate_qa(predicted_result):
    """Score generated answers with SQuAD (exact match / F1), BLEU and BERTScore.

    Args:
        predicted_result: Dataset-like object that yields one row per example
            when iterated and returns a whole column when indexed by name. The
            columns read are "output" (generated answer), "answer" (gold
            answer) and "answer_start".

    Returns:
        dict with three entries: "squad" and "bleu" hold whatever the
        respective metric returns, "bertscore" holds the mean precision,
        recall and F1 over all examples.
    """
    # SQuAD needs prediction and reference records paired through a shared id;
    # the row index is used for that.
    squad = evaluate.load("squad")
    squad_predictions = []
    for idx, generated in enumerate(predicted_result["output"]):
        squad_predictions.append({"prediction_text": generated, "id": str(idx)})
    squad_references = []
    for idx, row in enumerate(predicted_result):
        gold = {"answer_start": [row["answer_start"]], "text": [row["answer"]]}
        squad_references.append({"answers": gold, "id": str(idx)})
    scores = {
        "squad": squad.compute(
            predictions=squad_predictions, references=squad_references
        )
    }

    # BLEU and BERTScore work on the plain text columns.
    generated_texts = predicted_result["output"]
    gold_texts = predicted_result["answer"]

    scores["bleu"] = evaluate.load("bleu").compute(
        predictions=generated_texts, references=gold_texts
    )

    per_example = evaluate.load("bertscore").compute(
        predictions=generated_texts, references=gold_texts, lang="en"
    )

    def _mean(values):
        return sum(values) / len(values)

    # BERTScore returns one value per example; report the averages.
    scores["bertscore"] = {
        key: _mean(per_example[key]) for key in ("precision", "recall", "f1")
    }

    return scores

# Evaluate prompting

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# A tokenizer holds no weights, so the model-placement option `device_map` is
# not passed to it.
tokenizer = AutoTokenizer.from_pretrained(model_nm)
# `device_map="auto"` leaves the placement of the model weights to the library.
model = AutoModelForSeq2SeqLM.from_pretrained(model_nm, device_map="auto")

In [None]:
def interfere(ds):
    """Generate an answer for one example using a one-shot prompt.

    The prompt consists of the first training example (context, question and
    gold answer) as a demonstration, followed by the context and question of
    ``ds`` with an open "answer: " slot.

    Args:
        ds: One dataset row with "context" and "question" entries.

    Returns:
        The same row with two entries added: "input" (the full prompt) and
        "output" (the decoded generation).
    """
    demo = dataset["train"][0]
    # NOTE(review): the demonstration comes first and the prompt is truncated
    # to `max_length` tokens, so a long demonstration can push the actual
    # question out of the model input — confirm this is acceptable.
    ds["input"] = (
        f"context: {demo['context']} "
        + f"question: {demo['question']} "
        + f"answer: {demo['answer']}\n"
        + f"context: {ds['context']} "
        + f"question: {ds['question']} "
        + f"answer: "
    )
    # A single sequence needs no padding. The whole encoding (ids plus
    # attention mask) is moved to wherever the model was placed instead of
    # assuming a CUDA device.
    encoded = tokenizer(
        ds["input"],
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    ).to(model.device)
    outputs = model.generate(**encoded, max_new_tokens=max_target_length)
    ds["output"] = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return ds


# Put the model in evaluation mode and turn off gradient tracking, then
# generate an answer for every test example, one row at a time.
model.eval()
with torch.no_grad():
    predicted_result = dataset["test"].map(interfere, batched=False)

In [None]:
evaluate_qa(predicted_result)

# IA3

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModelForSeq2SeqLM, get_peft_config

# IA3 adapter settings as a plain dict; converted to a PEFT config object below.
config = {
    "peft_type": "IA3",
    "task_type": "SEQ_2_SEQ_LM",
    "inference_mode": False,  # the adapter is about to be trained
    # presumably the attention query/value projections of T5 — TODO confirm
    "target_modules": ["q", "v"],
}

peft_config = get_peft_config(config)
# Rebinds `tokenizer` and `model` from the prompting section; unlike there, no
# `device_map` is passed when loading the model here.
tokenizer = AutoTokenizer.from_pretrained(model_nm)
model = AutoModelForSeq2SeqLM.from_pretrained(model_nm)
# Wrap the base model with the adapter and report the trainable parameter count.
peft_model = PeftModelForSeq2SeqLM(model, peft_config)
peft_model.print_trainable_parameters()

In [None]:
def qa_prompt_preprocess(ds):
    """Tokenize one example into model inputs plus target labels.

    Args:
        ds: One dataset row with "question", "context" and "answer" entries.

    Returns:
        The tokenizer output for the prompt "question: ... context: ...",
        truncated to ``max_length``, with a "labels" entry holding the token
        ids of the answer truncated to ``max_target_length``.
    """
    prompt = f"question: {ds['question']} context: {ds['context']}"
    encoded = tokenizer(prompt, max_length=max_length, truncation=True)

    # The answer goes through the tokenizer's target-side path (`text_target`).
    encoded["labels"] = tokenizer(
        text_target=ds["answer"], max_length=max_target_length, truncation=True
    )["input_ids"]
    return encoded


# Tokenize the train and test splits, one example per call.
seq2seq_dataset = {
    split: dataset[split].map(qa_prompt_preprocess, batched=False)
    for split in ("train", "test")
}

In [9]:
# Drop the raw text columns so only the tokenized fields reach the trainer.
# Only columns that are still present are removed, which makes this cell safe
# to re-run (a second run would otherwise fail on the already-removed columns).
_raw_columns = ["answer", "answer_start", "question", "context"]
for _split in ("train", "test"):
    _present = [
        column
        for column in _raw_columns
        if column in seq2seq_dataset[_split].column_names
    ]
    if _present:
        seq2seq_dataset[_split] = seq2seq_dataset[_split].remove_columns(_present)

In [None]:
# Loaded once here and read as globals by `compute_metrics`, so the metrics
# are not reloaded on every evaluation round.
squad_metric = evaluate.load("squad")
bleu_metrics = evaluate.load("bleu")


def compute_metrics(pred):
    """Compute SQuAD (exact match / F1) and BLEU for an evaluation round.

    Args:
        pred: Evaluation output handed over by the trainer. ``pred.predictions``
            holds the generated token ids and ``pred.label_ids`` the gold
            token ids; positions to ignore are marked with -100.

    Returns:
        dict with a "squad" and a "bleu" entry, each holding the result of the
        respective metric.
    """
    import numpy as np

    # Read both arrays by attribute. Tuple-unpacking the object yields
    # (predictions, label_ids) in that order, which is easy to get backwards
    # and silently swaps hypothesis and reference for BLEU.
    pred_ids, labels_ids = pred.predictions, pred.label_ids
    if isinstance(pred_ids, tuple):
        pred_ids = pred_ids[0]

    # -100 is not a real token id, so it is replaced by the pad id before
    # decoding. `np.where` builds new arrays, leaving the trainer's untouched.
    pad_id = tokenizer.pad_token_id
    pred_ids = np.where(pred_ids != -100, pred_ids, pad_id)
    labels_ids = np.where(labels_ids != -100, labels_ids, pad_id)

    predictions = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    references = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    # SQuAD pairs predictions and references through a shared id; the gold
    # answer position is not available here, so -1 is used as a placeholder.
    squad_predictions = [
        {"prediction_text": text, "id": str(idx)}
        for idx, text in enumerate(predictions)
    ]
    squad_references = [
        {
            "answers": {"answer_start": [-1], "text": [text]},
            "id": str(idx),
        }
        for idx, text in enumerate(references)
    ]
    results = {}
    results["squad"] = squad_metric.compute(
        predictions=squad_predictions, references=squad_references
    )

    results["bleu"] = bleu_metrics.compute(
        predictions=predictions, references=references
    )

    return results

In [None]:
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from transformers.trainer_callback import TrainerCallback
import os


class SavePeftModelCallback(TrainerCallback):
    """Trainer callback that stores the model via ``save_pretrained`` in each checkpoint.

    On every save event the model is written to ``<checkpoint>/adapter_model``
    and a ``pytorch_model.bin`` file in the checkpoint folder, if present, is
    deleted.
    """

    def on_save(self, args, state, control, **kwargs):
        """Save the model for the current step and remove the full weight file.

        Args:
            args: Training arguments; ``output_dir`` is the checkpoint root.
            state: Trainer state; ``global_step`` names the checkpoint folder.
            control: Trainer control object, returned unchanged.
            **kwargs: Must contain "model", the model being trained.

        Returns:
            ``control``, untouched.
        """
        step_dir = os.path.join(
            args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}"
        )

        kwargs["model"].save_pretrained(os.path.join(step_dir, "adapter_model"))

        # Delete the weight file from the checkpoint folder when one was written.
        full_weights = os.path.join(step_dir, "pytorch_model.bin")
        if os.path.exists(full_weights):
            os.remove(full_weights)
        return control

In [None]:
from transformers import (
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
)

# Training hyper-parameters.
# `bs`, `eval_bs` and `epochs` are currently not passed to the trainer: the
# batch size is chosen by `auto_find_batch_size` and the run length is bounded
# by `max_steps`. To use them instead, pass `per_device_train_batch_size=bs`,
# `per_device_eval_batch_size=eval_bs` and `num_train_epochs=epochs`.
bs = 2
eval_bs = 1
epochs = 16
lr = 3e-3
steps = 200  # shared interval for saving, logging and evaluation
max_steps = 4000  # number of steps before overfitting
training_args = Seq2SeqTrainingArguments(
    output_dir=f"./{checkpoint}",
    learning_rate=lr,
    auto_find_batch_size=True,
    # Use the variable defined above rather than repeating the literal, so
    # editing `max_steps` actually changes the run.
    max_steps=max_steps,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    push_to_hub=True,
    save_strategy="steps",
    save_steps=steps,
    logging_steps=steps,
    evaluation_strategy="steps",
    eval_steps=steps,
    # Evaluate by generating text so `compute_metrics` receives token ids.
    predict_with_generate=True,
    generation_max_length=max_target_length,
)

# Pads every batch to `max_length`.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=peft_model,
    padding="max_length",
    max_length=max_length,
    return_tensors="pt",
)

# NOTE(review): evaluation during training runs on the test split, the same
# split used for the final scores — confirm this is intended.
trainer = Seq2SeqTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=seq2seq_dataset["train"].with_format("torch"),
    eval_dataset=seq2seq_dataset["test"].with_format("torch"),
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[SavePeftModelCallback],
)

In [None]:
trainer.train()

In [None]:
# Write the trained model and the tokenizer to the local `checkpoint` directory.
# NOTE(review): `trainer.model` is the PEFT-wrapped model, so presumably only
# the adapter weights are written — confirm.
trainer.model.save_pretrained(checkpoint)
tokenizer.save_pretrained(checkpoint)

# Upload both to the Hub repository of the same name.
trainer.model.push_to_hub(checkpoint)
tokenizer.push_to_hub(checkpoint)

## Evaluate from checkpoint

In [None]:
from transformers import AutoTokenizer
from peft import AutoPeftModelForSeq2SeqLM

# Hub repository the trained model and tokenizer were pushed to.
model_checkpoint = f"{username}/{checkpoint}"
# A tokenizer holds no weights, so the model-placement option `device_map` is
# not passed to it.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# `device_map="auto"` leaves the placement of the model weights to the library.
checkpoint_model = AutoPeftModelForSeq2SeqLM.from_pretrained(
    model_checkpoint, device_map="auto"
)

In [None]:
import torch


def interfere_prompt(ds):
    """Generate an answer for one example with the fine-tuned checkpoint.

    The prompt uses the same "question: ... context: ..." layout as the
    training preprocessing.

    Args:
        ds: One dataset row with "question" and "context" entries.

    Returns:
        The same row with an "output" entry holding the decoded generation.
    """
    qa_input = f"question: {ds['question']} context: {ds['context']}"
    # A single sequence needs no padding. The encoding is moved to the device
    # the model was placed on instead of assuming a CUDA device.
    encoded = tokenizer(
        qa_input,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    ).to(checkpoint_model.device)
    outputs = checkpoint_model.generate(**encoded, max_new_tokens=max_target_length)
    ds["output"] = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return ds


# Put the reloaded model in evaluation mode and turn off gradient tracking,
# then generate an answer for every test example, one row at a time.
checkpoint_model.eval()
with torch.no_grad():
    predicted_result = dataset["test"].map(interfere_prompt, batched=False)

In [None]:
evaluate_qa(predicted_result)