# conditional generation任务
条件生成任务是给定一个文本后生成一段推理或者解释
prompt的方式是采用Prefix的方式 api:PrefixTuningTemplate
文本在<eos>后加上mask
在进行推理时，有一些generation_arguments
评价方式使用generation_metric api(不一定所有任务都用这个评估)

In [1]:
import torch
import os
from openprompt.data_utils.conditional_generation_dataset import WebNLGProcessor
from openprompt.plms import load_plm

In [2]:
dataset_root_dir = '/home/wy/OpenPrompt/datasets/'

In [3]:
webNLG_path = os.path.join(dataset_root_dir, 'CondGen/webnlg_2017')
os.path.exists(webNLG_path)

True

In [4]:
dataset = dict()
dataset['train'] = WebNLGProcessor().get_train_examples(webNLG_path)
dataset['validation'] = WebNLGProcessor().get_dev_examples(webNLG_path)
dataset['test'] = WebNLGProcessor().get_test_examples(webNLG_path)

In [5]:
print(len(dataset['train']))
print(len(dataset['validation']))
print(len(dataset['test']))

18025
872
1862


In [6]:
dataset['train'][100]

{
  "guid": "100",
  "label": null,
  "meta": {},
  "text_a": " | Afonso_Pena_International_Airport : elevationAboveTheSeaLevel_(in_feet) : 2988",
  "text_b": "",
  "tgt_text": "Afonso Pena International Airport has an elevation above the sea level (in feet) of 2988."
}

In [7]:
plm, tokenizer, model_config, WrapperClass = load_plm('t5', 't5-base')

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [8]:
from openprompt.prompts.prefix_tuning_template import PrefixTuningTemplate

## 自定义prefix template
prefix是相对于cloze的另一种prompt的方式 进行文本推理生成任务
使用PrefixTuningTemplate api 文本在<eos>后加上mask

In [9]:
my_template = PrefixTuningTemplate(model=plm, tokenizer=tokenizer, text='{"placeholder":"text_a"} {"special": "<eos>"} {"mask"}', using_decoder_past_key_values=False)
# text加入special eos和mask就是一个基本的prefix template的形式

In [10]:
wrapped_example = my_template.wrap_one_example(dataset['train'][0])
wrapped_example

[[{'text': ' | Aarhus_Airport : cityServed : "Aarhus, Denmark"',
   'loss_ids': 0,
   'shortenable_ids': 1},
  {'text': '<eos>', 'loss_ids': 0, 'shortenable_ids': 0},
  {'text': '<mask>', 'loss_ids': 1, 'shortenable_ids': 0}],
 {'guid': '0', 'tgt_text': 'The Aarhus is the airport of Aarhus, Denmark.'}]

In [11]:
from openprompt import PromptDataLoader

In [12]:
train_dataloader = PromptDataLoader(dataset=dataset['train'], template=my_template, tokenizer=tokenizer,tokenizer_wrapper_class=WrapperClass,
                                    max_seq_length=256, decoder_max_length=256, batch_size=5, shuffle=True, teacher_forcing=False, predict_eos_token=True, # 设置predict_eos_token是很重要的 因为template生成的文本最后是<mask>而不是<eos>如果不设置为True会不断进行推理
                                    truncate_method='head')

tokenizing: 18025it [00:16, 1074.11it/s]


In [13]:
validation_dataloader = PromptDataLoader(dataset=dataset['validation'], template=my_template, tokenizer=tokenizer,tokenizer_wrapper_class=WrapperClass,
                                    max_seq_length=256, decoder_max_length=256, batch_size=5, shuffle=True, teacher_forcing=False, predict_eos_token=True, # 设置predict_eos_token是很重要的 因为template生成的文本最后是<mask>而不是<eos>如果不设置为True会不断进行推理
                                    truncate_method='head')

tokenizing: 872it [00:00, 1040.37it/s]


In [14]:
test_dataloader = PromptDataLoader(dataset=dataset['test'], template=my_template, tokenizer=tokenizer,tokenizer_wrapper_class=WrapperClass,
                                    max_seq_length=256, decoder_max_length=256, batch_size=5, shuffle=True, teacher_forcing=False, predict_eos_token=True, # 设置predict_eos_token是很重要的 因为template生成的文本最后是<mask>而不是<eos>如果不设置为True会不断进行推理
                                    truncate_method='head')

tokenizing: 1862it [00:01, 995.15it/s] 


In [15]:
from openprompt import PromptForGeneration

In [16]:
prompt_model = PromptForGeneration(plm=plm, template=my_template, freeze_plm=True, tokenizer=tokenizer)

In [17]:
prompt_model = prompt_model.cuda()

In [18]:
from transformers import AdamW

In [19]:
# 模型frozen 仅更新template的参数
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
{
    "params": [p for n, p in my_template.named_parameters() if (not any(nd in n for nd in no_decay)) and p.requires_grad],
    "weight_decay": 0.0,
},
{
    "params": [p for n, p in my_template.named_parameters() if any(nd in n for nd in no_decay) and p.requires_grad],
    "weight_decay": 0.0,
},
]

In [20]:
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-3, eps=1e-8)



In [21]:
from transformers.optimization import get_linear_schedule_with_warmup

In [22]:
total_step = len(train_dataloader) * 5
scheduler = get_linear_schedule_with_warmup(optimizer, 0, total_step)

In [23]:
from openprompt.utils.metrics import generation_metric

In [24]:
# generation的参数集
generation_arguments = {
    "max_length": 512,
    "max_new_tokens": None,
    "min_length": 5,
    "temperature": 1.0,
    "do_sample": False,
    "top_k": 0,
    "top_p": 0.9,
    "repetition_penalty": 1.0,
    "num_beams": 5,
    "bad_words_ids": [[628], [198]]
}

In [30]:
def evaluate(prompt_model, dataloader):
    generated_sentence = list()
    groundtruth_sentence = list()
    input_and_generated_sentence = list()
    prompt_model.eval()

    for step, inputs in enumerate(dataloader):
        inputs.cuda()
        _, output_sentence = prompt_model.generate(inputs, **generation_arguments)
        generated_sentence.extend(output_sentence)
        groundtruth_sentence.extend(inputs['tgt_text'])
        input_and_generated_sentence.extend((inputs, output_sentence))
    score = generation_metric(generated_sentence, groundtruth_sentence, "sentence_bleu")
    print('test score: ', score, flush=True)
    return generated_sentence, input_and_generated_sentence

In [27]:
global_step = 0
total_loss = 0
log_loss = 0

for epoch in range(5):
    prompt_model.train()
    for step, inputs in enumerate(train_dataloader):
        global_step = global_step + 1
        inputs.cuda()
        loss = prompt_model(inputs)
        loss.backward()
        total_loss = total_loss + loss.item()
        torch.nn.utils.clip_grad_norm_(my_template.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        if global_step%500 == 0:
            print("Epoch {}, global_step {} average loss: {} lr: {}".format(epoch, global_step, (total_loss-log_loss)/500, scheduler.get_last_lr()[0]), flush=True)
            log_loss = total_loss

Epoch 0, global_step 500 average loss: 0.08401759774401261 lr: 0.0009722607489597782
Epoch 0, global_step 1000 average loss: 2.2206769032891317e-05 lr: 0.0009445214979195562
Epoch 0, global_step 1500 average loss: 0.002482076367018408 lr: 0.0009167822468793344
Epoch 0, global_step 2000 average loss: 2.568547551595657e-05 lr: 0.0008890429958391124
Epoch 0, global_step 2500 average loss: 1.5708446252915564e-05 lr: 0.0008613037447988904
Epoch 0, global_step 3000 average loss: 1.3141183935317713e-05 lr: 0.0008335644937586685
Epoch 0, global_step 3500 average loss: 7.719029113104626e-06 lr: 0.0008058252427184466
Epoch 1, global_step 4000 average loss: 6.009228314837855e-06 lr: 0.0007780859916782246
Epoch 1, global_step 4500 average loss: 4.003010688521158e-06 lr: 0.0007503467406380028
Epoch 1, global_step 5000 average loss: 3.7837838099648024e-06 lr: 0.0007226074895977809
Epoch 1, global_step 5500 average loss: 3.2953857238737782e-06 lr: 0.000694868238557559
Epoch 1, global_step 6000 averag

In [31]:
generate_sentence, input_and_generated_sentence = evaluate(prompt_model, test_dataloader)

test score:  1.32648207427266e-06
