In [1]:
import os
import sys
sys.path.append(".")
import oss2
import time

import argparse

In [2]:
from openprompt.trainer import ClassificationRunner, GenerationRunner
from openprompt.lm_bff_trainer import LMBFFClassificationRunner
from re import template
from openprompt.pipeline_base import PromptForClassification, PromptForGeneration
from openprompt.utils.reproduciblity import set_seed
from openprompt import PromptDataLoader
from openprompt.prompts import load_template, load_verbalizer, load_template_generator, load_verbalizer_generator
from openprompt.data_utils import FewShotSampler
from openprompt.utils.logging import config_experiment_dir, init_logger, logger
from openprompt.config import get_config, save_config_to_yaml
from openprompt.plms import load_plm_from_config
from openprompt.data_utils import load_dataset
from openprompt.utils.cuda import model_to_device
from openprompt.utils.logging import logger

In [3]:
def build_dataloader(dataset, template, tokenizer, tokenizer_wrapper_class, config, split):
    """Create the ``PromptDataLoader`` for one data split.

    Args:
        dataset: Examples to serve; forwarded unchanged to the loader.
        template: Prompt template; forwarded unchanged to the loader.
        tokenizer: Tokenizer; forwarded unchanged to the loader.
        tokenizer_wrapper_class: Forwarded unchanged to the loader.
        config: Experiment configuration. ``config[split]`` provides
            ``batch_size``, ``shuffle_data`` and, optionally,
            ``teacher_forcing``; ``config.task`` and ``config.dataloader``
            are read as well.
        split: Key of the per-split section inside ``config``.

    Returns:
        The ``PromptDataLoader`` instance that was constructed.
    """
    split_cfg = config[split]

    # Settings derived from the split section and the task type.
    loader_kwargs = {
        "dataset": dataset,
        "template": template,
        "tokenizer": tokenizer,
        "tokenizer_wrapper_class": tokenizer_wrapper_class,
        "batch_size": split_cfg.batch_size,
        "shuffle": split_cfg.shuffle_data,
        # A split section without this option yields None.
        "teacher_forcing": getattr(split_cfg, 'teacher_forcing', None),
        # The EOS token is predicted for generation tasks only.
        "predict_eos_token": config.task == "generation",
    }

    # Everything under config.dataloader is passed through as extra keywords.
    return PromptDataLoader(**loader_kwargs, **config.dataloader)

In [4]:
def main():
    """Run one experiment as described by the parsed configuration.

    Steps performed:
      1. Obtain ``config`` and ``args`` from ``get_config()``.
      2. When ``--resume`` or ``--test`` is given, reuse that path as the
         experiment directory; otherwise create a fresh experiment directory,
         initialise the file/console logger and save the config there.
      3. Load the datasets and dispatch to ``trainer`` according to
         ``config.learning_setting`` (``'full'``, ``'few_shot'`` or
         ``'zero_shot'``).

    Raises:
        ValueError: If ``--resume`` and ``--test`` are both given, if the
            few-shot setting is selected without
            ``config.few_shot.few_shot_sampling``, if the few-shot seed list
            is empty, or if ``config.learning_setting`` is not one of the
            supported values.
    """
    config, args = get_config()

    # init logger, create log dir and set log level, etc.
    if args.resume and args.test:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("cannot use flag --resume and --test together")
    if args.resume or args.test:
        # Reuse the directory of the run being resumed / tested.
        EXP_PATH = args.resume or args.test
        config.logging.path = EXP_PATH
    else:
        EXP_PATH = config_experiment_dir(config)
        init_logger(os.path.join(EXP_PATH, "log.txt"), config.logging.file_level, config.logging.console_level)
        # save config to the logger directory
        save_config_to_yaml(config)

    # load dataset. The valid_dataset can be None
    test_only = args.test is not None or config.learning_setting == 'zero_shot'
    train_dataset, valid_dataset, test_dataset, Processor = load_dataset(config, test = test_only)

    # main
    if config.learning_setting == 'full':
        score = 0
        # A single run is performed; the loop keeps the best score seen.
        for _ in range(1):
            res = trainer(
                EXP_PATH,
                config,
                Processor,
                resume = args.resume,
                test = args.test,
                train_dataset = train_dataset,
                valid_dataset = valid_dataset,
                test_dataset = test_dataset,
            )
            if res > score:
                score = res
        # The same score is reported under both metric names, as before.
        logger.info("the best result of test Performance:" + "( micro-f1: " + str(score) + ", accuracy: " + str(score) + ")")
    elif config.learning_setting == 'few_shot':
        if config.few_shot.few_shot_sampling is None:
            raise ValueError("use few_shot setting but config.few_shot.few_shot_sampling is not specified")
        seeds = config.sampling_from_train.seed
        if not seeds:
            # Without this guard the averaging below fails with an opaque
            # ZeroDivisionError.
            raise ValueError("use few_shot setting but config.sampling_from_train.seed is empty")
        res = 0
        for seed in seeds:
            seed_dir = os.path.join(EXP_PATH, f"seed-{seed}")
            if not args.test:
                sampler = FewShotSampler(
                    num_examples_per_label = config.sampling_from_train.num_examples_per_label,
                    also_sample_dev = config.sampling_from_train.also_sample_dev,
                    num_examples_per_label_dev = config.sampling_from_train.num_examples_per_label_dev
                )
                train_sampled_dataset, valid_sampled_dataset = sampler(
                    train_dataset = train_dataset,
                    valid_dataset = valid_dataset,
                    seed = seed
                )
                result = trainer(
                    seed_dir,
                    config,
                    Processor,
                    resume = args.resume,
                    test = args.test,
                    train_dataset = train_sampled_dataset,
                    valid_dataset = valid_sampled_dataset,
                    test_dataset = test_dataset,
                )
            else:
                # Test-only mode: no sampling, only the test set is passed on.
                result = trainer(
                    seed_dir,
                    config,
                    Processor,
                    test = args.test,
                    test_dataset = test_dataset,
                )
            res += result
        res /= len(seeds)
        # Report the average; previously it was computed and then discarded.
        logger.info(f"average result over {len(seeds)} seed(s): {res}")
    elif config.learning_setting == 'zero_shot':
        res = trainer(
            EXP_PATH,
            config,
            Processor,
            zero = True,
            train_dataset = train_dataset,
            valid_dataset = valid_dataset,
            test_dataset = test_dataset,
        )
    else:
        # Previously an unrecognised setting was silently ignored.
        raise ValueError(
            f"config.learning_setting {config.learning_setting} is not supported. "
            "Only full, few_shot and zero_shot are supported."
        )


In [5]:
def trainer(EXP_PATH, config, Processor, train_dataset = None, valid_dataset = None, test_dataset = None, resume = None, test = None, zero = False):
    """Build the model, data loaders and runner for one experiment and execute it.

    Args:
        EXP_PATH: Directory of this run. It is created when missing and
            stored in ``config.logging.path``.
        config: Experiment configuration object.
        Processor: Data processor; only ``Processor.labels`` is read here
            (classification tasks).
        train_dataset: Optional training data. A data loader is built only
            when the dataset is truthy.
        valid_dataset: Optional validation data, handled like ``train_dataset``.
        test_dataset: Optional test data, handled like ``train_dataset``.
        resume: When truthy, ``runner.run(ckpt='last')`` is called.
        test: When truthy, ``runner.test(ckpt='best')`` is called.
        zero: When true, ``runner.test()`` is called without a checkpoint.

    The mode flags are checked in the order ``zero``, ``test``, ``resume``;
    with none of them set, ``runner.run()`` is called.

    Returns:
        Whatever the selected runner method returns.

    Raises:
        NotImplementedError: If ``config.task`` is neither
            ``"classification"`` nor ``"generation"``.
    """
    # exist_ok avoids the check-then-create race, and missing parent
    # directories are created as well.
    os.makedirs(EXP_PATH, exist_ok=True)
    config.logging.path = EXP_PATH
    # set seed
    set_seed(config.reproduce.seed)

    # load the pretrained models, its model, tokenizer, and config.
    plm_model, plm_tokenizer, plm_config, plm_wrapper_class = load_plm_from_config(config)

    # define template and verbalizer
    if config.task == "classification":
        verbalizer = load_verbalizer(config=config, model=plm_model, tokenizer=plm_tokenizer, plm_config=plm_config, classes=Processor.labels)
        template_generate_model, template_generate_tokenizer = None, None
        if config.classification.auto_t:
            # load_plm_from_config returns four values in the call above, so
            # unpacking exactly three here raised "too many values to
            # unpack". Trailing values are not needed and are discarded.
            template_generate_model, template_generate_tokenizer, template_generate_config, *_ = load_plm_from_config(config.template_generator)
            template = load_template(config=config, model=template_generate_model, tokenizer=template_generate_tokenizer, plm_config=template_generate_config, verbalizer=verbalizer)
        else:
            # define prompt
            template = load_template(config=config, model=plm_model, tokenizer=plm_tokenizer, plm_config=plm_config)

        # load the prompt pipeline model
        prompt_model = PromptForClassification(plm_model, template, verbalizer, freeze_plm = config.plm.optimize.freeze_para)
    elif config.task == "generation":
        template = load_template(config=config, model=plm_model, tokenizer=plm_tokenizer, plm_config=plm_config)
        prompt_model = PromptForGeneration(plm_model, template, freeze_plm = config.plm.optimize.freeze_para, gen_config = config.generation)
    else:
        raise NotImplementedError(f"config.task {config.task} is not implemented yet. Only classification and generation are supported.")

    # process data and get data_loader; a missing/empty dataset gives None
    train_dataloader = build_dataloader(train_dataset, template, plm_tokenizer, plm_wrapper_class, config, "train") if train_dataset else None
    valid_dataloader = build_dataloader(valid_dataset, template, plm_tokenizer, plm_wrapper_class, config, "dev") if valid_dataset else None
    test_dataloader = build_dataloader(test_dataset, template, plm_tokenizer, plm_wrapper_class, config, "test") if test_dataset else None

    print('train_dataloader:', train_dataloader)
    if config.task == "classification":
        if config.classification.auto_t or config.classification.auto_v:
            # This runner receives the raw datasets and the bare PLM rather
            # than the data loaders and the prompt model.
            runner = LMBFFClassificationRunner(train_dataset = train_dataset,
                                        valid_dataset = valid_dataset,
                                        test_dataset = test_dataset,
                                        model= plm_model,
                                        tokenizer = plm_tokenizer,
                                        template_generate_tokenizer = template_generate_tokenizer,
                                        template_generate_model = template_generate_model,
                                        initial_template = template,
                                        initial_verbalizer = verbalizer,
                                        config = config)
        else:
            runner = ClassificationRunner(
                model = prompt_model,
                train_dataloader = train_dataloader,
                valid_dataloader = valid_dataloader,
                test_dataloader = test_dataloader,
                config = config
            )
    elif config.task == "generation":
        runner = GenerationRunner(
            model = prompt_model,
            train_dataloader = train_dataloader,
            valid_dataloader = valid_dataloader,
            test_dataloader = test_dataloader,
            config = config
        )

    # Select the execution mode; zero-shot takes precedence, then test, then resume.
    if zero:
        res = runner.test()
    elif test:
        res = runner.test(ckpt = 'best')
    elif resume:
        res = runner.run(ckpt = 'last')
    else:
        res = runner.run()
    return res

In [6]:
# Ensure the top-level 'logs' directory exists (no error if it is already
# there), then launch the experiment defined by main().
os.makedirs('logs', exist_ok=True)
main()

[[032m2022-02-18 11:19:01,849[0m INFO] config.save_config_to_yaml Config saved as logs/rte_roberta-large_soft_template_manual_verbalizer_0218111901805843/config.yaml
[[032m2022-02-18 11:19:01,897[0m INFO] reproduciblity.set_seed Global seed set to 100


dataset_config.name.lower(): rte


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


num_classes: 2
self.label_words: [[' yes'], [' no']]


[[032m2022-02-18 11:19:16,972[0m INFO] prompt_base.from_file using template: {"placeholder":"text_a"} {"placeholder":"text_b"} {"mask"} .
tokenizing: 2490it [00:02, 859.74it/s]
tokenizing: 277it [00:00, 1003.21it/s]
tokenizing: 277it [00:00, 1218.83it/s]


train_dataloader: <openprompt.pipeline_base.PromptDataLoader object at 0x7f7179f57a50>


[[032m2022-02-18 11:19:25,190[0m INFO] cuda.model_to_device Using DataParallel
100%|██████████| 39/39 [01:06<00:00,  1.70s/it, loss=0.752]
[[032m2022-02-18 11:20:31,708[0m INFO] trainer.training_epoch Global step 39
[[032m2022-02-18 11:20:31,710[0m INFO] trainer.training_epoch Training epoch 0, num_steps 39, avg_loss: 0.7302, total_loss: 28.4771
validation: 100%|██████████| 9/9 [00:02<00:00,  4.42it/s]
[[032m2022-02-18 11:20:33,790[0m INFO] trainer.inference_epoch validation Performance: OrderedDict([('micro-f1', 0.4729241877256318), ('accuracy', 0.4729241877256318)])
[[032m2022-02-18 11:20:33,791[0m INFO] trainer.save_checkpoint Saving checkpoint logs/rte_roberta-large_soft_template_manual_verbalizer_0218111901805843/checkpoints/last.ckpt...
[[032m2022-02-18 11:20:50,157[0m INFO] trainer.save_checkpoint Copying checkpoint logs/rte_roberta-large_soft_template_manual_verbalizer_0218111901805843/checkpoints/last.ckpt to logs/rte_roberta-large_soft_template_manual_verbalizer_0