In [None]:
# default_exp run_bert_multitask


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
# Hide every CUDA device from this process before TensorFlow is imported
# (presumably so the notebook's examples and tests run on CPU only).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Run Bert Multitask Learning

Train, eval and predict APIs for BERT multitask learning.

In [None]:
# export
import argparse
import os
import time
from typing import Dict, Callable
from shutil import copytree, ignore_patterns, rmtree

import tensorflow as tf
from tensorflow.python.framework.errors_impl import NotFoundError as TFNotFoundError

from bert_multitask_learning.input_fn import predict_input_fn, train_eval_input_fn
from bert_multitask_learning.model_fn import BertMultiTask
from bert_multitask_learning.params import DynamicBatchSizeParams, BaseParams
from bert_multitask_learning.special_tokens import EVAL

# Fix duplicate log
LOGGER = tf.get_logger()
LOGGER.propagate = False

In [None]:
# export
def create_keras_model(
        mirrored_strategy: tf.distribute.MirroredStrategy,
        params: BaseParams,
        mode='train',
        inputs_to_build_model=None,
        model=None):
    """Create (or reuse) a BertMultiTask model and initialise it for `mode`.

    Supported modes:
        train: the model is only compiled; no checkpoint written by this
            library is restored (any other value of `mode` that is not
            listed below is handled the same way).
        resume: the model is compiled, built with one forward pass, and
            weights are loaded from params.ckpt_dir. If no matching
            checkpoint is found a warning is logged and the freshly
            initialised model is returned.
        transfer: the model is built, weights are loaded from
            params.init_checkpoint (which should contain checkpoints saved
            by bert-multitask-learning), and the model is compiled
            afterwards.
        predict: the model is built and weights are loaded from
            params.ckpt_dir; the model is not compiled.
        eval: same as predict, but the model is compiled afterwards.

    Args:
        mirrored_strategy (tf.distribute.MirroredStrategy): strategy whose
            scope the model is created in; pass None to create the model
            without entering any strategy scope.
        params (BaseParams): params
        mode (str, optional): Mode, see the explanation above.
            Defaults to 'train'.
        inputs_to_build_model (Dict, optional): A batch of data used for the
            forward pass that builds the model. Required for every mode
            except 'train'. Defaults to None.
        model (Model, optional): Existing Keras model to initialise instead
            of creating a new BertMultiTask. Defaults to None.

    Returns:
        model: loaded model
    """

    def _get_model_wrapper(params, mode, inputs_to_build_model, model):
        if model is None:
            model = BertMultiTask(params)

        def _build_with_forward_pass():
            # Build the model by calling it once on a real batch in PREDICT
            # mode, before any weights are loaded into it.
            _ = model(inputs_to_build_model,
                      mode=tf.estimator.ModeKeys.PREDICT)

        if mode == 'resume':
            # Compile before building/loading: the intent is to restore ALL
            # variables, including the optimizers' states.
            model.compile()
            _build_with_forward_pass()
            try:
                model.load_weights(os.path.join(
                    params.ckpt_dir, 'model'), skip_mismatch=False)
            except TFNotFoundError:
                # Logger.warn is a deprecated alias of Logger.warning.
                LOGGER.warning('Not resuming since no mathcing ckpt found')
        elif mode == 'transfer':
            # Build and load first, compile last: the optimizers' variables
            # are meant to start fresh rather than come from the checkpoint.
            _build_with_forward_pass()
            model.load_weights(os.path.join(params.init_checkpoint, 'model'))
            model.compile()
        elif mode in ('predict', 'eval'):
            _build_with_forward_pass()
            # Load weights without loading the optimizers' variables.
            model.load_weights(os.path.join(params.ckpt_dir, 'model'))
            if mode == 'eval':
                model.compile()
        else:
            model.compile()

        return model

    if mirrored_strategy is not None:
        with mirrored_strategy.scope():
            model = _get_model_wrapper(
                params, mode, inputs_to_build_model, model)
    else:
        model = _get_model_wrapper(params, mode, inputs_to_build_model, model)
    return model



In [None]:
# export
def _train_bert_multitask_keras_model(train_dataset: tf.data.Dataset,
                                      eval_dataset: tf.data.Dataset,
                                      model: tf.keras.Model,
                                      params: BaseParams,
                                      mirrored_strategy: tf.distribute.MirroredStrategy = None):
    """Fit `model` on `train_dataset`, validating on `eval_dataset`.

    A weights-only checkpoint is written to `<params.ckpt_dir>/model` and
    TensorBoard logs are written to `params.ckpt_dir`. The number of epochs
    and steps per epoch are read from `params.train_epoch` and
    `params.train_steps_per_epoch`. When `mirrored_strategy` is given, the
    fit call runs inside its scope. The model summary is printed at the end.
    """
    # Only weights are saved: the whole model can't be saved with the model
    # subclassing api due to a tf bug, see
    # https://github.com/tensorflow/tensorflow/issues/42741
    # https://github.com/tensorflow/tensorflow/issues/40366
    training_callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(params.ckpt_dir, 'model'),
            save_weights_only=True,
            monitor='val_mean_acc',
            mode='auto',
            save_best_only=False),
        tf.keras.callbacks.TensorBoard(log_dir=params.ckpt_dir),
    ]

    fit_kwargs = dict(
        x=train_dataset,
        validation_data=eval_dataset,
        epochs=params.train_epoch,
        callbacks=training_callbacks,
        steps_per_epoch=params.train_steps_per_epoch,
    )

    if mirrored_strategy is None:
        model.fit(**fit_kwargs)
    else:
        with mirrored_strategy.scope():
            model.fit(**fit_kwargs)
    model.summary()



In [None]:
# export
def get_params_ready(problem, num_gpus, model_dir, params, problem_type_dict, processing_fn_dict, mode='train', json_path=''):
    """Prepare a params object for training, evaluation or prediction.

    Steps, in order:
        1. create a DynamicBatchSizeParams when `params` is None;
        2. make sure a `models` directory exists in the working directory;
        3. register extra problems when `problem_type_dict` is non-empty;
        4. in 'train' mode assign the problem and dump params with
           `to_json()`; in any other mode first restore params with
           `from_json(json_path)` and then assign the problem.

    Args:
        problem: problem string passed to `params.assign_problem`.
        num_gpus: number of GPUs; converted with `int()`.
        model_dir: checkpoint directory. It is split into parent directory
            and final component, which are passed as `base_dir` and
            `dir_name`; both are None when `model_dir` is empty.
        params: params object or None.
        problem_type_dict: problem name -> problem type, may be None/empty.
        processing_fn_dict: problem name -> preprocessing function.
        mode: 'train' or anything else (treated as a non-training mode).
        json_path: params json to restore from in non-training modes.

    Returns:
        The prepared params object.
    """
    params = DynamicBatchSizeParams() if params is None else params

    if not os.path.exists('models'):
        os.mkdir('models')

    parent_dir, leaf_name = (
        os.path.split(model_dir) if model_dir else (None, None))

    # add new problem to params if problem_type_dict and processing_fn_dict provided
    if problem_type_dict:
        params.add_multiple_problems(
            problem_type_dict=problem_type_dict,
            processing_fn_dict=processing_fn_dict)

    is_training = mode == 'train'
    if not is_training:
        # Restore the saved configuration before assigning the problem.
        params.from_json(json_path)

    params.assign_problem(
        problem,
        gpu=int(num_gpus),
        base_dir=parent_dir,
        dir_name=leaf_name)

    if is_training:
        params.to_json()

    return params

In [None]:
# export
def train_bert_multitask(
        problem='weibo_ner',
        num_gpus=1,
        num_epochs=10,
        model_dir='',
        params: BaseParams = None,
        problem_type_dict: Dict[str, str] = None,
        processing_fn_dict: Dict[str, Callable] = None,
        model: tf.keras.Model = None,
        create_tf_record_only=False,
        steps_per_epoch=None,
        warmup_ratio=0.1,
        continue_training=False,
        mirrored_strategy=None):
    """Train Multi-task Bert model

    About problem:
        There are two types of chaining operations can be used to chain problems.
            - `&`. If two problems have the same inputs, they can be chained using `&`.
                Problems chained by `&` will be trained at the same time.
            - `|`. If two problems don't have the same inputs, they need to be chained using `|`.
                Problems chained by `|` will be sampled to train at every instance.

        For example, `cws|NER|weibo_ner&weibo_cws`, one problem will be sampled at each turn, say `weibo_ner&weibo_cws`, then `weibo_ner` and `weibo_cws` will trained for this turn together. Therefore, in a particular batch, some tasks might not be sampled, and their loss could be 0 in this batch.

    About problem_type_dict and processing_fn_dict:
        If the problem is not predefined, you need to tell the model what's the new problem's problem_type
        and preprocessing function.
            For example, a new problem: fake_classification
            problem_type_dict = {'fake_classification': 'cls'}
            processing_fn_dict = {'fake_classification': <preprocessing function>}

        Available problem type:
            cls: Classification
            seq_tag: Sequence Labeling
            seq2seq_tag: Sequence to Sequence tag problem
            seq2seq_text: Sequence to Sequence text generation problem

        Preprocessing function example:
        Please refer to https://github.com/JayYip/bert-multitask-learning/blob/master/README.md

    Keyword Arguments:
        problem {str} -- Problems to train (default: {'weibo_ner'})
        num_gpus {int} -- Number of GPU to use (default: {1})
        num_epochs {int} -- Number of epochs to train (default: {10})
        model_dir {str} -- model dir (default: {''})
        params {BaseParams} -- Params to define training and models; a DynamicBatchSizeParams is created when None (default: {None})
        problem_type_dict {dict} -- Key: problem name, value: problem type (default: {None})
        processing_fn_dict {dict} -- Key: problem name, value: problem data preprocessing fn (default: {None})
        model {tf.keras.Model} -- Currently unused: a new model is always created by create_keras_model (default: {None})
        create_tf_record_only {bool} -- Return right after the train/eval input functions have been called, without training (default: {False})
        steps_per_epoch {int} -- Steps per epoch; when None the train dataset is iterated once to count its batches (default: {None})
        warmup_ratio {float} -- Passed to params.update_train_steps (default: {0.1})
        continue_training {bool} -- Resume from params.ckpt_dir when it contains a checkpoint (default: {False})
        mirrored_strategy -- None creates a new tf.distribute.MirroredStrategy, False disables the distribution strategy, anything else is used as the strategy (default: {None})

    Returns:
        The trained model, or None when create_tf_record_only is True.
    """
    # `params` defaults to None, so it must exist before train_epoch is set
    # on it (get_params_ready would create it too late).
    if params is None:
        params = DynamicBatchSizeParams()
    params.train_epoch = num_epochs
    params = get_params_ready(problem, num_gpus, model_dir,
                              params, problem_type_dict, processing_fn_dict)

    train_dataset = train_eval_input_fn(params)
    eval_dataset = train_eval_input_fn(params, mode=EVAL)
    if create_tf_record_only:
        return

    # get train_steps and update params
    if steps_per_epoch is not None:
        train_steps = steps_per_epoch
    else:
        # One full pass over the dataset just to count the batches.
        train_steps = sum(1 for _ in train_dataset)
    params.update_train_steps(train_steps, warmup_ratio=warmup_ratio)

    train_dataset = train_eval_input_fn(params)
    # Repeat at least as many times as there are epochs so runs with more
    # than 10 epochs do not run out of data; never fewer than the previous
    # fixed count of 10.
    train_dataset = train_dataset.repeat(max(10, num_epochs))

    # A single batch used to build the model before weights are loaded.
    one_batch = next(train_dataset.as_numpy_iterator())

    if mirrored_strategy is None:
        mirrored_strategy = tf.distribute.MirroredStrategy()
    elif mirrored_strategy is False:
        mirrored_strategy = None

    # `False` has been normalised to None above, so None is what has to be
    # checked here; num_gpus is converted like in get_params_ready.
    if int(num_gpus) > 1 and mirrored_strategy is not None:
        train_dataset = mirrored_strategy.experimental_distribute_dataset(
            train_dataset)
        eval_dataset = mirrored_strategy.experimental_distribute_dataset(
            eval_dataset)

    # restore priority: self > transfer > huggingface
    if continue_training and tf.train.latest_checkpoint(params.ckpt_dir):
        mode = 'resume'
    elif tf.train.latest_checkpoint(params.init_checkpoint):
        mode = 'transfer'
    else:
        mode = 'train'

    # NOTE(review): the `model` argument is not forwarded here, so a
    # caller-supplied model is ignored -- confirm whether that is intended.
    model = create_keras_model(
        mirrored_strategy=mirrored_strategy, params=params, mode=mode, inputs_to_build_model=one_batch)

    _train_bert_multitask_keras_model(
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        model=model,
        params=params,
        mirrored_strategy=mirrored_strategy
    )
    return model



## Train multitask

Before training, we need to do the following things:
- pass the corresponding transformers configuration to params; we use `voidful/albert_chinese_tiny` as an example here
- configure the problems we want to train, which includes
    - training problems
    - their problem type as a dict
    - their preprocessing functions as a dict

In [None]:
import tensorflow as tf

from bert_multitask_learning.predefined_problems import *

from bert_multitask_learning import DynamicBatchSizeParams
import os
from bert_multitask_learning import predict_input_fn
# Params object shared by every train/eval/predict call below.
params = DynamicBatchSizeParams()
params.shuffle_buffer = 1000

# configure transformers: which tokenizer/model/config classes to load and
# which pretrained name each of them is loaded from
params.transformer_tokenizer_loading = 'BertTokenizer'
params.transformer_model_loading = 'AlbertForMaskedLM'
params.transformer_config_loading = 'AlbertConfig'
params.transformer_model_name = 'voidful/albert_chinese_tiny'
params.transformer_config_name = 'voidful/albert_chinese_tiny'
params.transformer_tokenizer_name = 'voidful/albert_chinese_tiny'


In [None]:
# hide
import tempfile
# Point params.tmp_file_dir at a fresh temporary directory for this run.
params.tmp_file_dir = tempfile.mkdtemp()

In [None]:

# Problems to train: `&` chains problems that share inputs and are trained
# together, `|` separates problems that are sampled one at a time.
problem = 'weibo_fake_ner&weibo_fake_cls|weibo_fake_multi_cls|weibo_masklm|weibo_pretrain'
# Problem name -> problem type.
problem_type_dict = {
    'weibo_fake_ner': 'seq_tag',
    'weibo_cws': 'seq_tag',
    'weibo_fake_multi_cls': 'multi_cls',
    'weibo_fake_cls': 'cls',
    'weibo_masklm': 'masklm',
    'weibo_pretrain': 'pretrain'
}

# Problem name -> preprocessing function.
# NOTE(review): every entry reads from the same absolute, machine-specific
# path; adjust it to wherever the weibo NER data lives.
processing_fn_dict = {
    'weibo_fake_ner': get_weibo_fake_ner_fn(file_path='/data/bert-multitask-learning/data/ner/weiboNER*'),
    'weibo_cws': get_weibo_cws_fn(file_path='/data/bert-multitask-learning/data/ner/weiboNER*'),
    'weibo_fake_cls': get_weibo_fake_cls_fn(file_path='/data/bert-multitask-learning/data/ner/weiboNER*'),
    'weibo_fake_multi_cls': get_weibo_fake_multi_cls_fn(file_path='/data/bert-multitask-learning/data/ner/weiboNER*'),
    'weibo_masklm': get_weibo_masklm(file_path='/data/bert-multitask-learning/data/ner/weiboNER*'),
    'weibo_pretrain': get_weibo_pretrain_fn(file_path='/data/bert-multitask-learning/data/ner/weiboNER*')
}


In [None]:
# Minimal training run: one epoch of a single step, no distribution strategy.
# continue_training=True resumes from params.ckpt_dir if it already holds a
# checkpoint.
model = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=True,
    mirrored_strategy=False
)

749, 8024, 1157, 1157, 7564, 5276, 749, 1378, 103, 103, 4263, 2207, 5101, 2797, 3322, 1728, 711, 800, 3221, 6812, 791, 711, 103, 3297, 2571, 4638, 2207, 5101, 
INFO:tensorflow:input_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
INFO:tensorflow:segment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
INFO:tensorflow:masked_lm_positions: [3, 15, 16, 29, 39, 40, 43, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
INFO:tensorflow:masked_lm_ids: [2207  511 2769 3632  704 1286 2361 2797    0    0    0    0    0    0
    0    0    0    0    0    0]
INFO:tensorflow:masked_lm_weights: [1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
INFO:tensorflow:['最', '热', '时', '尚', '榜', '女', '人', '不', '坏', '，', '男', '人', '不', '爱', 

In [None]:
# hide
# continue training: steps_per_epoch is not given, so the steps are counted
# from the dataset, and the default MirroredStrategy is used
model = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    continue_training=True
)

# fresh train into a separate model dir
_ = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=False,
    mirrored_strategy=False,
    model_dir='./models/fresh_train'
)

# transfer train: initialise from the checkpoint written by the fresh run
params.init_checkpoint = './models/fresh_train'
_ = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=False,
    mirrored_strategy=False
)

nsors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
INFO:tensorflow:Initial lr: 2e-05
INFO:tensorflow:Train steps: 1
INFO:tensorflow:Warmup steps: 0
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model

In [None]:
# export
def trim_checkpoint_for_prediction(problem: str,
                                   input_dir: str,
                                   output_dir: str,
                                   problem_type_dict: Dict[str, str] = None,
                                   overwrite=True,
                                   fake_input_list=None,
                                   params=None):
    """Minimize checkpoint size for prediction.

    Since the original checkpoint contains the optimizer's variables,
        for instance, if the user trains with adam, the checkpoint size
        will be three times the size of the model weights. This function
        will remove those variables, which are unused in prediction, to
        save space.

    Everything in input_dir except the checkpoint files themselves is copied
    to output_dir; the model weights are then loaded from input_dir and saved
    again on their own into the new checkpoint dir together with params.json.

    Note: if the model is a multimodal model, you have to provide fake_input_list that
        mimic the structure of real input.

    Args:
        problem (str): problem
        input_dir (str): input dir
        output_dir (str): output dir, must be different from input_dir
        problem_type_dict (Dict[str, str], optional): problem type dict. Defaults to None.
        overwrite (bool, optional): remove an existing output_dir first.
            When False and output_dir exists, the copy fails. Defaults to True.
        fake_input_list (List, optional): fake input list to create dummy dataset.
            Defaults to None, in which case plain text inputs are used.
        params (optional): params object to use; a DynamicBatchSizeParams is
            created when None. Defaults to None.

    Raises:
        ValueError: if input_dir and output_dir point to the same directory.
    """
    # With overwrite=True the output dir is removed before the copy, so using
    # the input dir as output would delete the source checkpoint.
    if os.path.realpath(input_dir) == os.path.realpath(output_dir):
        raise ValueError(
            'input_dir and output_dir must be different directories, got: '
            '{}'.format(input_dir))

    if overwrite and os.path.exists(output_dir):
        rmtree(output_dir)
    # Copy everything but the checkpoint files, which are re-created below.
    copytree(input_dir, output_dir, ignore=ignore_patterns(
        'checkpoint', '*.index', '*.data-000*'))
    base_dir, dir_name = os.path.split(output_dir)
    if params is None:
        params = DynamicBatchSizeParams()
    # problem_type_dict defaults to None; only register problems when given,
    # the same way get_params_ready does.
    if problem_type_dict:
        params.add_multiple_problems(problem_type_dict=problem_type_dict)
    params.from_json(os.path.join(input_dir, 'params.json'))
    params.assign_problem(problem, base_dir=base_dir,
                          dir_name=dir_name, predicting=True)

    model = BertMultiTask(params)
    sample_inputs = ['fake'] if fake_input_list is None else fake_input_list
    dummy_dataset = predict_input_fn(sample_inputs*5, params)
    # Build the model with one forward pass before loading weights into it.
    _ = model(next(dummy_dataset.as_numpy_iterator()),
              mode=tf.estimator.ModeKeys.PREDICT)
    model.load_weights(os.path.join(input_dir, 'model'))
    model.save_weights(os.path.join(params.ckpt_dir, 'model'))
    params.to_json()



## Trim checkpoints

The checkpoints contain the optimizers' states, which are not needed once training is done and which make the checkpoint size two times larger. We provide an API to trim down the size of the checkpoint by removing the optimizers' states.

Note: in a multimodal setting, you need to provide a fake input to build the model correctly. Otherwise the modal embeddings will be randomly initialized.

In [None]:
# Only log errors from here on to keep the cell outputs below short.
tf.get_logger().setLevel('ERROR')

In [None]:
# fake inputs
import numpy as np
# Five fake multimodal inputs: a text field plus a random (5, 10) 'image'
# array, mimicking the structure of real inputs so the model can be built.
fake_inputs = [{'text': 'test', 'image': np.random.uniform(
            size=(5, 10))} for _ in range(5)] 
# Write the trimmed checkpoint next to the training one, with a `_pred` suffix.
trim_checkpoint_for_prediction(
    problem=problem, input_dir=model.params.ckpt_dir,
    output_dir=model.params.ckpt_dir+'_pred',
    problem_type_dict=problem_type_dict, overwrite=True, fake_input_list=fake_inputs)

Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_cws, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_pretrain, problem type: pretrain


In [None]:
# export 
def eval_bert_multitask(
        problem='weibo_ner',
        num_gpus=1,
        model_dir='',
        params=None,
        problem_type_dict=None,
        processing_fn_dict=None,
        model=None):
    """Evaluate Multi-task Bert model

    Params are restored from `<model_dir>/params.json`, a model is created in
    'eval' mode under a new MirroredStrategy (weights are loaded from
    params.ckpt_dir) and evaluated on the eval dataset.

    Keyword Arguments:
        problem {str} -- problems to evaluate (default: {'weibo_ner'})
        num_gpus {int} -- number of gpu to use (default: {1})
        model_dir {str} -- model dir; falls back to params.ckpt_dir when empty and params is given (default: {''})
        params {Params} -- params to define model (default: {None})
        problem_type_dict {dict} -- Key: problem name, value: problem type (default: {None})
        processing_fn_dict {dict} -- Key: problem name, value: problem data preprocessing fn (default: {None})
        model -- currently unused, a new model is always created (default: {None})

    Returns:
        dict -- metric name to value, as returned by model.evaluate(return_dict=True)
    """
    if params is not None and not model_dir:
        model_dir = params.ckpt_dir

    params_json = os.path.join(model_dir, 'params.json')
    params = get_params_ready(
        problem, num_gpus, model_dir,
        params, problem_type_dict, processing_fn_dict,
        mode='predict', json_path=params_json)

    # One batch is pulled from a first dataset to build the model; a second
    # dataset is created for the evaluation itself.
    build_batch = next(
        train_eval_input_fn(params, mode=EVAL).as_numpy_iterator())
    eval_dataset = train_eval_input_fn(params, mode=EVAL)

    strategy = tf.distribute.MirroredStrategy()
    model = create_keras_model(
        mirrored_strategy=strategy,
        params=params,
        mode='eval',
        inputs_to_build_model=build_batch)
    return model.evaluate(eval_dataset, return_dict=True)



# Eval

Now we can use the trimmed checkpoint to do evaluation

In [None]:
# Evaluate from the trimmed checkpoint directory (the `_pred` suffix used as
# output_dir in the trim step above).
eval_bert_multitask(problem=problem, params=params,
                    problem_type_dict=problem_type_dict, processing_fn_dict=processing_fn_dict,
                    model_dir=model.params.ckpt_dir+'_pred')

Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_cws, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_pretrain, problem type: pretrain
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `Tr

{'loss': 16.580690383911133,
 'mean_acc': 0.2440515160560608,
 'weibo_fake_cls_acc': 0.44999998807907104,
 'weibo_fake_ner_acc': 0.18340164422988892}

In [None]:
# export
def predict_bert_multitask(
        inputs,
        problem='weibo_ner',
        model_dir='',
        params: BaseParams = None,
        problem_type_dict: Dict[str, str] = None,
        processing_fn_dict: Dict[str, Callable] = None,
        model: tf.keras.Model = None,
        return_model=False):
    """Predict with Multi-task Bert model

    Params are restored from `<model_dir>/params.json`. Unless a model is
    passed in, one is created in 'predict' mode (weights are loaded from
    params.ckpt_dir). Prediction runs inside a new MirroredStrategy scope.

    Arguments:
        inputs -- input features to predict on, passed to predict_input_fn

    Keyword Arguments:
        problem {str} -- problems to predict (default: {'weibo_ner'})
        model_dir {str} -- model dir; falls back to params.ckpt_dir when empty (default: {''})
        params {BaseParams} -- params to define model; a DynamicBatchSizeParams is created when None (default: {None})
        problem_type_dict {dict} -- Key: problem name, value: problem type (default: {None})
        processing_fn_dict {dict} -- Key: problem name, value: problem data preprocessing fn (default: {None})
        model {tf.keras.Model} -- already loaded model to reuse instead of creating one (default: {None})
        return_model {bool} -- also return the model that was used (default: {False})

    Returns:
        The output of model.predict, or a `(pred, model)` tuple when
        return_model is True.
    """

    if params is None:
        params = DynamicBatchSizeParams()
    # params is guaranteed to exist here, so only model_dir needs checking.
    if not model_dir:
        model_dir = params.ckpt_dir
    params = get_params_ready(problem, 1, model_dir,
                              params, problem_type_dict, processing_fn_dict,
                              mode='predict', json_path=os.path.join(model_dir, 'params.json'))

    # The fixed 3 second sleep that used to follow this log line only
    # delayed every prediction call and has been removed.
    LOGGER.info('Checkpoint dir: %s', params.ckpt_dir)

    # One batch is pulled from a first dataset to build the model; a second
    # dataset is created for the prediction itself.
    pred_dataset = predict_input_fn(inputs, params)
    one_batch_data = next(pred_dataset.as_numpy_iterator())
    pred_dataset = predict_input_fn(inputs, params)

    mirrored_strategy = tf.distribute.MirroredStrategy()
    if model is None:
        model = create_keras_model(
            mirrored_strategy=mirrored_strategy, params=params, mode='predict', inputs_to_build_model=one_batch_data)

    with mirrored_strategy.scope():
        pred = model.predict(pred_dataset)

    if return_model:
        return pred, model
    return pred


## Predict
We can do prediction by providing a list of input features.

In [None]:
# Predict a single problem on the fake inputs (repeated 20 times) and keep
# the loaded model by asking for it with return_model=True.
pred, model = predict_bert_multitask(
    problem='weibo_fake_ner',
    inputs=fake_inputs*20, model_dir=model.params.ckpt_dir,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict, return_model=True,
    params=params)

Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_cws, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_pretrain, problem type: pretrain
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `Tr

Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_cws, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_pretrain, problem type: pretrain


FileNotFoundError: [Errno 2] No such file or directory: 'models/weibo_fake_cls_weibo_fake_multi_cls_weibo_fake_ner_weibo_masklm_weibo_pretrain_ckpt_pred_pred/params.json'