In [None]:
# default_exp run_bert_multitask
import os
%load_ext autoreload
%autoreload 2
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


# Run Bert Multitask Learning

Train, eval and predict api for bert multitask learning

## Imports

In [None]:
# export
import json
import os
import time
from shutil import copytree, ignore_patterns, rmtree
from typing import Callable, Dict, List, Tuple, Union

from sklearn.preprocessing import MultiLabelBinarizer
from transformers import PreTrainedTokenizer

import tensorflow as tf
from loguru import logger
import numpy as np

from m3tl.input_fn import predict_input_fn, train_eval_input_fn
from m3tl.model_fn import BertMultiTask
from m3tl.params import Params
from m3tl.special_tokens import EVAL, PREDICT
from m3tl.utils import (compress_tf_warnings, get_or_make_label_encoder,
                        infer_shape_and_type_from_dict, set_phase, LabelEncoder, get_is_pyspark)
from tensorflow.python.framework.errors_impl import \
    NotFoundError as TFNotFoundError

compress_tf_warnings()
# Fix duplicate log
# LOGGER = tf.get_logger()
# LOGGER.propagate = False


## Test setup

In [None]:
# hide
import tensorflow as tf

from m3tl.predefined_problems import *

from m3tl import Params
import os
from m3tl import predict_input_fn
# Build a Params object by hand and point it at a small ALBERT checkpoint.
# NOTE(review): `params` is rebound to `tb.params` further down in this setup,
# so the configuration made here appears to be discarded -- confirm whether it
# is still needed.
params = Params()
params.shuffle_buffer = 1000

# configure transformers
params.transformer_tokenizer_loading = 'BertTokenizer'
params.transformer_model_loading = 'AlbertForMaskedLM'
params.transformer_config_loading = 'AlbertConfig'
params.transformer_model_name = 'voidful/albert_chinese_tiny'
params.transformer_config_name = 'voidful/albert_chinese_tiny'
params.transformer_tokenizer_name = 'voidful/albert_chinese_tiny'

# hide
from m3tl.test_base import TestBase

# Shared test fixture; every cell below uses the params it provides.
tb = TestBase()
params = tb.params

# hide
# Problem string handed to `train_bert_multitask` in the test cells below.
# NOTE(review): `&` and `|` look like the library's problem-chaining operators;
# their exact semantics are defined outside this notebook -- confirm.
problem = 'weibo_fake_ner&weibo_fake_cls|weibo_fake_multi_cls|weibo_masklm|weibo_premask_mlm'
# Problem name -> problem type. `weibo_cws` and `weibo_pretrain` are registered
# here but are not part of the `problem` string above.
problem_type_dict = {
    'weibo_fake_ner': 'seq_tag',
    'weibo_cws': 'seq_tag',
    'weibo_fake_multi_cls': 'multi_cls',
    'weibo_fake_cls': 'cls',
    'weibo_masklm': 'masklm',
    'weibo_pretrain': 'pretrain',
    'weibo_premask_mlm': 'premask_mlm'
}

# Problem name -> preprocessing function. Every entry is built with the same
# `file_path` glob.
processing_fn_dict = {
    'weibo_fake_ner': get_weibo_fake_ner_fn(file_path='/data/m3tl/data/ner/weiboNER*'),
    'weibo_cws': get_weibo_cws_fn(file_path='/data/m3tl/data/ner/weiboNER*'),
    'weibo_fake_cls': get_weibo_fake_cls_fn(file_path='/data/m3tl/data/ner/weiboNER*'),
    'weibo_fake_multi_cls': get_weibo_fake_multi_cls_fn(file_path='/data/m3tl/data/ner/weiboNER*'),
    'weibo_masklm': get_weibo_masklm(file_path='/data/m3tl/data/ner/weiboNER*'),
    'weibo_pretrain': get_weibo_pretrain_fn(file_path='/data/m3tl/data/ner/weiboNER*'),
    'weibo_premask_mlm': get_weibo_premask_mlm(file_path='/data/m3tl/data/ner/weiboNER*')
}

2021-06-17 13:22:35.631 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:22:35.632 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:22:35.632 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:22:35.633 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:22:35.633 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_regression, problem type: regression
2021-06-17 13:22:35.634 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_vector_fit, problem type: vector_fit
2021-06-17 13:22:35.635 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pre

In [None]:
# export
def create_keras_model(
        mirrored_strategy: tf.distribute.MirroredStrategy,
        params: Params,
        mode='train',
        inputs_to_build_model=None,
        model=None,
        run_eagerly=False):
    """Create (or reuse) a `BertMultiTask` model and initialise it for `mode`.

    The phase is set to `PREDICT` before the model is built or compiled.
    What happens next depends on `mode`:

    - `'resume'`: compile, build by calling the model on
      `inputs_to_build_model`, then load weights from
      `<params.ckpt_dir>/model`. A missing checkpoint only logs a warning.
    - `'transfer'`: build, load weights from `<params.init_checkpoint>/model`,
      then compile.
    - `'predict'`: build and load weights from `<params.ckpt_dir>/model`;
      the model is not compiled.
    - `'eval'`: build, load weights from `<params.ckpt_dir>/model`, then
      compile.
    - anything else (including the default `'train'`): compile only; no
      checkpoint is loaded by this function.

    Args:
        mirrored_strategy: if not None, the model is created/initialised
            inside `mirrored_strategy.scope()`.
        params (Params): params; `use_horovod`, `ckpt_dir` and
            `init_checkpoint` are read here.
        mode (str, optional): see above. Defaults to 'train'.
        inputs_to_build_model (optional): a batch of data used to build the
            model in every mode that loads weights. Defaults to None.
        model (optional): an existing model to initialise instead of
            creating a new `BertMultiTask`. Defaults to None.
        run_eagerly (bool, optional): forwarded to `model.compile`.
            Defaults to False.

    Returns:
        The initialised model.
    """

    def _get_model_wrapper(params, mode, inputs_to_build_model, model):
        # Horovod: Specify `experimental_run_tf_function=False` to ensure TensorFlow
        # uses hvd.DistributedOptimizer() to compute gradients.
        compile_kwargs = dict(
            run_eagerly=run_eagerly,
            experimental_run_tf_function=not params.use_horovod)
        if model is None:
            model = BertMultiTask(params)
        set_phase(PREDICT)
        if mode == 'resume':
            # compile first so that the optimizer exists before weights are restored
            model.compile(**compile_kwargs)
            _ = model(inputs_to_build_model)
            # load ALL vars including optimizers' states
            try:
                model.load_weights(os.path.join(
                    params.ckpt_dir, 'model'), skip_mismatch=False)
            except TFNotFoundError:
                logger.warning('Not resuming since no matching ckpt found')
        elif mode == 'transfer':
            # build graph without optimizers' states
            # calling compile again should reset optimizers' states but we're playing safe here
            _ = model(inputs_to_build_model)
            # load weights without loading optimizers' vars
            model.load_weights(os.path.join(params.init_checkpoint, 'model'))
            # compile only after the weights are in place
            model.compile(**compile_kwargs)
        elif mode == 'predict':
            _ = model(inputs_to_build_model)
            # load weights without loading optimizers' vars
            model.load_weights(os.path.join(params.ckpt_dir, 'model'))
        elif mode == 'eval':
            _ = model(inputs_to_build_model)
            # load weights without loading optimizers' vars
            model.load_weights(os.path.join(params.ckpt_dir, 'model'))
            model.compile(**compile_kwargs)
        else:
            model.compile(**compile_kwargs)

        return model

    if mirrored_strategy is not None:
        with mirrored_strategy.scope():
            model = _get_model_wrapper(
                params, mode, inputs_to_build_model, model)
    else:
        model = _get_model_wrapper(params, mode, inputs_to_build_model, model)
    return model


Init model in various mode

`train`: model will be loaded from huggingface
`resume`: model will be loaded from params.ckpt_dir, if params.ckpt_dir does not contain a valid checkpoint, then load from huggingface
`transfer`: model will be loaded from params.init_checkpoint, the corresponding path should contain checkpoints saved using m3tl
`predict`: model will be loaded from params.ckpt_dir except optimizers' states
`eval`: model will be loaded from params.ckpt_dir except optimizers' states, model will be compiled

Args:
- mirrored_strategy (tf.distribute.MirroredStrategy): mirrored strategy
- params (Params): params
- mode (str, optional): Mode, see above explanation. Defaults to 'train'.
- inputs_to_build_model (Dict, optional): A batch of data. Defaults to None.
- model (Model, optional): Keras model. Defaults to None.

Returns:
- model: loaded model

## Train model

In [None]:
# export
def _has_callbacks(callbacks: List[tf.keras.callbacks.Callback], check_callback: tf.keras.callbacks.Callback) -> bool:
    for callback in callbacks:
        if isinstance(callback, check_callback):
            return True
    return False


def _train_bert_multitask_keras_model(train_dataset: tf.data.Dataset,
                                      eval_dataset: tf.data.Dataset,
                                      model: tf.keras.Model,
                                      params: Params,
                                      mirrored_strategy: tf.distribute.MirroredStrategy = None,
                                      callbacks: List[tf.keras.callbacks.Callback] = None,
                                      verbose=1):
    """Fit `model` on `train_dataset`, validating on `eval_dataset`.

    Callbacks are gathered from `params.gather_mtl_callbacks()` plus the
    user-supplied `callbacks`. If none of them is a `ModelCheckpoint`, a
    default weights-only checkpoint writing to `<params.ckpt_dir>/model` is
    appended. With horovod this happens on rank 0 only, and the horovod
    broadcast / metric-average callbacks are put in front of the list.

    Args:
        train_dataset: training dataset passed to `model.fit` as `x`.
        eval_dataset: dataset passed to `model.fit` as `validation_data`.
        model: compiled Keras model to train.
        params (Params): `ckpt_dir`, `use_horovod`, `train_epoch`,
            `train_steps_per_epoch` and the optional `validation_steps`
            (default 1000) are read here.
        mirrored_strategy (optional): if not None, `fit` runs inside
            `mirrored_strategy.scope()`. Defaults to None.
        callbacks (optional): extra callbacks; the list is not modified.
            Defaults to None.
        verbose (int, optional): Keras verbosity; forced to 0 on horovod
            ranks other than 0. Defaults to 1.
    """
    # Work on a copy: the list handed back by `gather_mtl_callbacks` (and the
    # caller's `callbacks`) must not grow every time training is started.
    all_callbacks = list(params.gather_mtl_callbacks())

    if callbacks is not None:
        all_callbacks.extend(callbacks)

    # if callbacks is not passed or callbacks does not contain a
    # ModelCheckpoint callback, we add the default one

    # can't save whole model with model subclassing api due to tf bug
    # see: https://github.com/tensorflow/tensorflow/issues/42741
    # https://github.com/tensorflow/tensorflow/issues/40366
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(params.ckpt_dir, 'model'),
        save_weights_only=True,
        monitor='val_mean_acc',
        mode='auto',
        save_best_only=False)

    has_model_checkpoint_callback = _has_callbacks(
        all_callbacks, tf.keras.callbacks.ModelCheckpoint)

    # horovod callbacks
    if params.use_horovod:
        import horovod.tensorflow.keras as hvd
        # when using horovod as dist backend, only save model in process 0
        if not has_model_checkpoint_callback and hvd.rank() == 0:
            all_callbacks.append(model_checkpoint_callback)
        all_callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
            hvd.callbacks.MetricAverageCallback()
        ] + all_callbacks
        # Horovod: write logs on worker 0.
        verbose = verbose if hvd.rank() == 0 else 0
    elif not has_model_checkpoint_callback:
        all_callbacks.append(model_checkpoint_callback)

    # One argument set shared by both the strategy-scoped and the plain call.
    fit_kwargs = dict(
        x=train_dataset,
        validation_data=eval_dataset,
        epochs=params.train_epoch,
        callbacks=all_callbacks,
        steps_per_epoch=params.train_steps_per_epoch,
        verbose=verbose,
        validation_steps=params.get('validation_steps', 1000)
    )

    if mirrored_strategy is not None:
        with mirrored_strategy.scope():
            model.fit(**fit_kwargs)
    else:
        model.fit(**fit_kwargs)
    model.summary()


In [None]:
# export
def get_params_ready(problem, num_gpus, model_dir, params, problem_type_dict, processing_fn_dict, mode='train', json_path='') -> Params:
    """Prepare a `Params` object for training or for eval/predict.

    Args:
        problem: problem string to assign; may be empty if `params` already
            has a problem assigned.
        num_gpus: accepted for interface compatibility; not used here.
        model_dir: forwarded to `params.assign_problem`.
        params: existing params, or None to create a fresh `Params()`.
        problem_type_dict: if truthy, registered together with
            `processing_fn_dict` via `params.register_multiple_problems`.
        processing_fn_dict: problem name -> preprocessing function.
        mode (str, optional): `'train'` assigns the problem and dumps params
            with `to_json()`; any other value first loads params from
            `json_path` and assigns the problem with `predicting=True`.
            Defaults to 'train'.
        json_path (str, optional): path handed to `params.from_json` when
            `mode` is not `'train'`. Defaults to ''.

    Returns:
        The prepared params object.

    Raises:
        ValueError: if no problem is assigned on `params` and `problem` is
            empty.
    """
    if params is None:
        params = Params()
    # Create-and-tolerate instead of check-then-create: several processes
    # (e.g. horovod workers) may reach this line at the same time.
    try:
        os.mkdir('models')
    except FileExistsError:
        pass
    # add new problem to params if problem_type_dict and processing_fn_dict provided
    if problem_type_dict:
        params.register_multiple_problems(
            problem_type_dict=problem_type_dict, processing_fn_dict=processing_fn_dict)

    if not params.problem_assigned and not problem:
        raise ValueError('neither params problem assigned nor problem provided.')

    if mode == 'train':
        if problem:
            params.assign_problem(problem, model_dir=model_dir)
        params.to_json()
    else:
        params.from_json(json_path)
        if problem:
            params.assign_problem(problem, model_dir=model_dir, predicting=True)

    return params


In [None]:
# export
# @logger.catch
def train_bert_multitask(
        problem='weibo_ner',
        num_gpus=1,
        num_epochs=10,
        model_dir='',
        params: Params = None,
        problem_type_dict: Dict[str, str] = None,
        processing_fn_dict: Dict[str, Callable] = None,
        model: tf.keras.Model = None,
        create_tf_record_only=False,
        steps_per_epoch: int = None,
        warmup_ratio=0.1,
        continue_training=False,
        mirrored_strategy: tf.distribute.MirroredStrategy = None,
        run_eagerly=False,
        callbacks: List[tf.keras.callbacks.Callback] = None,
        verbose=1) -> tf.keras.Model:
    """
    Train Multi-task Bert model

    Keyword Arguments:
    - problem (str, optional) -- Problems to train. Defaults to 'weibo_ner'
    - num_gpus (int, optional) -- Number of GPU to use. Defaults to 1
    - num_epochs (int, optional) -- Number of epochs to train. Defaults to 10
    - model_dir (str, optional) -- model dir. Defaults to ''
    - params (Params, optional) -- Params to define training and models. Defaults to None
    - problem_type_dict (dict, optional) -- Key: problem name, value: problem type. Defaults to None
    - processing_fn_dict (dict, optional) -- Key: problem name, value: problem data preprocessing fn. Defaults to None
    - model (tf.keras.Model, optional): if not provided, it will be created using `create_keras_model`. Defaults to None.
    - create_tf_record_only (bool, optional): if `True`, the function will only create TFRecord without training model. Defaults to False.
    - steps_per_epoch (int, optional): steps per epochs, if not provided, train dataset will be looped once to calculate steps per epoch. Defaults to None.
    - warmup_ratio (float, optional): lr warmup ratio. Defaults to 0.1.
    - continue_training (bool, optional): whether to resume training from `model_dir`. Defaults to False.
    - mirrored_strategy (MirroredStrategy, optional): Tensorflow MirroredStrategy. If None, a new `tf.distribute.MirroredStrategy()` is created; pass `False` to train without any strategy. Defaults to None.
    - run_eagerly (bool, optional): Whether to run model eagerly. Defaults to False.
    - callbacks (list, optional): list of callbacks to add during training. If None, ModelCheckpoint will be added.
    - verbose (int, optional): 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. Note that the progress bar is not particularly useful when logged to a file, so verbose=2 is recommended when not running interactively (eg, in a production environment).

    Returns:
    - the trained model, or None when `create_tf_record_only` is True.

    Raises:
    - NotImplementedError: if pyspark is enabled and `create_tf_record_only` is False.
    """
    if params is None:
        params = Params()
    if params.use_horovod:
        import horovod.tensorflow.keras as hvd
        hvd.init()
        # pin each horovod process to the GPU matching its local rank
        gpus = tf.config.experimental.list_physical_devices('GPU')
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        if gpus:
            tf.config.experimental.set_visible_devices(
                gpus[hvd.local_rank()], 'GPU')

    params = get_params_ready(problem, num_gpus, model_dir,
                              params, problem_type_dict, processing_fn_dict)
    params.train_epoch = num_epochs

    train_dataset = train_eval_input_fn(params)
    eval_dataset = train_eval_input_fn(params, mode=EVAL)
    if create_tf_record_only:
        return

    if get_is_pyspark():
        raise NotImplementedError(
            'Pyspark only support creating TFRecord. Please set create_tf_record_only as True when pyspark is enabled.')

    # get train_steps and update params
    if steps_per_epoch is not None:
        train_steps = steps_per_epoch
    else:
        # no step count given: walk the training set once to count batches
        train_steps = 0
        for _ in train_dataset:
            train_steps += 1
    params.update_train_steps(train_steps, warmup_ratio=warmup_ratio)

    # rebuild the training dataset after params were updated and loop it forever
    train_dataset = train_eval_input_fn(params)
    train_dataset = train_dataset.repeat()

    # a single batch is enough to build the model's variables
    one_batch = next(train_dataset.as_numpy_iterator())

    # None -> default MirroredStrategy, False -> explicitly no strategy
    if mirrored_strategy is None:
        mirrored_strategy = tf.distribute.MirroredStrategy()
    elif mirrored_strategy is False:
        mirrored_strategy = None

    # `False` has been normalised to None above, so None is the value to
    # guard against; otherwise `mirrored_strategy=False` with several GPUs
    # would dereference None here.
    if num_gpus > 1 and mirrored_strategy is not None:
        train_dataset = mirrored_strategy.experimental_distribute_dataset(
            train_dataset)
        eval_dataset = mirrored_strategy.experimental_distribute_dataset(
            eval_dataset)

    # restore priority: self > transfer > huggingface
    if continue_training and tf.train.latest_checkpoint(params.ckpt_dir):
        mode = 'resume'
    elif tf.train.latest_checkpoint(params.init_checkpoint):
        mode = 'transfer'
    else:
        mode = 'train'

    if model is None:
        model = create_keras_model(
            mirrored_strategy=mirrored_strategy, params=params,
            mode=mode, inputs_to_build_model=one_batch,
            run_eagerly=run_eagerly)

    _train_bert_multitask_keras_model(
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        model=model,
        params=params,
        mirrored_strategy=mirrored_strategy,
        callbacks=callbacks,
        verbose=verbose
    )
    params.to_json()
    return model


Train Multi-task Bert model

Keyword Arguments:
- problem (str, optional) -- Problems to train. Defaults to 'weibo_ner'
- num_gpus (int, optional) -- Number of GPU to use. Defaults to 1
- num_epochs (int, optional) -- Number of epochs to train. Defaults to 10
- model_dir (str, optional) -- model dir. Defaults to ''
- params (Params, optional) -- Params to define training and models. Defaults to None
- problem_type_dict (dict, optional) -- Key: problem name, value: problem type. Defaults to None
- processing_fn_dict (dict, optional) -- Key: problem name, value: problem data preprocessing fn. Defaults to None
- model (tf.keras.Model, optional): if not provided, it will be created using `create_keras_model`. Defaults to None.
- create_tf_record_only (bool, optional): if `True`, the function will only create TFRecord without training model. Defaults to False.
- steps_per_epoch (int, optional): steps per epochs, if not provided, train dataset will be looped once to calculate steps per epoch. Defaults to None.
- warmup_ratio (float, optional): lr warmup ratio. Defaults to 0.1.
- continue_training (bool, optional): whether to resume training from `model_dir`. Defaults to False.
- mirrored_strategy (MirroredStrategy, optional): Tensorflow MirroredStrategy. Defaults to None.
- run_eagerly (bool, optional): Whether to run model eagerly. Defaults to False.

In [None]:

# Smoke test: one epoch of a single step, without horovod and without a
# distribution strategy (`mirrored_strategy=False`), executed eagerly.
params.use_horovod = False

model = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=True,
    mirrored_strategy=False,
    run_eagerly=True
)


2021-06-17 13:22:41.927 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:22:41.928 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:22:41.928 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:22:41.929 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:22:41.929 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:22:41.930 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:22:41.930 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




2021-06-17 13:23:00.005 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
2021-06-17 13:23:00.947 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Model: "BertMultiTask"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BertMultiTaskBody (BertMulti multiple                  4082696   
_________________________________________________________________
basic_mtl (BasicMTL)         multiple                  0         
_________________________________________________________________
BertMultiTaskTop (BertMultiT multiple                  13229575  
_________________________________________________________________
sum_loss_combination (SumLos multiple                  0         
Total params: 17,312,273
Trainable params: 17,312,267
Non-trainable params: 6
_________________________________________________________________


### Slow train test

In [None]:
# hide

# horovod train
# NOTE(review): horovod is switched off on the next line, so this call runs as
# a plain fresh train into './models/fresh_train' -- confirm that is intended.
params.use_horovod = False
_ = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=False,
    mirrored_strategy=False,
    model_dir='./models/fresh_train'
)

# continue training (resumes only if a checkpoint exists in params.ckpt_dir),
# eager execution, no distribution strategy
model = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=True,
    mirrored_strategy=False,
    run_eagerly=True
)

# continue training with defaults: no `steps_per_epoch`, so the training set
# is iterated once to count steps, and a default MirroredStrategy is created
model = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    continue_training=True
)

# fresh train
_ = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=False,
    mirrored_strategy=False,
    model_dir='./models/fresh_train'
)

# transfer train
# uses the checkpoint written by the fresh train above as `init_checkpoint`
params.init_checkpoint = './models/fresh_train'
_ = train_bert_multitask(
    problem=problem,
    num_epochs=1,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict,
    steps_per_epoch=1,
    continue_training=False,
    mirrored_strategy=False
)


2021-06-17 13:23:02.297 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:23:02.297 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:23:02.298 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:23:02.299 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:23:02.299 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:23:02.300 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:23:02.300 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: invalid value for "node": expected "ast.AST", got "<class 'NoneType'>"; to visit lists of nodes, use "visit_block" instead


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
2021-06-17 13:23:17.066 | INFO     | m3tl.utils:set_phase:478 - Setting phase to train




2021-06-17 13:23:23.802 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




2021-06-17 13:23:27.046 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:23:27.047 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:23:27.047 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:23:27.048 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:23:27.048 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:23:27.049 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:23:27.049 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

Model: "BertMultiTask"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BertMultiTaskBody (BertMulti multiple                  4082696   
_________________________________________________________________
basic_mtl_1 (BasicMTL)       multiple                  0         
_________________________________________________________________
BertMultiTaskTop (BertMultiT multiple                  13229575  
_________________________________________________________________
sum_loss_combination_1 (SumL multiple                  0         
Total params: 17,312,273
Trainable params: 17,312,267
Non-trainable params: 6
_________________________________________________________________


2021-06-17 13:23:27.744 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:23:27.745 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:23:28.442 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:23:28.443 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:23:28.812 | CRITICAL | m3tl.base_params:update_train_steps:454 - Updating train_steps to 1
2021-06-17 13:23:29.380 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:23:29.381 | INFO     | m3tl.input_fn:train_eval_input_fn:6



2021-06-17 13:23:40.803 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




2021-06-17 13:23:41.726 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




2021-06-17 13:23:43.008 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:23:43.008 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:23:43.009 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:23:43.009 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:23:43.010 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:23:43.010 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:23:43.011 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

Model: "BertMultiTask"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BertMultiTaskBody (BertMulti multiple                  4082696   
_________________________________________________________________
basic_mtl_2 (BasicMTL)       multiple                  0         
_________________________________________________________________
BertMultiTaskTop (BertMultiT multiple                  13229575  
_________________________________________________________________
sum_loss_combination_2 (SumL multiple                  0         
Total params: 17,312,273
Trainable params: 17,312,267
Non-trainable params: 6
_________________________________________________________________


2021-06-17 13:23:43.644 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:23:43.645 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:23:44.289 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:23:44.290 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:23:45.277 | CRITICAL | m3tl.base_params:update_train_steps:454 - Updating train_steps to 2
2021-06-17 13:23:46.258 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:23:46.259 | INFO     | m3tl.input_fn:train_eval_input_fn:6

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


404 Client Error: Not Found for url: https://huggingface.co/voidful/albert_chinese_tiny/resolve/main/tf_model.h5
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFAlbertModel: ['predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.decoder.bias']
- This IS expected if you are initializing TFAlbertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFAlbertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was tr



2021-06-17 13:23:53.694 | INFO     | m3tl.utils:set_phase:478 - Setting phase to train
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
2021-06-17 13:23:59.127 | INFO     | m3tl.utils:set_phase:478 - Setting phase to train




2021-06-17 13:24:07.694 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




2021-06-17 13:24:12.062 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:24:12.063 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:24:12.063 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:24:12.064 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:24:12.064 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:24:12.064 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:24:12.065 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

Model: "BertMultiTask"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BertMultiTaskBody (BertMulti multiple                  4082696   
_________________________________________________________________
basic_mtl_3 (BasicMTL)       multiple                  0         
_________________________________________________________________
BertMultiTaskTop (BertMultiT multiple                  13229575  
_________________________________________________________________
sum_loss_combination_3 (SumL multiple                  0         
Total params: 17,312,273
Trainable params: 17,312,267
Non-trainable params: 6
_________________________________________________________________


2021-06-17 13:24:12.694 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:24:12.695 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:24:13.327 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:24:13.328 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:24:13.390 | CRITICAL | m3tl.base_params:update_train_steps:454 - Updating train_steps to 1
2021-06-17 13:24:13.948 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:24:13.949 | INFO     | m3tl.input_fn:train_eval_input_fn:6



2021-06-17 13:24:31.396 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




2021-06-17 13:24:34.681 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:24:34.682 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:24:34.683 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:24:34.684 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:24:34.684 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:24:34.685 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:24:34.685 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

Model: "BertMultiTask"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BertMultiTaskBody (BertMulti multiple                  4082696   
_________________________________________________________________
basic_mtl_4 (BasicMTL)       multiple                  0         
_________________________________________________________________
BertMultiTaskTop (BertMultiT multiple                  13229575  
_________________________________________________________________
sum_loss_combination_4 (SumL multiple                  0         
Total params: 17,312,273
Trainable params: 17,312,267
Non-trainable params: 6
_________________________________________________________________


2021-06-17 13:24:35.292 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:24:35.293 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:24:35.936 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:24:35.937 | INFO     | m3tl.input_fn:train_eval_input_fn:60 - {
    "weibo_fake_cls_weibo_fake_ner": 0.2702702702702703,
    "weibo_fake_multi_cls": 0.2702702702702703,
    "weibo_masklm": 0.1891891891891892,
    "weibo_premask_mlm": 0.2702702702702703
}
2021-06-17 13:24:35.999 | CRITICAL | m3tl.base_params:update_train_steps:454 - Updating train_steps to 1
2021-06-17 13:24:36.554 | INFO     | m3tl.input_fn:train_eval_input_fn:59 - sampling weights: 
2021-06-17 13:24:36.555 | INFO     | m3tl.input_fn:train_eval_input_fn:6



2021-06-17 13:24:39.238 | CRITICAL | m3tl.model_fn:compile:271 - Initial lr: 0.0
2021-06-17 13:24:39.239 | CRITICAL | m3tl.model_fn:compile:272 - Train steps: 1
2021-06-17 13:24:39.240 | CRITICAL | m3tl.model_fn:compile:273 - Warmup steps: 0




2021-06-17 13:24:39.503 | INFO     | m3tl.utils:set_phase:478 - Setting phase to train
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
2021-06-17 13:24:44.772 | INFO     | m3tl.utils:set_phase:478 - Setting phase to train




2021-06-17 13:24:51.225 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Model: "BertMultiTask"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BertMultiTaskBody (BertMulti multiple                  4082696   
_________________________________________________________________
basic_mtl_5 (BasicMTL)       multiple                  0         
_________________________________________________________________
BertMultiTaskTop (BertMultiT multiple                  13229575  
_________________________________________________________________
sum_loss_combination_5 (SumL multiple                  0         
Total params: 17,312,273
Trainable params: 17,312,267
Non-trainable params: 6
_________________________________________________________________


## Saving model for prediction

In [None]:
# export 
def create_tensorspec_from_shape_type(infered_shape_and_type: Tuple[Dict[str, list], Dict[str, tf.dtypes.DType]]) -> Dict[str, tf.TensorSpec]:
    """Build one `tf.TensorSpec` per feature from inferred shapes and dtypes.

    Only the rank of every inferred shape is kept: each dimension of the
    resulting spec is set to `None`.

    Args:
    - infered_shape_and_type (Tuple[Dict[str, list], Dict[str, tf.dtypes.DType]]):
        `(shape_dict, type_dict)` pair, both keyed by feature name.

    Returns:
    - Dict[str, tf.TensorSpec]: feature name -> spec carrying the feature's
        dtype and named after the feature.
    """
    shapes_by_feature, dtypes_by_feature = infered_shape_and_type
    return {
        feature: tf.TensorSpec(
            shape=[None for _ in feature_shape],
            dtype=dtypes_by_feature[feature],
            name=feature)
        for feature, feature_shape in shapes_by_feature.items()
    }

In [None]:
# Smoke test for `create_tensorspec_from_shape_type`: features of different
# ranks (2-D and 3-D) and dtypes (int32 and float32).
test_tup = ({
    'text_input_ids': [None, 3],
    'text_mask': [None, 3],
    'text_segment_ids': [None, 3],
    'image_input_ids': [None, 5, 10],
    'image_mask': [None, 5],
    'image_segment_ids': [None, 5],
    'class_input_ids': [None, 1],
    'class_mask': [None, 1],
    'class_segment_ids': [None, 1]},
    {
    'text_input_ids': tf.int32,
    'text_mask': tf.int32,
    'text_segment_ids': tf.int32,
    'image_input_ids': tf.float32,
    'image_mask': tf.int32,
    'image_segment_ids': tf.int32,
    'class_input_ids': tf.int32,
    'class_mask': tf.int32,
    'class_segment_ids': tf.int32})

test_specs = create_tensorspec_from_shape_type(test_tup)

# Every dimension must be relaxed to None while rank, dtype and name are kept.
test_shapes, test_dtypes = test_tup
assert set(test_specs) == set(test_shapes)
for spec_name, spec in test_specs.items():
    assert spec.shape.as_list() == [None] * len(test_shapes[spec_name])
    assert spec.dtype == test_dtypes[spec_name]
    assert spec.name == spec_name

print(test_specs)


{'text_input_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='text_input_ids'), 'text_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='text_mask'), 'text_segment_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='text_segment_ids'), 'image_input_ids': TensorSpec(shape=(None, None, None), dtype=tf.float32, name='image_input_ids'), 'image_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='image_mask'), 'image_segment_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='image_segment_ids'), 'class_input_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='class_input_ids'), 'class_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='class_mask'), 'class_segment_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='class_segment_ids')}


In [None]:
# export
@logger.catch
def trim_checkpoint_for_prediction(problem: str,
                                   input_dir: str,
                                   output_dir: str,
                                   problem_type_dict: Dict[str, str] = None,
                                   overwrite=True,
                                   fake_input_list=None,
                                   params=None,
                                   save_weights_only=True):
    """Export a prediction-only copy of a training checkpoint.

    The side files of `input_dir` are copied to `output_dir` (the
    `checkpoint`, `*.index` and `*.data-000*` files are skipped), a
    `BertMultiTask` model is built in predict phase, the weights stored under
    `input_dir/model` are loaded into it and the model is written out again
    under `params.ckpt_dir`, either as weights only or as a SavedModel in a
    `serving` sub-directory. The params are finally dumped with
    `params.to_json()`.

    Args:
    - problem (str): problem string passed to `params.assign_problem`.
    - input_dir (str): directory holding the training checkpoint and `params.json`.
    - output_dir (str): directory to create for the prediction checkpoint.
    - problem_type_dict (Dict[str, str], optional): problem name -> problem type,
        registered on the params before loading. Defaults to None.
    - overwrite (bool, optional): remove an existing `output_dir` first. Defaults to True.
    - fake_input_list (List, optional): inputs used to build the dummy dataset;
        `['fake']*5` is used when omitted. Defaults to None.
    - params (Params, optional): params to use; a fresh `Params()` when omitted.
    - save_weights_only (bool, optional): save weights only instead of exporting
        a SavedModel. Defaults to True.

    Raises:
    - ValueError: if `output_dir` is `input_dir` or one of its parent
        directories. Note that the `@logger.catch` decorator intercepts
        exceptions raised in this function.
    """
    # Refuse to run when `input_dir` is `output_dir` or lives inside it: the
    # `rmtree` below would otherwise delete the source checkpoint before it
    # has been read.
    real_input_dir = os.path.realpath(input_dir)
    real_output_dir = os.path.realpath(output_dir)
    if real_input_dir == real_output_dir or real_input_dir.startswith(
            os.path.join(real_output_dir, '')):
        raise ValueError(
            'output_dir ({}) must not be the same as, or contain, input_dir ({})'.format(
                output_dir, input_dir))

    if overwrite and os.path.exists(output_dir):
        rmtree(output_dir)
    # copy everything except the checkpoint files themselves; the model is
    # re-saved further down
    copytree(input_dir, output_dir, ignore=ignore_patterns(
        'checkpoint', '*.index', '*.data-000*'))
    base_dir, dir_name = os.path.split(output_dir)
    if params is None:
        params = Params()

    if problem_type_dict:
        params.register_multiple_problems(problem_type_dict=problem_type_dict)
    params.from_json(os.path.join(input_dir, 'params.json'))
    params.assign_problem(problem, base_dir=base_dir,
                          dir_name=dir_name, predicting=True)

    model = BertMultiTask(params)
    if fake_input_list is None:
        dummy_dataset = predict_input_fn(['fake']*5, params)
    else:
        dummy_dataset = predict_input_fn(fake_input_list*5, params)

    batch_fake_data = next(dummy_dataset.as_numpy_iterator())
    shape_type_dict = infer_shape_and_type_from_dict(batch_fake_data)
    spec_dict = create_tensorspec_from_shape_type(infered_shape_and_type=shape_type_dict)

    set_phase(PREDICT)
    # one forward pass on the dummy batch before loading weights
    # (presumably so that all model variables exist -- TODO confirm)
    _ = model(batch_fake_data)
    model.load_weights(os.path.join(input_dir, 'model'))
    logger.critical("serving input sigantures: {}".format(spec_dict))
    if save_weights_only:
        model.save_weights(os.path.join(params.ckpt_dir, 'model'))
    else:
        class ServingModule(tf.Module):
            """Thin `tf.Module` exposing the model's `call` as a `tf.function`."""

            def __init__(self):
                super(ServingModule, self).__init__()
                self.model = model

            @tf.function
            def serve(self, x):
                return self.model.call(x)
        serving_module = ServingModule()
        # call once with the concrete dummy batch before requesting the
        # concrete function for the relaxed (all-None) input specs
        _ = serving_module.serve(batch_fake_data)
        signatures = dict(
            serving_default=serving_module.serve.get_concrete_function(spec_dict)
        )
        tf.saved_model.save(serving_module, os.path.join(params.ckpt_dir, 'serving'), signatures=signatures)
    params.to_json()


Minimize checkpoint size for prediction.

Since the original checkpoint contains the optimizer's variables
(for instance, with Adam the checkpoint is about three times the
size of the model weights), this function removes the variables
that are not used in prediction to save space.

Note: if the model is a multimodal model, you have to provide a `fake_input_list` that
mimics the structure of the real input. Otherwise modal embeddings will be randomly initialized.

Args:
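- problem (str): problem string to assign to the params
- input_dir (str): directory of the training checkpoint (must contain `params.json`)
- output_dir (str): directory to write the prediction checkpoint to
- problem_type_dict (Dict[str, str], optional): Key: problem name, value: problem type. Defaults to None.
- overwrite (bool, optional): whether to remove an existing `output_dir` first. Defaults to True.
- fake_input_list (List, optional): fake input list used to create the dummy dataset. Defaults to None.
- params (Params, optional): params. A new `Params()` is created if not provided. Defaults to None.
- save_weights_only (bool, optional): if True, save model weights only; otherwise export a SavedModel to the `serving` sub-directory. Defaults to True.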

In [None]:
# fake inputs
tf.get_logger().setLevel('ERROR')
import numpy as np
from m3tl.predefined_problems.test_data import generate_fake_data

fake_inputs = [
    features for features, _ in generate_fake_data(output_format='gen_dict_tuple')]
trained_ckpt_dir = model.params.ckpt_dir
pred_ckpt_dir = trained_ckpt_dir+'_pred'

# save as SavedModel pb
trim_checkpoint_for_prediction(
    problem=model.params.problem_str,
    input_dir=trained_ckpt_dir,
    output_dir=pred_ckpt_dir,
    problem_type_dict=problem_type_dict,
    overwrite=True,
    fake_input_list=fake_inputs,
    save_weights_only=False)

# default export (weights only) into the same directory; `overwrite=True`
# replaces what the call above wrote
trim_checkpoint_for_prediction(
    problem=problem,
    input_dir=trained_ckpt_dir,
    output_dir=pred_ckpt_dir,
    problem_type_dict=problem_type_dict,
    overwrite=True,
    fake_input_list=fake_inputs)



2021-06-17 13:24:55.159 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-17 13:24:55.159 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-17 13:24:55.160 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-17 13:24:55.160 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-17 13:24:55.161 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-17 13:24:55.161 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-17 13:24:55.164 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

## Eval

In [None]:
# export
@logger.catch
def eval_bert_multitask(
        problem='weibo_ner',
        num_gpus=1,
        model_dir='',
        params=None,
        problem_type_dict=None,
        processing_fn_dict=None,
        model=None,
        run_eagerly=False):
    """Evaluate a multi-task Bert model and return its metrics.

    Args:
    - problem (str, optional): problems to evaluate. Defaults to 'weibo_ner'.
    - num_gpus (int, optional): forwarded to `get_params_ready`. Defaults to 1.
    - model_dir (str, optional): model dir; falls back to `params.ckpt_dir`
        when empty and `params` is given. Defaults to ''.
    - params (Params, optional): params. Defaults to None.
    - problem_type_dict (dict, optional): Key: problem name, value: problem type. Defaults to None.
    - processing_fn_dict (dict, optional): Key: problem name, value: problem data preprocessing fn. Defaults to None.
    - model (tf.keras.Model, optional): model to evaluate; built with
        `create_keras_model` when omitted. Defaults to None.
    - run_eagerly (bool, optional): forwarded to `create_keras_model`. Defaults to False.

    Returns:
    - the dict produced by `model.evaluate(..., return_dict=True)`.
    """
    if params is not None and not model_dir:
        model_dir = params.ckpt_dir
    params_json_path = os.path.join(model_dir, 'params.json')
    params = get_params_ready(
        problem, num_gpus, model_dir,
        params, problem_type_dict, processing_fn_dict,
        mode='predict', json_path=params_json_path)

    # one batch is pulled from a throw-away dataset to build the model; the
    # dataset that is actually evaluated is created afresh right after
    sample_batch = next(
        train_eval_input_fn(params, mode=EVAL).as_numpy_iterator())
    eval_dataset = train_eval_input_fn(params, mode=EVAL)

    strategy = tf.distribute.MirroredStrategy()
    if model is None:
        model = create_keras_model(
            mirrored_strategy=strategy,
            params=params,
            mode='eval',
            inputs_to_build_model=sample_batch,
            run_eagerly=run_eagerly)
    return model.evaluate(eval_dataset, return_dict=True)


Evaluate Multi-task Bert model

Keyword Arguments:

- problem (str, optional): problems to evaluate. Defaults to 'weibo_ner'.
- num_gpus (int, optional): number of GPUs to use. Defaults to 1.
- model_dir (str, optional): model dir. Defaults to ''.
- params (Params, optional): params. Defaults to None.
- problem_type_dict (dict, optional): Key: problem name, value: problem type. Defaults to None.
- processing_fn_dict (dict, optional): Key: problem name, value: problem data preprocessing fn. Defaults to None.
- model (tf.keras.Model, optional): If not provided, it will be created with `create_keras_model`. Defaults to None.
- run_eagerly (bool, optional): Whether to run model eagerly. Defaults to False.

In [None]:
import shutil

# remove original ckpt path to make sure model can be init from a different path
shutil.rmtree(model.params.ckpt_dir)

# arguments shared by both evaluation runs below
shared_eval_kwargs = dict(
    problem=problem,
    params=params,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict)

# evaluate from the trimmed prediction checkpoint directory
eval_bert_multitask(model_dir=model.params.ckpt_dir+'_pred', **shared_eval_kwargs)

# provide model instead of dir
eval_bert_multitask(model=model, **shared_eval_kwargs)


2021-06-15 20:28:35.752 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-15 20:28:35.753 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-15 20:28:35.754 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-15 20:28:35.754 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-15 20:28:35.755 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-15 20:28:35.755 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-15 20:28:35.755 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 



2021-06-15 20:28:44.361 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-15 20:28:44.362 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-15 20:28:44.363 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-15 20:28:44.363 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-15 20:28:44.363 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-15 20:28:44.364 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-15 20:28:44.364 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 



{'loss': 23.17413330078125,
 'mean_acc': 0.4241071343421936,
 'weibo_fake_cls_acc': 0.5,
 'weibo_fake_ner_acc': 0.2857142984867096,
 'BertMultiTaskTop/weibo_fake_cls/losses/0': 1.8408839702606201,
 'BertMultiTaskTop/weibo_fake_multi_cls/losses/0': 0.0,
 'BertMultiTaskTop/weibo_fake_ner/losses/0': 1.7550007104873657,
 'BertMultiTaskTop/weibo_masklm/losses/0': 10.002845764160156,
 'BertMultiTaskTop/weibo_premask_mlm/losses/0': 9.788535118103027}

In [None]:
# hide
# eager mode test
# Runs the same evaluation with `run_eagerly=True`, reading from `model.params.ckpt_dir`.
# NOTE(review): an earlier cell calls `shutil.rmtree(model.params.ckpt_dir)`; confirm
# that `model.params.ckpt_dir` refers to an existing checkpoint dir when this cell runs.
eval_bert_multitask(problem=problem, params=params,
                    problem_type_dict=problem_type_dict, processing_fn_dict=processing_fn_dict,
                    model_dir=model.params.ckpt_dir, run_eagerly=True)


2021-06-15 20:28:47.317 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-15 20:28:47.318 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-15 20:28:47.318 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-15 20:28:47.319 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-15 20:28:47.319 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-15 20:28:47.319 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-15 20:28:47.320 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_premask_mlm, problem type: 

      1/Unknown - 2s 2s/step - loss: 23.5573 - mean_acc: 0.3690 - weibo_fake_cls_acc: 0.1667 - weibo_fake_ner_acc: 0.5714 - BertMultiTaskTop/weibo_fake_cls/losses/0: 0.9506 - BertMultiTaskTop/weibo_fake_multi_cls/losses/0: 1.7102 - BertMultiTaskTop/weibo_fake_ner/losses/0: 0.9680 - BertMultiTaskTop/weibo_masklm/losses/0: 10.0068 - BertMultiTaskTop/weibo_premask_mlm/losses/0: 9.9217

2021-06-15 20:28:52.575 | INFO     | m3tl.utils:set_phase:478 - Setting phase to eval
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




{'loss': 17.491474151611328,
 'mean_acc': 0.4523809552192688,
 'weibo_fake_cls_acc': 0.5,
 'weibo_fake_ner_acc': 0.5714285969734192,
 'BertMultiTaskTop/weibo_fake_cls/losses/0': 0.4286690652370453,
 'BertMultiTaskTop/weibo_fake_multi_cls/losses/0': 0.0,
 'BertMultiTaskTop/weibo_fake_ner/losses/0': 1.0083808898925781,
 'BertMultiTaskTop/weibo_masklm/losses/0': 9.988622665405273}

## Predict

In [None]:
# export


def arr_to_str(inp_arr: np.ndarray) -> List[str]:
    """Serialize every entry along the first axis of an array to JSON.

    Args:
    - inp_arr (np.ndarray): array with at least one dimension.

    Returns:
    - List[str]: one JSON string per first-axis entry of `inp_arr`
        (nested lists for arrays with more than one dimension).
    """
    return [json.dumps(entry) for entry in inp_arr.tolist()]


def decode_predictions(pred: Dict[str, np.ndarray], params: Params, array_as_str=False) -> Dict[str, Union[int, float, np.ndarray, list, str]]:
    """Decode raw model outputs into labels, one entry per output name.

    Outputs whose name is not in `params.problem_list` are treated as
    additional outputs and passed through (numpy arrays are converted with
    `arr_to_str` when `array_as_str` is set). For registered problems:

    - `regression`: returned unchanged.
    - `multi_cls`: thresholded at 0.5, then inverse-transformed.
    - `cls` / `seq_tag`: argmax over the last axis, then inverse-transformed
      (row by row for `seq_tag`).
    - anything else: decoded according to the label encoder's type, or
      returned unchanged when the encoder type is not recognised.

    Args:
    - pred (Dict[str, np.ndarray]): output name -> raw prediction.
    - params (Params): params providing `problem_list` and problem types.
    - array_as_str (bool, optional): convert additional numpy outputs to lists
        of JSON strings. Defaults to False.

    Returns:
    - Dict[str, Union[int, float, np.ndarray, list, str]]: output name -> decoded prediction.
    """
    parsed_pred = dict()
    problem_list = params.problem_list
    label_encoder_dict = {p: get_or_make_label_encoder(
        params=params, problem=p, mode=PREDICT) for p in problem_list}

    # Problem types this function is known to decode; other types are still
    # attempted after a warning. Built once instead of once per problem.
    support_problem_type = (
        'multi_cls',
        'cls',
        'seq_tag',
        'regression',
        'masklm',
        'premask_mlm',
        'vectorfit'
    )

    for problem, problem_pred_array in pred.items():

        # additional outputs
        if problem not in problem_list:
            if array_as_str and isinstance(problem_pred_array, np.ndarray):
                parsed_pred[problem] = arr_to_str(problem_pred_array)
            else:
                parsed_pred[problem] = problem_pred_array
            continue

        label_encoder = label_encoder_dict[problem]

        # look the problem type up once and reuse it for every check below
        problem_type = params.get_problem_type(problem=problem)
        if problem_type not in support_problem_type:
            logger.warning("trying to decode prediction of unsupported problem type"
            " {}, if any error raised, please disable decode prediction.".format(problem_type))

        if problem_type == 'regression':
            parsed_pred[problem] = problem_pred_array
            continue

        is_seq_tag = problem_type == 'seq_tag'

        # get pred from prob
        if problem_type == 'multi_cls':
            problem_pred = problem_pred_array >= 0.5
        elif is_seq_tag or problem_type == 'cls':
            problem_pred = np.argmax(problem_pred_array, axis=-1)
        else:
            problem_pred = problem_pred_array

        # NOTE(review): np.apply_along_axis sizes its output buffer from the
        # first row's result, so fixed-width string results of later rows may
        # be truncated -- confirm the encoders return a uniform-width dtype.
        if is_seq_tag:
            # sequence labels
            parsed_problem_pred = np.apply_along_axis(
                label_encoder.inverse_transform, axis=1, arr=problem_pred)
        elif isinstance(label_encoder, (MultiLabelBinarizer, LabelEncoder)):
            parsed_problem_pred = label_encoder.inverse_transform(
                problem_pred)
        elif isinstance(label_encoder, PreTrainedTokenizer):
            parsed_problem_pred = np.apply_along_axis(
                label_encoder.convert_ids_to_tokens, axis=1, arr=problem_pred
            )
        else:
            # unknown encoder type: hand back the raw prediction
            parsed_problem_pred = problem_pred_array

        parsed_pred[problem] = parsed_problem_pred
    return parsed_pred


In [None]:
# export
@logger.catch
def predict_bert_multitask(
        inputs,
        problem='weibo_ner',
        model_dir='',
        params: Params = None,
        problem_type_dict: Dict[str, str] = None,
        processing_fn_dict: Dict[str, Callable] = None,
        model: tf.keras.Model = None,
        return_model=False,
        run_eagerly=False,
        mirrored_strategy=None,
        decode_prediction=False):
    """Use Multi-task Bert model to do prediction

        Args:
        - inputs (Iterable): Iterable of inputs
        - problem (str, optional): problems to predict. Defaults to 'weibo_ner'.
        - model_dir (str, optional): model dir. If empty, `params.ckpt_dir` is used. Defaults to ''.
        - params (Params, optional): params. If None, a fresh `Params()` is created. Defaults to None.
        - problem_type_dict (Dict[str, str], optional): Key: problem name, value: problem type.. Defaults to None.
        - processing_fn_dict (Dict[str, Callable], optional): Key: problem name, value: problem data preprocessing fn. Defaults to None.
        - model (tf.keras.Model, optional): If not provided, it will be created with `create_keras_model`
            from one batch of `inputs`. If provided, it is used as is and no extra batch is drawn. Defaults to None.
        - return_model (bool, optional): Whether return model, if True, function will return (pred, model) tuple. Defaults to False.
        - run_eagerly (bool, optional): Whether to run model eagerly. Only used when the model is created here. Defaults to False.
        - mirrored_strategy (optional): mirrored strategy for distribute prediction. Defaults to None.
        - decode_prediction (bool, optional): whether to decode predictions. Defaults to False.

        Returns:
        - The output of `model.predict` (passed through `decode_predictions` with
          `model.params` when `decode_prediction` is True), or a `(pred, model)`
          tuple when `return_model` is True.

        Note:
        - The function is wrapped in `@logger.catch`; with loguru's default
          settings an exception raised inside is logged instead of propagated,
          in which case the call returns None.
    """
    set_phase(PREDICT)
    if params is None:
        params = Params()
    # params is guaranteed to be set at this point, so only model_dir needs checking
    if not model_dir:
        model_dir = params.ckpt_dir
    params = get_params_ready(problem, 1, model_dir,
                              params, problem_type_dict, processing_fn_dict,
                              mode='predict', json_path=os.path.join(model_dir, 'params.json'))

    logger.info('Checkpoint dir: {}'.format(params.ckpt_dir))
    # NOTE(review): fixed pause right after logging the checkpoint dir;
    # presumably there so the message can be read - confirm it is still wanted.
    time.sleep(3)

    # A sample batch is only needed to build a new model. Skip the throw-away
    # dataset entirely when the caller already supplied one, so the input
    # pipeline is not run twice and one-shot iterables are not partially consumed.
    one_batch_data = None
    if model is None:
        probe_dataset = predict_input_fn(inputs, params)
        one_batch_data = next(probe_dataset.as_numpy_iterator())

    # Fresh dataset for the actual prediction pass (the probe above, if any,
    # has already been advanced by one batch).
    pred_dataset = predict_input_fn(inputs, params)

    if model is None:
        model = create_keras_model(
            mirrored_strategy=mirrored_strategy, params=params,
            mode='predict', inputs_to_build_model=one_batch_data,
            run_eagerly=run_eagerly)

    if mirrored_strategy is not None:
        with mirrored_strategy.scope():
            pred = model.predict(pred_dataset)
    else:
        pred = model.predict(pred_dataset)

    if decode_prediction:
        # decoding uses the params attached to the model, not the local `params`
        pred = decode_predictions(pred=pred, params=model.params)

    if return_model:
        return pred, model
    return pred


In [None]:
# Predict the single problem 'weibo_fake_ner'.
# `model` and `fake_inputs` must already exist in the kernel: the current
# `model` is only read for its checkpoint dir (it is not passed as `model=`),
# and the name is then rebound to the model returned via return_model=True.
pred, model = predict_bert_multitask(
    problem='weibo_fake_ner',
    inputs=fake_inputs*20, model_dir=model.params.ckpt_dir,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict, return_model=True,
    params=params)


2021-06-15 20:28:53.975 | INFO     | m3tl.utils:set_phase:478 - Setting phase to infer
2021-06-15 20:28:53.976 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-15 20:28:53.977 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-15 20:28:53.977 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-15 20:28:53.978 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-15 20:28:53.978 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-15 20:28:53.978 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-15 20:28:53.979 | INFO     | m3tl.base_param

In [None]:
# hide
# eager mode test: predict 'weibo_fake_ner' with run_eagerly=True.
# The current `model` is only read for its checkpoint dir; since no `model=`
# is passed, predict_bert_multitask creates the model itself and returns it.
pred, model = predict_bert_multitask(
    problem='weibo_fake_ner',
    inputs=fake_inputs*20, model_dir=model.params.ckpt_dir,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict, return_model=True,
    params=params, run_eagerly=True)


2021-06-15 20:29:02.664 | INFO     | m3tl.utils:set_phase:478 - Setting phase to infer
2021-06-15 20:29:02.665 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-15 20:29:02.666 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-15 20:29:02.666 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-15 20:29:02.666 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-15 20:29:02.667 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-15 20:29:02.667 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-15 20:29:02.668 | INFO     | m3tl.base_param

In [None]:
# hide
# Multi-problem test: the problem string names four problems joined by '|',
# and decode_prediction=True asks for the raw predictions to be decoded
# before they are returned.
pred, model = predict_bert_multitask(
    problem='weibo_fake_ner|weibo_fake_cls|weibo_fake_multi_cls|weibo_premask_mlm',
    inputs=fake_inputs*20, model_dir=model.params.ckpt_dir,
    problem_type_dict=problem_type_dict,
    processing_fn_dict=processing_fn_dict, return_model=True,
    params=params,
    decode_prediction=True)

2021-06-15 20:29:09.212 | INFO     | m3tl.utils:set_phase:478 - Setting phase to infer
2021-06-15 20:29:09.213 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_ner, problem type: seq_tag
2021-06-15 20:29:09.213 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_cws, problem type: seq_tag
2021-06-15 20:29:09.214 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_multi_cls, problem type: multi_cls
2021-06-15 20:29:09.214 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_fake_cls, problem type: cls
2021-06-15 20:29:09.215 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_masklm, problem type: masklm
2021-06-15 20:29:09.215 | INFO     | m3tl.base_params:register_multiple_problems:538 - Adding new problem weibo_pretrain, problem type: pretrain
2021-06-15 20:29:09.216 | INFO     | m3tl.base_param