In [None]:
# default_exp modeling
import os
# Reload edited project modules automatically while developing.
%load_ext autoreload
%autoreload 2
# Set before TensorFlow is imported in a later cell; a value of "-1" presumably
# hides all CUDA devices so the notebook runs on CPU -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


# Body Modeling

Modeling code for the body model, a.k.a. BERT.

In [None]:
# export
# nbdev_comment from __future__ import absolute_import, division, print_function

import json

import tensorflow as tf
import transformers
from loguru import logger
from m3tl.params import Params
from m3tl.utils import (get_embedding_table_from_model,
                        get_shape_list, load_transformer_model)
from m3tl.embedding_layer.base import DefaultMultimodalEmbedding


class MultiModalBertModel(tf.keras.Model):
    """Transformer body model fed through a multimodal embedding layer.

    The model wraps a transformer returned by ``load_transformer_model`` and
    the embedding layer class configured in ``params.embedding_layer['model']``.
    ``call`` runs the embedding layer, feeds its result to the transformer as
    ``inputs_embeds`` and caches the main tensors on the instance so the
    ``get_*`` accessors can return them afterwards.

    Args:
        params: m3tl ``Params`` object. This class reads
            ``init_weight_from_huggingface``, ``transformer_model_name``,
            ``bert_config``, ``transformer_model_loading`` and
            ``embedding_layer`` from it.
        use_one_hot_embeddings: stored on the instance; not otherwise used by
            the code in this class.
    """

    def __init__(self, params: Params, use_one_hot_embeddings=False):
        super().__init__()
        self.params = params
        if self.params.init_weight_from_huggingface:
            self.bert_model = load_transformer_model(
                self.params.transformer_model_name, self.params.transformer_model_loading)
        else:
            self.bert_model = load_transformer_model(
                self.params.bert_config, self.params.transformer_model_loading)
            # Run the freshly configured model once on dummy inputs; presumably
            # this builds its variables -- confirm.
            self.bert_model(tf.convert_to_tensor(
                transformers.file_utils.DUMMY_INPUTS))
        self.use_one_hot_embeddings = use_one_hot_embeddings

        # multimodal input dense
        self.embedding_layer = self.bert_model.get_input_embeddings()
        # NOTE: the attribute name (sic, "multimoda") is kept unchanged so code
        # outside this class that references it keeps working.
        self.multimoda_embedding = self.params.embedding_layer['model'](
            params=self.params, embedding_layer=self.embedding_layer)

        # Cached results of the most recent `call`. They are declared here so
        # the accessors return None instead of raising AttributeError when
        # they are used before the first forward pass.
        self.embedding_output = None
        self.model_input_mask = None
        self.model_token_type_ids = None
        self.sequence_output = None
        self.pooled_output = None
        self.all_encoder_layers = None

    @tf.function
    def call(self, inputs, training=False):
        """Embed ``inputs`` and run them through the transformer.

        Args:
            inputs: features forwarded unchanged to the multimodal embedding
                layer.
            training: forwarded to the embedding layer and the transformer.

        Returns:
            A tuple ``(emb_inputs, outputs)``. ``emb_inputs`` is the first
            value returned by the embedding layer. ``outputs`` is a dict with
            the transformer outputs except ``hidden_states`` and
            ``attentions``, plus ``pooler_output``, ``model_input_mask``,
            ``model_token_type_ids``, ``all_encoder_layers``,
            ``embedding_output`` and ``embedding_table``.

        Raises:
            ValueError: if the transformer did not return hidden states.
        """
        # NOTE(review): this method is wrapped in tf.function, so the
        # attributes assigned below are set while the function is traced and
        # may be symbolic graph tensors -- confirm before reading their values
        # (as opposed to their static shapes) through the accessors.
        emb_inputs, embedding_tup = self.multimoda_embedding(inputs, training)
        self.embedding_output = embedding_tup.word_embedding
        self.model_input_mask = embedding_tup.res_input_mask
        self.model_token_type_ids = embedding_tup.res_segment_ids

        # Embeddings are supplied directly, hence `input_ids` is None.
        outputs = self.bert_model(
            {'input_ids': None,
             'inputs_embeds': self.embedding_output,
             'attention_mask': self.model_input_mask,
             'token_type_ids': self.model_token_type_ids,
             'position_ids': None},
            training=training
        )
        self.sequence_output = outputs.last_hidden_state
        if 'pooler_output' in outputs:
            self.pooled_output = outputs.pooler_output
        else:
            # no pooled output, use mean of token embedding
            self.pooled_output = tf.reduce_mean(
                outputs.last_hidden_state, axis=1)
            outputs['pooler_output'] = self.pooled_output

        hidden_states = outputs.hidden_states
        if hidden_states is None:
            # Without this guard tf.stack fails with an unrelated-looking
            # error; point at the actual cause instead.
            raise ValueError(
                'The transformer model did not return hidden states; '
                'set `output_hidden_states=True` in its config.')
        # One entry per returned hidden state, stacked along a new axis 1.
        self.all_encoder_layers = tf.stack(hidden_states, axis=1)

        # Drop the tuple-valued entries and return a plain dict.
        outputs = {k: v for k, v in outputs.items() if k not in (
            'hidden_states', 'attentions')}
        outputs['model_input_mask'] = self.model_input_mask
        outputs['model_token_type_ids'] = self.model_token_type_ids
        outputs['all_encoder_layers'] = self.all_encoder_layers
        outputs['embedding_output'] = self.embedding_output
        outputs['embedding_table'] = self.embedding_layer.weights[0]
        return emb_inputs, outputs

    def get_pooled_output(self):
        """Gets the pooled output cached by the last ``call``.

        Returns:
          The transformer's ``pooler_output`` when it provides one, otherwise
          the mean of the last hidden state over axis 1. ``None`` if ``call``
          has not been run yet.
        """
        return self.pooled_output

    def get_sequence_output(self):
        """Gets final hidden layer of encoder.

        Returns:
          float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
          to the final hidden of the transformer encoder. ``None`` if ``call``
          has not been run yet.
        """
        return self.sequence_output

    def get_all_encoder_layers(self):
        """Gets the transformer hidden states stacked along axis 1.

        Returns:
          The stacked hidden states cached by the last ``call``, or ``None``
          if ``call`` has not been run yet.
        """
        return self.all_encoder_layers

    def get_embedding_output(self):
        """Gets the embeddings that were fed to the transformer.

        Returns:
          The ``word_embedding`` produced by the multimodal embedding layer,
          i.e. the tensor passed to the transformer as ``inputs_embeds``.
          Positional / token type embeddings and layer normalization are not
          applied by this class; presumably the wrapped transformer applies
          them internally -- confirm. ``None`` if ``call`` has not been run
          yet.
        """
        return self.embedding_output

    def get_embedding_table(self):
        """Gets the embedding table via ``get_embedding_table_from_model``."""
        return get_embedding_table_from_model(self.bert_model)

    def get_input_mask(self):
        """Gets the input mask produced by the embedding layer in the last ``call``."""
        return self.model_input_mask

    def get_token_type_ids(self):
        """Gets the segment ids produced by the embedding layer in the last ``call``."""
        return self.model_token_type_ids


In [None]:
# hide
from m3tl.test_base import TestBase
import m3tl
import shutil
import numpy as np
# Shared test fixture; its `params` object is reused by the cells below.
test_base = TestBase()
# Select the problems to train on. Judging by the logged sampling weights,
# `&` groups problems together and `|` separates groups -- confirm.
test_base.params.assign_problem(
    'weibo_fake_ner&weibo_fake_cls|weibo_fake_multi_cls|weibo_masklm')
params = test_base.params
# Input pipelines for the train and eval modes.
train_dataset = m3tl.train_eval_input_fn(
    params=params, mode=m3tl.TRAIN)
eval_dataset = m3tl.train_eval_input_fn(
    params=params, mode=m3tl.EVAL
)

# NOTE: rebinds `train_dataset`; re-running this line alone repeats an
# already repeated dataset.
train_dataset = train_dataset.repeat()

# Take a single batch (as numpy values) to use as sample model input.
one_batch_data = next(train_dataset.as_numpy_iterator())




Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_cws, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_pretrain, problem type: pretrain
Adding new problem weibo_fake_regression, problem type: regression
Adding new problem weibo_fake_vector_fit, problem type: vector_fit
Adding new problem weibo_premask_mlm, problem type: premask_mlm




INFO:tensorflow:sampling weights: 
INFO:tensorflow:weibo_fake_cls_weibo_fake_ner: 0.3389830508474576
INFO:tensorflow:weibo_fake_multi_cls: 0.3389830508474576
INFO:tensorflow:weibo_masklm: 0.3220338983050847
INFO:tensorflow:sampling weights: 
INFO:tensorflow:weibo_fake_cls_weibo_fake_ner: 0.3389830508474576
INFO:tensorflow:weibo_fake_multi_cls: 0.3389830508474576
INFO:tensorflow:weibo_masklm: 0.3220338983050847


`MultiModalBertModel` is a transformers model with multi-modal input support. It can be used like a normal Keras model.

In [None]:

# Build the model from the test params and run one forward pass; the getters
# used below return values that are only set inside `call`.
model = MultiModalBertModel(params=params)
_ = model(one_batch_data)
# 312 is presumably the hidden size of the transformer used by the test
# params -- confirm.
assert model.get_pooled_output().shape[-1] == 312
# The sequence output is expected to be a rank-3 tensor.
assert len(model.get_sequence_output().shape) == 3


404 Client Error: Not Found for url: https://huggingface.co/voidful/albert_chinese_tiny/resolve/main/tf_model.h5
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFAlbertModel: ['predictions.bias', 'predictions.dense.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.weight']
- This IS expected if you are initializing TFAlbertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFAlbertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was tr

INFO:tensorflow:Modal Type id mapping: 
 {
    "class": 0,
    "image": 1,
    "text": 2
}
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


In [None]:
from m3tl.run_bert_multitask import create_tensorspec_from_shape_type
from m3tl.utils import infer_shape_and_type_from_dict
# Derive tensor specs from the sample batch. In this cell `spec_dict` is only
# referenced from commented-out code.
spec_dict = create_tensorspec_from_shape_type(
    infer_shape_and_type_from_dict(one_batch_data))


class ServingModule(tf.Module):
    """``tf.Module`` wrapper that exposes a model's forward pass as ``serve``.

    Args:
        keras_model: model to wrap. When omitted, the notebook-level ``model``
            variable is used, which keeps the original zero-argument
            construction working.
    """

    def __init__(self, keras_model=None):
        super().__init__()
        # Prefer an explicitly injected model; fall back to the notebook
        # global only when none is given.
        self.model = model if keras_model is None else keras_model

    # @tf.function(input_signature=[v for v in spec_dict.values()])
    def serve(self, x):
        """Run the wrapped model's ``call`` on ``x`` and return its result."""
        return self.model.call(x)


# serving_module = ServingModule()
# _ = serving_module.serve(one_batch_data)
# signatures = dict(
#     serving_default=serving_module.serve.get_concrete_function(one_batch_data)
# )
# Trace `model.call` for the sample batch.
# NOTE(review): `signatures` is not passed to the active `model.save` call
# below; only the commented-out variants use it -- confirm this is intended.
signatures = dict(
    serving_default=model.call.get_concrete_function(one_batch_data)
)
# tf.saved_model.save(serving_module, os.path.join(params.ckpt_dir, 'serving'), signatures=signatures)
# model.save(os.path.join(params.ckpt_dir, 'serving'), signatures=model.call.get_concrete_function(spec_dict), save_traces=True)
# NOTE(review): the output recorded for this cell ends with
# `TypeError: ('Not JSON Serializable:', <tf.Tensor ...>)`, so this save
# presumably fails as written -- confirm and fix before relying on the export.
model.save(os.path.join(params.ckpt_dir, 'serving'))

# model.save(params.ckpt_dir, signatures=model.call.get_concrete_function(one_batch_data))


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
T

INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c126a4ed0>, False), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c12685d90>, True), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c126e3310>, True), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c126e3790>, False), {}).
INFO:tensorflow:Unsupported signature for serializatio



INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c126a4ed0>, False), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c12685d90>, True), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c126e3310>, True), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(None, 26), dtype=tf.int64, name='input_ids'), None, None, None, <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f6c126e3790>, False), {}).
INFO:tensorflow:Unsupported signature for serializatio



TypeError: ('Not JSON Serializable:', <tf.Tensor: shape=(), dtype=int32, numpy=128>)