In [1]:
# Switch the kernel's working directory; the relative model path used further
# down ('ernie-m-large/infer_model') is resolved against this location.
%cd /home/aistudio/work/

/home/aistudio/work


In [2]:
!pip install --upgrade paddlenlp==2.3.4

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
from scipy.special import softmax
from scipy import spatial
import os
import paddle
from paddle import inference
import paddlenlp as ppnlp
from paddlenlp.data import Tuple, Pad

In [4]:
def convert_example_recall_infer(example,
                    tokenizer,
                    max_seq_length=512,
                    pad_to_max_seq_len=False):
    """
    Tokenize every text stored in ``example`` and collect the token ids.

    Args:
        example(obj:`dict`): Mapping whose values are the raw texts to encode.
            The keys are ignored.
        tokenizer(obj:`callable`): Invoked once per text as
            ``tokenizer(text=..., max_seq_len=..., pad_to_max_seq_len=...)``.
            Its result must be indexable by ``"input_ids"``.
        max_seq_length(obj:`int`, defaults to 512): Forwarded to the tokenizer
            as its ``max_seq_len`` argument.
        pad_to_max_seq_len(obj:`bool`, defaults to `False`): Forwarded to the
            tokenizer unchanged.
    Returns:
        list: One ``input_ids`` entry per value of ``example``, in the
            iteration order of ``example.items()``.
    """

    def _encode(text):
        # Only the token ids are kept; any other field the tokenizer returns
        # is dropped.
        encoded = tokenizer(
            text=text,
            max_seq_len=max_seq_length,
            pad_to_max_seq_len=pad_to_max_seq_len)
        return encoded["input_ids"]

    return [_encode(text) for _, text in example.items()]

In [5]:
# Load the tokenizer registered under the pretrained name 'ernie-m-large'.
# It is passed to RecallPredictor.extract_embedding further down.
tokenizer = ppnlp.transformers.AutoTokenizer.from_pretrained('ernie-m-large')

[2022-08-06 11:10:06,880] [    INFO] - We are using <class 'paddlenlp.transformers.ernie_m.tokenizer.ErnieMTokenizer'> to load 'ernie-m-large'.
[2022-08-06 11:10:06,883] [    INFO] - Already cached /home/aistudio/.paddlenlp/models/ernie-m-large/ernie_m.vocab.txt
[2022-08-06 11:10:06,885] [    INFO] - Already cached /home/aistudio/.paddlenlp/models/ernie-m-large/ernie_m.sentencepiece.bpe.model
[2022-08-06 11:10:07,877] [    INFO] - tokenizer config file saved in /home/aistudio/.paddlenlp/models/ernie-m-large/tokenizer_config.json
[2022-08-06 11:10:07,880] [    INFO] - Special tokens file saved in /home/aistudio/.paddlenlp/models/ernie-m-large/special_tokens_map.json


In [19]:
class RecallPredictor(object):
    """
    Thin wrapper around a Paddle Inference predictor that maps texts to the
    vectors produced by the exported ``model.get_pooled_embedding`` program.
    """

    def __init__(self,
                 model_dir,
                 device="gpu",
                 max_seq_length=128,
                 batch_size=32,
                 use_tensorrt=False,
                 precision="fp32",
                 cpu_threads=10,
                 enable_mkldnn=False):
        """
        Build the inference config and create the predictor.

        Args:
            model_dir(obj:`str`): Directory holding
                ``model.get_pooled_embedding.pdmodel`` and
                ``model.get_pooled_embedding.pdiparams``.
            device(obj:`str`, defaults to `"gpu"`): ``"gpu"``, ``"cpu"`` or
                ``"xpu"``. Any other value applies no device-specific
                configuration.
            max_seq_length(obj:`int`, defaults to 128): Maximum sequence length
                handed to ``convert_example_recall_infer`` when tokenizing.
            batch_size(obj:`int`, defaults to 32): Stored on the instance and
                used as TensorRT's ``max_batch_size`` when TensorRT is enabled.
            use_tensorrt(obj:`bool`, defaults to `False`): Enable the TensorRT
                engine. Only consulted when ``device == "gpu"``.
            precision(obj:`str`, defaults to `"fp32"`): ``"fp16"``, ``"fp32"``
                or ``"int8"``. Looked up whenever ``device == "gpu"``.
            cpu_threads(obj:`int`, defaults to 10): Number of CPU math-library
                threads. Only applied when ``device == "cpu"``.
            enable_mkldnn(obj:`bool`, defaults to `False`): Turn on MKL-DNN.
                Only applied when ``device == "cpu"``.
        Raises:
            ValueError: If the model file or the params file does not exist.
            KeyError: If ``device == "gpu"`` and ``precision`` is not one of
                the supported keys.
        """
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size

        model_file = os.path.join(model_dir,
                                  "model.get_pooled_embedding.pdmodel")
        params_file = os.path.join(model_dir,
                                   "model.get_pooled_embedding.pdiparams")
        if not os.path.exists(model_file):
            raise ValueError("not find model file path {}".format(model_file))
        if not os.path.exists(params_file):
            raise ValueError("not find params file path {}".format(params_file))
        config = inference.Config(model_file, params_file)

        # Silence Paddle Inference's runtime log output.
        config.disable_glog_info()

        if device == "gpu":
            # set GPU configs accordingly
            # such as initialize the gpu memory, enable tensorrt
            config.enable_use_gpu(100, 0)
            precision_map = {
                "fp16": inference.PrecisionType.Half,
                "fp32": inference.PrecisionType.Float32,
                "int8": inference.PrecisionType.Int8
            }
            precision_mode = precision_map[precision]

            if use_tensorrt:
                config.enable_tensorrt_engine(
                    max_batch_size=batch_size,
                    min_subgraph_size=30,
                    precision_mode=precision_mode)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
            if enable_mkldnn:
                # Inputs are padded per call, so their shapes vary; cap how
                # many distinct shapes MKL-DNN keeps cached.
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            # Honor the caller-supplied thread count (it used to be ignored).
            config.set_cpu_math_library_num_threads(cpu_threads)
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)

        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)
        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]
        # Only the first output of the exported program is read back.
        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    def extract_embedding(self, data, tokenizer):
        """
        Run the predictor on ``data`` and return its first output.

        Args:
            data (obj:`list(dict)`): Batch of mappings whose values are raw
                texts. Each element is handed to
                ``convert_example_recall_infer``. The caller in this notebook
                passes one ``{id: text}`` entry per mapping.
            tokenizer(obj:`PretrainedTokenizer`): Tokenizer used to encode the
                texts. Its ``pad_token_id`` is used as the padding value.
        Returns:
            The array copied back from the predictor's first output handle
            (shape ``(1, 256)`` for the single-text run in this notebook).
        """
        # Tokenize with the length limit configured on this instance instead
        # of silently falling back to the helper's default of 512.
        examples = [
            convert_example_recall_infer(
                text, tokenizer, max_seq_length=self.max_seq_length)
            for text in data
        ]

        # Pad the token-id field of every example to a common length.
        batchify_fn = Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id)  # input
        )

        # NOTE: all of `data` is fed in one predictor call; self.batch_size is
        # not used to split it into chunks.
        padded = batchify_fn(examples)
        self.input_handles[0].copy_from_cpu(padded[0])
        self.predictor.run()
        return self.output_handle.copy_to_cpu()

In [20]:
# Settings handed to RecallPredictor below.
model_dir = 'ernie-m-large/infer_model'
device = 'cpu'
max_seq_length = 150
batch_size = 32
use_tensorrt = False
precision = 'fp32'
cpu_threads = 1
enable_mkldnn = False

# Keyword arguments make the mapping between settings and parameters explicit.
predictor = RecallPredictor(
    model_dir=model_dir,
    device=device,
    max_seq_length=max_seq_length,
    batch_size=batch_size,
    use_tensorrt=use_tensorrt,
    precision=precision,
    cpu_threads=cpu_threads,
    enable_mkldnn=enable_mkldnn)

# Wrap every corpus entry in its own single-entry {id: text} mapping, the
# element shape extract_embedding is given here.
id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'}
corpus_list = [{doc_id: doc_text} for doc_id, doc_text in id2corpus.items()]
res = predictor.extract_embedding(corpus_list, tokenizer)
print('抽取向量')
print(res.shape)
print(res)

抽取向量
(1, 256)
[[-3.20681594e-02 -5.39870933e-02 -9.69525985e-03  2.57075019e-03
  -7.77711160e-03 -1.13376260e-01  9.74038616e-02  9.69490781e-03
   2.52382103e-02 -5.15156006e-03  3.59854363e-02 -7.20969439e-02
  -6.31656796e-02  2.94445213e-02  7.01667136e-03 -2.27558576e-02
  -3.98861282e-02  6.65646642e-02 -1.37818217e-01 -8.05629641e-02
   2.59946566e-03 -3.53311375e-02  5.13674952e-02 -3.64249535e-02
  -3.71851698e-02  5.35244159e-02 -1.21604256e-01  5.70073128e-02
  -1.21392468e-02  8.23013708e-02  1.77799948e-02  2.76519656e-02
   1.43378183e-01  6.22088686e-02 -2.44487207e-02 -9.31761563e-02
   6.88312203e-02 -3.36401165e-03  8.92427564e-02  1.07140549e-01
  -7.50251487e-02 -1.04777468e-03 -2.86020953e-02  1.31616309e-01
   9.96566117e-02  7.78597267e-03 -1.58476771e-03  6.43920824e-02
  -3.88049856e-02  3.75183821e-02 -7.91258439e-02 -1.43461749e-01
  -9.43054706e-02 -3.86505090e-02 -1.64448261e-01 -1.05868347e-01
  -6.91526011e-02  7.29759261e-02 -3.57532389e-02 -3.20015550e