In [1]:
!nvidia-smi

Sun Jul 18 03:39:51 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch
from transformers import BertForSequenceClassification
from transformers.convert_graph_to_onnx import convert
from onnxruntime.transformers import optimizer
from pathlib import Path
import os


def convert_bert_classification_model_to_onnx(
        num_labels: int = 2,
        model_weight_pth_path: str = "./model_1.pth",
        result_folder: str = "./result_1",
        pretrained_transformers_path: str = "../hfl/chinese-roberta-wwm-ext"
):
    """Convert a fine-tuned BERT classification checkpoint into an optimized ONNX model.

    The conversion runs in three steps, all written below ``result_folder``:

    1. Load the pretrained architecture, apply the ``.pth`` state dict and save
       the model to ``<result_folder>/hugging_face_trained_model_path``.
    2. Export it to ``<result_folder>/onnx/model.unoptimized.onnx``.
    3. Run the onnxruntime transformer optimizer and write
       ``<result_folder>/onnx/model.optimized.onnx``.

    WARNING: if ``result_folder`` already exists it is deleted recursively
    before the conversion starts, so never point it at a folder holding
    anything you want to keep.

    Args:
        num_labels: Number of output classes of the classification head.
        model_weight_pth_path: Path to the state dict consumed by ``torch.load``.
        result_folder: Output folder; recreated from scratch on every call.
        pretrained_transformers_path: Path or name of the pretrained model,
            used for both the architecture and the tokenizer.

    Returns:
        None. The path of the optimized model is printed.
    """
    # Imported locally so this function does not depend on the import cell being edited.
    import shutil

    # Start from a clean output folder. os.rmdir() only removes *empty*
    # directories, so a re-run over earlier results needs rmtree().
    if os.path.exists(result_folder):
        shutil.rmtree(result_folder)
    # makedirs() also creates missing parent folders, which mkdir() would not.
    os.makedirs(result_folder)

    # Default
    hugging_face_trained_model_path = os.path.join(result_folder, "hugging_face_trained_model_path")
    onnx_folder = os.path.join(result_folder, "onnx")
    unoptimized_onnx_path = os.path.join(onnx_folder, "model.unoptimized.onnx")
    optimized_onnx_path = os.path.join(onnx_folder, "model.optimized.onnx")

    # step 1: save pth to hugging_face pipeline format
    print("step 1: save pth to hugging_face pipeline format")
    model = BertForSequenceClassification.from_pretrained(pretrained_transformers_path, num_labels=num_labels)
    # map_location="cpu" lets a checkpoint saved from a GPU load on any host;
    # the weights are only copied into the model and saved again.
    model.load_state_dict(torch.load(model_weight_pth_path, map_location="cpu"))
    model.eval()
    model.save_pretrained(hugging_face_trained_model_path)

    # step 2: export unoptimized onnx
    print("step 2: export unoptimized onnx")
    os.makedirs(onnx_folder, exist_ok=True)
    convert(
        framework="pt",
        model=hugging_face_trained_model_path,
        output=Path(unoptimized_onnx_path),
        tokenizer=pretrained_transformers_path,
        pipeline_name="sentiment-analysis",
        opset=11)

    # step 3: export optimized onnx
    print("step 3: export optimized onnx")
    optimized_model = optimizer.optimize_model(unoptimized_onnx_path, use_gpu=True)
    optimized_model.save_model_to_file(optimized_onnx_path)

    print("end export optimized onnx")
    print("result onnx path:", optimized_onnx_path)


In [None]:
import numpy as np
from onnxruntime import InferenceSession
from transformers import BertTokenizerFast
from flask import Flask, request, jsonify
from threading import Lock


class TextPreprocessor:
    """Tokenize a (title, paragraph) pair into int64 arrays with a batch axis.

    Tokenizer calls are serialized with a lock (presumably because one
    instance is shared between request threads -- confirm against the caller).

    Args:
        tokenizer_path: Path or name passed to ``BertTokenizerFast.from_pretrained``.
        max_length: Length every encoded pair is padded or truncated to.
    """

    def __init__(self,
                 tokenizer_path: str = "./resource/hfl/chinese-roberta-wwm-ext",
                 max_length: int = 256):
        self.lock = Lock()
        self.max_length = max_length
        self.tokenizer = BertTokenizerFast.from_pretrained(tokenizer_path)

    def preprocess(self, input_title: str, sample_paragraph):
        """Encode the text pair and return a ``{name: array}`` dict.

        Each value is the tokenizer output for that key, converted to
        ``np.int64`` and given a leading axis of size 1.
        """
        with self.lock:
            input_encoding = self.tokenizer(input_title, sample_paragraph,
                                            padding="max_length",
                                            truncation=True,
                                            max_length=self.max_length)
        # expand_dims adds the leading axis of size 1 (a batch of one sample).
        return {
            name: np.expand_dims(np.asarray(values, dtype=np.int64), axis=0)
            for name, values in input_encoding.items()
        }


# Module-level objects created once when this cell runs and reused by every
# call to calc_probability_level. The model path (and the tokenizer path inside
# TextPreprocessor) is relative to the current working directory.
session = InferenceSession("./resource/model.summary.onnx")
text_preprocessor = TextPreprocessor()


def calc_probability_level(title: str, paragraph: str) -> float:
    """Return the softmax probability of class index 1 for a (title, paragraph) pair.

    The pair is tokenized by the module-level ``text_preprocessor`` and fed to
    the module-level ONNX ``session``. The first row of the first model output
    is treated as the class logits (assumes a batch of one -- confirm against
    the exported model).

    Args:
        title: Title text, passed as the first tokenizer segment.
        paragraph: Paragraph text, passed as the second tokenizer segment.

    Returns:
        The probability of class index 1 as a builtin ``float``.
    """
    inputs_onnx = text_preprocessor.preprocess(title, paragraph)
    logits = session.run(None, inputs_onnx)[0][0]

    # Numerically stable softmax: subtracting the max logit leaves the result
    # unchanged but keeps exp() from overflowing to inf (inf / inf -> nan).
    exp_logits = np.exp(logits - np.max(logits))
    probability = exp_logits[1] / np.sum(exp_logits)

    print("---------")
    print(paragraph)
    print(probability)
    # Convert the NumPy scalar to a builtin float so the value matches the
    # annotation and stays JSON-serializable (np.float32 is not).
    return float(probability)
