In [5]:
import math
import json
import re
import random
import numpy as np
from collections import defaultdict

import cn2an
from tqdm import tqdm_notebook as tqdm
from nl2sql.utils import read_data, read_tables, SQL, Query, Question, Table
from keras_bert import get_checkpoint_paths, load_vocabulary, Tokenizer, load_trained_model_from_checkpoint
from keras.utils.data_utils import Sequence
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Lambda, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import multi_gpu_model
import keras

import os 
# An empty CUDA_VISIBLE_DEVICES exposes no CUDA devices to this process,
# so everything below is expected to run on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Number of comma-separated entries in the variable. Note that an empty
# string still counts as one entry, so NUM_GPUS is 1 here, not 0.
NUM_GPUS = os.environ['CUDA_VISIBLE_DEVICES'].count(',') + 1

In [6]:
class SimpleTokenizer(Tokenizer):
    """Tokenizer that emits exactly one token per input character.

    Overrides ``_tokenize`` so that no word-piece splitting happens: the
    output list always has the same length as the input text.
    """

    def _tokenize(self, text):
        """Map every character of ``text`` to a single token.

        * a character found in the vocabulary is kept as-is;
        * a whitespace character (per ``self._is_space``) becomes
          ``'[unused1]'``;
        * anything else becomes ``'[UNK]'``.

        Returns:
            list of str, one entry per character of ``text``.
        """
        return [
            ch if ch in self._token_dict
            else ('[unused1]' if self._is_space(ch) else '[UNK]')
            for ch in text
        ]
        
        
def construct_model(paths):
    """Build the BERT-based binary scoring model and its tokenizer.

    Args:
        paths: object exposing ``vocab``, ``config`` and ``checkpoint``
            attributes (as returned by ``get_checkpoint_paths``).

    Returns:
        A ``(model, tokenizer)`` tuple. ``model`` takes the two inputs
        ``input_x1`` / ``input_x2`` and produces one sigmoid value from the
        layer named ``output_similarity``; ``tokenizer`` is a
        ``SimpleTokenizer`` built from the checkpoint vocabulary.
    """
    vocabulary = load_vocabulary(paths.vocab)
    tokenizer = SimpleTokenizer(vocabulary)

    # seq_len=None leaves the sequence length unconstrained.
    bert = load_trained_model_from_checkpoint(
        paths.config, paths.checkpoint, seq_len=None)
    # Make every BERT layer trainable.
    for layer in bert.layers:
        layer.trainable = True

    x1_input = Input(shape=(None,), name='input_x1', dtype='int32')
    x2_input = Input(shape=(None,), name='input_x2')
    bert_output = bert([x1_input, x2_input])
    # Keep only index 0 along the second axis of the BERT output
    # (presumably the [CLS] position -- TODO confirm).
    first_position = Lambda(lambda seq: seq[:, 0])(bert_output)
    similarity = Dense(
        1, activation='sigmoid', name='output_similarity')(first_position)

    return Model([x1_input, x2_input], similarity), tokenizer



In [7]:
# Directory holding the pretrained BERT checkpoint (relative to this notebook).
bert_model_path = '../../../experiments/model/chinese_wwm_L-12_H-768_A-12'
# Resolve the config / checkpoint / vocab paths consumed by construct_model.
paths = get_checkpoint_paths(bert_model_path)

# Build the network together with its matching tokenizer.
model, tokenizer = construct_model(paths)

# Load previously saved weights from the HDF5 file into the freshly built model.
model_path = '../model/m2.h5'  
model.load_weights(model_path)

In [12]:
import tensorflow as tf
from tensorflow.python.util import compat
from keras import backend as K


def export_savedmodel(model, output_path, model_version=0):
    """Export a Keras model as a TensorFlow SavedModel (``saved_model.pb``).

    The model is written to ``<output_path>/<model_version>/`` with the
    ``SERVING`` tag and a single prediction signature registered under the
    default serving signature key.

    Args:
        model: Keras model to export. Every input tensor is exposed in the
            signature under its own ``.name`` (so the key is whatever the
            tensor reports, e.g. presumably ``'input_x1:0'``); the model's
            output is exposed under the key ``'output'``.
        output_path: base directory of the export.
        model_version: name of the version sub-directory created below
            ``output_path``. Defaults to ``0``.

    Note:
        ``SavedModelBuilder`` is documented to require that the export
        directory does not exist yet, so re-running the export for the same
        path/version is expected to fail until the directory is removed.
    """
    # Build the prediction signature from the network's inputs and output.
    # ``model.inputs`` is always a list, whereas ``model.input`` collapses to
    # a bare tensor for single-input models and cannot be iterated.
    input_dict = {tensor.name: tensor for tensor in model.inputs}
    model_signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs=input_dict, outputs={'output': model.output}
    )

    # Join the base path and the version as UTF-8 encoded bytes.
    export_path = os.path.join(
        compat.as_bytes(output_path), compat.as_bytes(str(model_version)))

    # The builder creates the SavedModel protocol buffer and saves variables.
    builder = tf.saved_model.builder.SavedModelBuilder(export_path)
    builder.add_meta_graph_and_variables(
        # Session currently used by the Keras backend (holds the weights).
        sess=K.get_session(),
        # Tag the meta graph for serving.
        tags=[tf.saved_model.tag_constants.SERVING],
        # Strip device placement information from the exported graph.
        clear_devices=True,
        signature_def_map={
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                model_signature,
        })
    # Write the SavedModel protocol buffer to disk.
    builder.save()
    print("save model pb success ...")

# Base directory of the export; export_savedmodel writes into a version
# sub-directory beneath it.
output_path = '../model/m2'
export_savedmodel(model, output_path) # Pass the loaded model to the export function, which saves it as a SavedModel.

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ../model/m2/0/saved_model.pb
save model pb success ...
