# 导入函数库

In [1]:
import os
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 as Net
import tensorflow as tf
from tensorflow.python.framework import graph_io
from tensorflow.keras.models import load_model
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
import time

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# 声明一个MobileNetV2实例，加载的是在ImageNet上预训练过的模型，使用的是Keras的.h5模型

In [2]:
# Instantiate MobileNetV2 (imported as Net) with weights='imagenet', i.e. the ImageNet-pretrained weights.
model = Net(weights='imagenet')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


## 保存MobileNetV2到model文件夹下，命名为mobilenetv2.h5

In [3]:
# exist_ok=True means no error is raised when the ./model directory already exists.
os.makedirs('./model', exist_ok=True)

# Save the model as a Keras .h5 file inside ./model.
model.save('./model/mobilenetv2.h5')

# Clear any previous session

In [4]:
# Reset the Keras backend state so the model reloaded later starts from a clean session.
tf.keras.backend.clear_session()

# 一些参数设置

In [5]:
# Directory that receives the frozen .pb graph, and the path of the .h5 model saved earlier.
save_pb_dir = './model'
model_fname = './model/mobilenetv2.h5'

# 封装freeze函数，用于把.h5模型文件转换成.pb

In [6]:
def freeze_graph(graph, session, output, save_pb_dir='.', save_pb_name='fp32_frozen_graph.pb', save_pb_as_text=False):
    """Freeze a TensorFlow graph and write the result to disk.

    Training-only nodes are removed, the variables are converted to constants
    using the values held by ``session``, and the resulting GraphDef is written
    to ``save_pb_dir/save_pb_name``.

    Args:
        graph: The graph to freeze.
        session: Session that holds the current variable values.
        output: List of output node names that the frozen graph must keep.
        save_pb_dir: Directory the frozen graph is written to.
        save_pb_name: File name of the frozen graph.
        save_pb_as_text: Forwarded to ``graph_io.write_graph`` as ``as_text``.

    Returns:
        The frozen GraphDef.
    """
    # The tf.graph_util aliases are deprecated; the tf.compat.v1 endpoints are
    # the replacements named by TensorFlow's own deprecation messages.
    graph_util = tf.compat.v1.graph_util
    with graph.as_default():
        # protected_nodes keeps the requested outputs even when one of them is
        # an Identity node that would otherwise be spliced out of the graph.
        graphdef_inf = graph_util.remove_training_nodes(graph.as_graph_def(), protected_nodes=output)
        # Convert the variables into constants.
        graphdef_frozen = graph_util.convert_variables_to_constants(session, graphdef_inf, output)
        # Write the frozen graph to disk.
        graph_io.write_graph(graphdef_frozen, save_pb_dir, save_pb_name, as_text=save_pb_as_text)
        return graphdef_frozen

# This line must be executed before loading Keras model

In [7]:
# Learning phase 0 selects inference (test) mode; it is set here, before the model is loaded.
tf.keras.backend.set_learning_phase(0)

# 加载mobilenetv2.h5模型

In [8]:
# Reload the saved .h5 model from model_fname.
model = load_model(model_fname)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


## 生成一个Session，里面包含了上面加载过来的模型结构和参数

In [9]:
# Fetch the session used by the Keras backend; it is handed to freeze_graph later on.
session = tf.keras.backend.get_session()

## 输入和输出节点收集

In [10]:
# Collect the graph op names behind the model's input and output tensors.
input_names = [t.op.name for t in model.inputs]
output_names = [t.op.name for t in model.outputs]

In [11]:
# Show the collected node names; they are reused as input/output arrays for the TFLite converter.
print(input_names, output_names)

['input_1'] ['Logits/Softmax']


## 进行模型的freeze操作，保存fp32_frozen_graph.pb，并返回fp32_frozen_graph

In [12]:
# Freeze the session graph; with the default save_pb_name this writes fp32_frozen_graph.pb into save_pb_dir.
fp32_frozen_graph = freeze_graph(session.graph, session, output_names, save_pb_dir)

Instructions for updating:
Use `tf.compat.v1.graph_util.remove_training_nodes`
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 262 variables.
INFO:tensorflow:Converted 262 variables to const ops.


# 把fp32_frozen_graph.pb转化成fp32_frozen_graph.tflite，相当于FP32->FP32

In [13]:
# Inputs for the TFLite converter: the frozen graph file plus its input/output node names.
graph_def_file = './model/fp32_frozen_graph.pb'
input_arrays = ['input_1']  # input node name
output_arrays = ['Logits/Softmax']  # output node name

In [14]:
# Build a converter from the frozen graph; no optimizations are set, so this is the FP32 -> FP32 path.
converter = tf.lite.TFLiteConverter.from_frozen_graph(graph_def_file, input_arrays, output_arrays)
tflite_model = converter.convert()  # run the conversion

# Write through a context manager so the file handle is flushed and closed deterministically.
with open('./model/fp32_frozen_graph.tflite', 'wb') as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
bytes_written  # keep the byte count as the cell output

13973776

# 把fp32_frozen_graph.pb转化成int8_frozen_graph.tflite，相当于FP32->INT8

## 加载一张图片用于calibration

In [15]:
# Load one image and turn it into a preprocessed batch of size 1.
img_path = './elephant.jpg'

image_size = [224, 224, 3]
img = image.load_img(img_path, target_size=image_size[:2])
x = image.img_to_array(img)  # convert to a numpy array
x = np.expand_dims(x, axis=0) # add a leading axis -> [batch_size, H, W, C]
x = preprocess_input(x) # apply the MobileNetV2 preprocessing

## 一些参数设置

In [16]:
# Number of calibration batches and number of samples per batch.
num_calibration_batches = 2
BATCH_SIZE = 1
# Zero-filled float32 buffer with one 224x224x3 slot per calibration sample; it is filled in a later cell.
batched_input = np.zeros((BATCH_SIZE * num_calibration_batches, 224, 224, 3), dtype=np.float32)

## 得到batched_input,用于calibration

In [17]:
config = tf.ConfigProto()
# allow_growth makes TensorFlow allocate GPU memory on demand instead of grabbing all of it up front.
config.gpu_options.allow_growth = True
with tf.Session(graph=tf.Graph(), config=config) as sess:
    next_element = tf.convert_to_tensor(x)  # wrap the numpy array in a tensor
    # The tensor is constant, so evaluate it once instead of twice per loop iteration.
    calibration_batch = sess.run(next_element)
    for i in range(num_calibration_batches):
        batch_slice = slice(i * BATCH_SIZE, (i + 1) * BATCH_SIZE)
        print(batched_input[batch_slice, :].shape, calibration_batch.shape)
        batched_input[batch_slice, :] = calibration_batch

print('Calibration data shape: ', batched_input.shape)

(1, 224, 224, 3) (1, 224, 224, 3)
(1, 224, 224, 3) (1, 224, 224, 3)
Calibration data shape:  (2, 224, 224, 3)


## 封装calibration_input函数

In [18]:
def calibration_input_fn_gen():
    """Yield the calibration batches one at a time.

    Each yielded item is a one-element list holding a slice of
    ``batched_input`` that is ``BATCH_SIZE`` samples long; there are
    ``num_calibration_batches`` items in total.
    """
    for batch_index in range(num_calibration_batches):
        first_row = batch_index * BATCH_SIZE
        last_row = (batch_index + 1) * BATCH_SIZE
        current_batch = batched_input[first_row:last_row, :]
        yield [current_batch]

## 进行FP32->INT8的转化

In [19]:
# Converter inputs for the INT8 conversion: the same frozen graph and node names as the FP32 conversion.
graph_def_file = './model/fp32_frozen_graph.pb'
input_arrays = ['input_1']
output_arrays = ['Logits/Softmax']

In [20]:
# Build a converter from the frozen graph and enable post-training quantization.
converter = tf.lite.TFLiteConverter.from_frozen_graph(graph_def_file, input_arrays, output_arrays)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# The generator supplies the batches used for calibration.
converter.representative_dataset = calibration_input_fn_gen
tflite_model = converter.convert()  # run the conversion

# Write through a context manager so the file handle is flushed and closed deterministically.
with open('./model/int8_frozen_graph.tflite', 'wb') as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
bytes_written  # keep the byte count as the cell output

3979936

# 推断函数封装

In [21]:
def tfLiteInference(input_details, interpreter, output_details, x):
    """Run a single inference pass on a TFLite interpreter.

    Args:
        input_details: Sequence whose first entry carries the input tensor's 'index'.
        interpreter: Object exposing set_tensor, invoke and get_tensor.
        output_details: Sequence whose first entry carries the output tensor's 'index'.
        x: Value written to the input tensor.

    Returns:
        Whatever the interpreter returns for the first output tensor.
    """
    # Feed the input.
    input_index = input_details[0]['index']
    interpreter.set_tensor(input_index, x)

    # Execute the model.
    interpreter.invoke()

    # Read the result back from the first output tensor.
    output_index = output_details[0]['index']
    return interpreter.get_tensor(output_index)

# 测试FP32的.tflite的FPS

In [22]:
# Prepare a TFLite interpreter for the FP32 model.
interpreter = tf.lite.Interpreter(model_path='./model/fp32_frozen_graph.tflite')# create the interpreter
interpreter.allocate_tensors()# allocate memory for the tensors

input_details = interpreter.get_input_details()# details of the input tensors
output_details = interpreter.get_output_details()# details of the output tensors

In [23]:
times = []
# One un-timed run first: it produces the prediction shown below and stays outside the timed loop.
output_data = tfLiteInference(input_details, interpreter, output_details, x)

print('Predicted: ', decode_predictions(output_data, top=3)[0])

# Measure the per-inference latency and derive the FPS.
# time.perf_counter() is monotonic and high resolution, unlike the wall clock time.time(),
# so short durations are not distorted by clock adjustments or coarse timer granularity.
for i in range(2000):
    start_time = time.perf_counter()
    # x is the preprocessed image loaded earlier for calibration.
    output_data = tfLiteInference(input_details, interpreter, output_details, x)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1 / mean_delta
print('average(sec):{:.2f}, fps:{:.2f}'.format(mean_delta, fps))

Predicted:  [('n02504458', 'African_elephant', 0.40645623), ('n02504013', 'Indian_elephant', 0.2605538), ('n01871265', 'tusker', 0.1444397)]
average(sec):0.04, fps:25.29


我发现TensorFlow Lite模型的性能因平台而异：在微星台式机上约为25 FPS，在微星笔记本上约为64 FPS。

# 测试INT8的.tflite的FPS

In [24]:
# Prepare a TFLite interpreter for the INT8 model; this rebinds interpreter/input_details/output_details.
interpreter = tf.lite.Interpreter(model_path='./model/int8_frozen_graph.tflite')# create the interpreter
interpreter.allocate_tensors()# allocate memory for the tensors

input_details = interpreter.get_input_details()# details of the input tensors
output_details = interpreter.get_output_details()# details of the output tensors

In [25]:
times = []

# One un-timed run first: it produces the prediction shown below and stays outside the timed loop.
output_data = tfLiteInference(input_details, interpreter, output_details, x)

print('Predicted: ', decode_predictions(output_data, top=3)[0])

# Measure the per-inference latency and derive the FPS.
# time.perf_counter() is monotonic and high resolution, unlike the wall clock time.time(),
# so short durations are not distorted by clock adjustments or coarse timer granularity.
for i in range(20):
    start_time = time.perf_counter()
    # x is the preprocessed image loaded earlier for calibration.
    output_data = tfLiteInference(input_details, interpreter, output_details, x)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1 / mean_delta
print('average(sec):{:.2f}, fps:{:.2f}'.format(mean_delta, fps))

Predicted:  [('n02504458', 'African_elephant', 0.4140625), ('n01871265', 'tusker', 0.1953125), ('n02504013', 'Indian_elephant', 0.1875)]
average(sec):0.98, fps:1.02
