In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models

# 加载 MNIST 数据集
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 对数据进行预处理
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# 定义模型
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# 编译模型
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# 训练模型
model.fit(x_train, y_train, epochs=1)

# 评估模型
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', test_acc)

model.save('mnist.keras')


313/313 - 1s - loss: 0.0468 - accuracy: 0.9851 - 612ms/epoch - 2ms/step
Test accuracy: 0.9850999712944031


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.load_model('mnist.keras')

In [None]:
# model.summary()

from tensorflow.keras.utils import plot_model

plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

In [None]:
import onnx
import tf2onnx

input_signature = [model.layers[0].input.type_spec]
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=13)
onnx.save(onnx_model, "model.onnx")

In [None]:
# x_test[0], y_test[0]
y_test[0]
# x_test[0].shape

# type(x_test[0])

# x_test[0].flatten().shape


In [8]:
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType


quantize_dynamic('model.onnx','model.quant.onnx',weight_type=QuantType.QUInt8)


In [9]:
from onnxruntime.quantization import quantize_static, CalibrationDataReader
import  onnxruntime
import numpy as np

class ONNXModelCalibrator(CalibrationDataReader):
    def __init__(self):
        self.x_train,self.y_train = x_train, y_train
        self.x_train = x_train
        self.iter = 0
        self.x_train = np.array(self.x_train, dtype=np.float32)

        session = onnxruntime.InferenceSession('model.onnx', None)
        self.input_name =  session.get_inputs()[0].name
        self.output_name = session.get_outputs()[0].name

        print(self.input_name,self.output_name,self.x_train.shape)
        

    def get_next(self):
        if self.iter < len(self.x_train):
            x = self.x_train[self.iter]
            y = self.y_train[self.iter]
            self.iter += 1
            # return x, y
            # print(x.dtype,y.dtype)
            # return {self.input_name: x,self.output_name: np.array([ i == y for i in range(10)])}
            # print(x.shape)
            # x = x.reshape(1,28,28,1)
            x = np.expand_dims(x,axis=0)
            return {self.input_name: x}
        else:
            return None
from onnxruntime.quantization import QuantFormat
quantize_static('model.onnx','model.quant.onnx',ONNXModelCalibrator(),quant_format=QuantFormat.QOperator) 


#TBD https://github.com/microsoft/onnxruntime/issues/6732



conv2d_input dense_1 (60000, 28, 28, 1)




In [None]:
import numpy as np
import struct

tx = x_test[0].flatten().tolist()
tx

# with open('tx.txt', 'w') as f:
#     f.write(struct.pack('i', len(tx)))

#     for value in tx:
#         f.write(struct.pack('f', value))




In [1]:
import onnx
from onnxconverter_common import float16



model = onnx.load("model.onnx")
model_fp16 = float16.convert_float_to_float16(model)
onnx.save(model_fp16, "model_fp16.onnx")

In [6]:
import onnxruntime as rt

sess_options = rt.SessionOptions()

# Set graph optimization level
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL

# To enable model serialization after graph optimization set this
sess_options.optimized_model_filepath = "model_opt_all.onnx"

session = rt.InferenceSession("model.onnx", sess_options)

[0;93m2024-03-11 17:31:16.932295 [W:onnxruntime:, inference_session.cc:1914 Initialize] Serializing optimized model with Graph Optimization level greater than ORT_ENABLE_EXTENDED and the NchwcTransformer enabled. The generated model may contain hardware specific optimizations, and should only be used in the same environment the model was optimized in.[m


In [1]:
import torch
import torch.onnx

# 定义自定义操作
class CustomOp(torch.nn.Module):
    def forward(self, input):
        output = input * 2
        return output

# 创建模型并导出为ONNX
model = CustomOp()
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy_input, "custom_op_model.onnx")

verbose: False, log level: Level.ERROR



In [2]:
import onnxruntime as ort

# 加载包含自定义操作的模型
model_path = "custom_op_model.onnx"
sess_options = ort.SessionOptions()
sess_options.register_custom_ops_library("custom_op_lib.so")  # 注册自定义操作库
session = ort.InferenceSession(model_path, sess_options)

# 执行推理
input_name = session.get_inputs()[0].name
dummy_input = np.random.randn(1, 3, 224, 224).astype(np.float32)
output = session.run(None, {input_name: dummy_input})

Fail: [ONNXRuntimeError] : 1 : FAIL : Failed to load library custom_op_lib.so with error: dlopen(custom_op_lib.so, 0x0006): tried: 'custom_op_lib.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OScustom_op_lib.so' (no such file), '/opt/homebrew/lib/python3.11/site-packages/onnxruntime/capi/custom_op_lib.so' (no such file), '/opt/homebrew/Cellar/python@3.11/3.11.5/Frameworks/Python.framework/Versions/3.11/Resources/Python.app/Contents/MacOS/../../../../../../../../../../../lib/custom_op_lib.so' (no such file), '/usr/lib/custom_op_lib.so' (no such file, not in dyld cache), 'custom_op_lib.so' (no such file)