<a href="https://colab.research.google.com/github/SananSuleymanov/ONNX_optimization/blob/main/optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install onnxruntime

In [None]:
!pip install onnxruntime-tools

In [None]:
!pip install tensorflow-model-optimization

In [None]:
!pip install onnx-tf

In [None]:
!!pip install onnxoptimizer

#Required Dependicies

In [7]:
from onnxruntime.quantization import quantize_dynamic, QuantType, quantize_static, CalibrationDataReader, preprocess
from onnxruntime.quantization import QuantType
from onnxoptimizer import optimize
import onnxruntime
from PIL import Image
import tensorflow as tf
import onnx_tf.backend as backend
import onnx 
import numpy as np
import time
import os

#Inference before optimization

In [8]:
model_path= '/content/drive/MyDrive/YoloV5_small/small_yolov5.onnx'

session= onnxruntime.InferenceSession(model_path)

input_name= session.get_inputs()[0].name
output_name= session.get_outputs()[0].name

In [9]:
input_shape= session.get_inputs()[0].shape
output_shape= session.get_outputs()[0].shape
print('Input shape: {}'.format(input_shape))
print('Output shape: {}'.format(output_shape))

Input shape: [1, 3, 1024, 1024]
Output shape: [1, 73728, 7]


In [10]:
image_path= '/content/drive/MyDrive/YoloV5_small/Calibration/Copy of Black__A__Pikl__0_3__L_Keyless__2__11-07-08__0.jpg'

inf_image= Image.open(image_path)
new_shape=(1024, 1024)

#preprocessing of single image for running inference
def preprocess(image):
  image= image.resize(new_shape)
  image= np.array(image, dtype=np.float32)
  image= image/255.0
  image= np.reshape(image, (image.shape[2], image.shape[0], image.shape[1]))
  image= np.expand_dims(image, axis=0)
  return image


In [11]:
input_data = preprocess(inf_image)

In [12]:
start_time= time.time()
output= session.run([output_name], {input_name: input_data} )
end_time= time.time()

inference_time= (end_time-start_time)
print('Inference speed before purning and quantization: {}'.format(inference_time))

Inference speed before purning and quantization: 1.0894615650177002


#Preprocess Calibration Data

In [13]:
def data_reader(image_path):
  images=[]
  for filename in os.listdir(image_path):
    f= os.path.join(image_path, filename)
    image= Image.open(f)
    image= image.resize(new_shape)
    image= np.array(image, dtype=np.float32)
    image= image/255.0
    image= np.reshape(image, (image.shape[2], image.shape[0], image.shape[1]))
    image= np.expand_dims(image, axis=0)
    images.append(image)
  images= np.concatenate(
        np.expand_dims(images, axis=0), axis=0)
  return images

In [14]:
calib_image=data_reader('/content/drive/MyDrive/YoloV5_small/Calibration')

In [15]:
print('Calibration dataset size: {}'.format(calib_image.shape))

Calibration dataset size: (56, 1, 3, 1024, 1024)


#Static Quantization

I couldn't understand the problem but runtime of quantize_static() function took long time and I couldn't finish it successfully.

In [16]:
from onnxruntime.quantization import quantize_static, CalibrationDataReader, preprocess
from onnxruntime.quantization import QuantType


quant_model_path = '/content/drive/MyDrive/YoloV5_small/small_yolov5.quant.onnx'
class DataReader(CalibrationDataReader):
  def __init__(self, calibration_images):
    self.images=calibration_images
    self.enum_data_dicts = []
  
  def get_next(self):
      self.datasize = len(self.images)
      self.enum_data_dicts = iter([{input_name: data} for data in self.images])
      return next(self.enum_data_dicts, None) 

dr= DataReader(calib_image)
quantize_static(model_path,
                quant_model_path,
                dr)




KeyboardInterrupt: ignored

#Save ONNX model as TF model

In [17]:
onnx_model = onnx.load(model_path)

# Convert ONNX model to TensorFlow format
tf_model = backend.prepare(onnx_model)
tf_model.export_graph("model_1/") 


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [19]:
converter= tf.lite.TFLiteConverter.from_saved_model('model_1/')
converter.optimizations= [tf.lite.Optimize.DEFAULT]
tflite_quant_model= converter.convert() 

In [20]:
with open('model_1/model.tflite', 'wb') as f:
  f.write(tflite_quant_model)

In [23]:
interpreter = tf.lite.Interpreter(model_path='/content/model_1/model.tflite')

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

interpreter.allocate_tensors()

#preprocess image
image= Image.open('/content/drive/MyDrive/YoloV5_small/Calibration/Copy of Black__A__Pikl__0_3__L_Keyless__2__11-07-08__0.jpg')
image= image.resize(new_shape)
np_features= np.array(image)
np_features= np_features/255.0
np_features= np.reshape(np_features, (np_features.shape[2], np_features.shape[0], np_features.shape[1]))
input_type = input_details[0]['dtype']

if input_type == np.int8:
    input_scale, input_zero_point = input_details[0]['quantization']
    print("Input scale:", input_scale)
    print("Input zero point:", input_zero_point)
    print()
    np_features = (np_features / input_scale) + input_zero_point
    np_features = np.around(np_features)
    
# Convert features to NumPy array of expected type
np_features = np_features.astype(input_type)

# Add dimension to input sample (TFLite model expects (# samples, data))
np_features = np.expand_dims(np_features, axis=0)

interpreter.set_tensor(input_details[0]['index'], np_features)

# Run inference
start_time2= time.time()
interpreter.invoke()
end_time2= time.time()

inference_time2= (end_time2-start_time2)
print('Inference speed after quantization: {}'.format(inference_time2))


Inference speed after quantization: 3.0270984172821045


#Dynamic Quantization


Model is dynamic quantized and saved as 'model_quant.onnx' file

In [24]:
model_quant= 'model_quant.onnx'
quantized_model = quantize_dynamic(model_path, model_quant)


In [25]:
# Load the ONNX model
model = onnx.load('/content/model_quant.onnx')

# Define the magnitude threshold for pruning weights
threshold = 0.5

# Prune the model using onnxoptimizer
passes = ["extract_constant_to_initializer", "eliminate_unused_initializer"]
optimized_model = optimize(model, passes)

# Save the pruned model to a file
onnx.save(optimized_model, 'pruned_model.onnx')


#Inference after quantization and pruning

I couldn't run inference in the latest model because of the belowmentioned model

In [26]:
session= onnxruntime.InferenceSession('/content/pruned_model.onnx')

NotImplemented: ignored