# Convert Hands CNN and Detector from ONNX to TFLite

### Hands CNN + Detector Conversion

In [1]:
import os
import sys

import numpy as np
import onnx
import onnx_tf
import tensorflow as tf

# Make the parent of the current working directory importable.
# os.path.dirname is separator-aware, so this also works on Windows, where
# splitting the cwd on '/' would leave nothing and append an empty path.
root_dir = os.path.dirname(os.getcwd())
sys.path.append(root_dir)

In [2]:
# Base name (no extension) shared by the conversion cells below:
# `{model_name}.onnx` is read, `{model_name}.tf` and `{model_name}.tflite` are written.
# NOTE(review): the notebook title mentions the Hands CNN, but this value names
# the face CNN — confirm which model is intended.
model_name = 'face_cnn'

### Prepare the ONNX model for TensorFlow, then convert it to TFLite

https://medium.com/@zergtant/convert-pytorch-model-to-tf-lite-with-onnx-tf-232a3894657c

In [None]:
def convert_onnx_to_tflite(model_name):
    """Convert `<model_name>.onnx` into `<model_name>.tflite`.

    The conversion goes ONNX -> TensorFlow SavedModel -> TFLite. All paths are
    relative to the current working directory.

    Args:
        model_name: Base file name without extension. `<model_name>.onnx` must
            exist; `<model_name>.tf` (SavedModel export) and
            `<model_name>.tflite` are written.
    """
    onnx_path = f'{model_name}.onnx'
    saved_model_dir = f'{model_name}.tf'
    tflite_path = f'{model_name}.tflite'

    # Load the ONNX model
    onnx_model = onnx.load(onnx_path)

    # Convert the ONNX model to a TensorFlow representation and export it
    tf_model = onnx_tf.backend.prepare(onnx_model)
    tf_model.export_graph(saved_model_dir)

    # Then convert from TF to TFLite
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    tflite_model = converter.convert()

    # Use a context manager so the file is flushed and closed deterministically;
    # a bare open(...).write(...) leaves the handle open until garbage collection.
    with open(tflite_path, 'wb') as tflite_file:
        tflite_file.write(tflite_model)


convert_onnx_to_tflite(model_name)

### Now test the converted model

https://www.tensorflow.org/lite/guide/inference
https://www.tensorflow.org/lite/guide/inference#run_inference_with_dynamic_shape_model

Note that by default this model takes a batch size of 1, which works out well since we want real-time performance.

In [None]:
def test_tflite_model(model_name):
    """Smoke-test `<model_name>.tflite` with a single random float32 input.

    Loads the model into a TFLite interpreter, prints the shape of its first
    input tensor, runs one inference on random data of that shape, and prints
    the first output tensor.

    Args:
        model_name: Base file name without extension; `<model_name>.tflite` is loaded.
    """
    interpreter = tf.lite.Interpreter(model_path=f"{model_name}.tflite")
    interpreter.allocate_tensors()

    # Only the first input and the first output tensor are exercised.
    first_input = interpreter.get_input_details()[0]
    first_output = interpreter.get_output_details()[0]

    input_shape = first_input['shape']
    print(input_shape)

    # Random data in the model's own input shape, cast to float32.
    random_input = np.random.random_sample(input_shape).astype(np.float32)
    interpreter.set_tensor(first_input['index'], random_input)

    interpreter.invoke()

    # `get_tensor()` returns a copy of the tensor data;
    # `tensor()` would give a pointer to the tensor instead.
    print(interpreter.get_tensor(first_output['index']))


test_tflite_model(model_name)

In [None]:
def test_dynamic_batches_tflite(model_name, batch_size=5):
    """Run `<model_name>.tflite` on a random batch after resizing its input.

    The first input tensor is resized so that its leading (batch) dimension is
    `batch_size`, tensors are re-allocated, and one inference is run on random
    float32 data. Input/output details and the first output tensor are printed.

    Args:
        model_name: Base file name without extension; `<model_name>.tflite` is loaded.
        batch_size: Batch dimension to request instead of the model default.
            Defaults to 5, the value this test previously hard-coded.
    """
    model_filename = f"{model_name}.tflite"

    # Load the TFLite model in TFLite Interpreter
    interpreter = tf.lite.Interpreter(model_path=model_filename)

    first_input = interpreter.get_input_details()[0]
    # Work on a copy so the array handed out by the interpreter is not mutated.
    resized_shape = first_input['shape'].copy()
    resized_shape[0] = batch_size

    # Resize input shape for dynamic shape model and allocate tensors
    interpreter.resize_tensor_input(first_input['index'], resized_shape)
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    print(input_details)   # The input shape now starts with `batch_size`
    print(output_details)  # The output has the corresponding changes

    # Build the batch from the shape the interpreter reports rather than a
    # hard-coded (5, 3, 224, 224), which fails for models with another input
    # resolution (e.g. 299x299).
    batch_shape = tuple(int(dim) for dim in input_details[0]['shape'])
    input_data = np.random.random_sample(batch_shape).astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    output_data = interpreter.get_tensor(output_details[0]['index'])
    print(output_data)


test_dynamic_batches_tflite(model_name)

## Now Convert the Detector from PyTorch to TFLite with Ultralytics

In [None]:
# Weights file of the detector to export; the relative path is resolved against
# the current working directory when it is loaded with YOLO in the next cell.
detector_path = 'best_hands_detector.pt'

In [None]:
from ultralytics import YOLO

# Load the detector weights referenced by `detector_path`.
model = YOLO(detector_path)  # load a custom trained model

# Export the model to TFLite. Later cells load the result from a
# `best_hands_detector_saved_model` directory.
model.export(format='tflite')

## Testing Usage of TFLite Model

In [None]:
# NOTE(review): absolute, machine-specific path — adjust to wherever the export was written.
tflite_model_path = r"D:\Programming\cybertruck\conversions\best_hands_detector_saved_model\best_hands_detector_float32.tflite"

# Load the TFLite model in TFLite Interpreter
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details() # (1, 640, 640, 3) Notice that the RGB channel is at the end for the detector..
output_details = interpreter.get_output_details()

print(input_details)   # Input tensor metadata (shape, dtype, index)
print(output_details)  # Output tensor metadata

# Random data in the model's own input shape is enough for a smoke test.
input_data = np.array(np.random.random_sample(input_details[0]['shape']), dtype=np.float32)

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Raw output, printed before any post-processing.
output = interpreter.get_tensor(output_details[0]['index'])
print(output)

# Drop the batch dimension and transpose so each row is one candidate box:
# (4 + num_classes, num_candidates) -> (num_candidates, 4 + num_classes).
output = output[0].T

boxes_xywh = output[..., :4]  # First 4 columns: box centre x, centre y, width, height
# Every column after the box holds a per-class score (no separate objectness
# column), so the slice starts at 4. Starting at 5 drops the first class and
# leaves an empty slice for a single-class model.
class_scores = output[..., 4:]
scores = np.max(class_scores, axis=1)      # Best class score per candidate
classes = np.argmax(class_scores, axis=1)  # Index of the best-scoring class per candidate

print(boxes_xywh)
print(scores)
print(classes)

## Example Detector Inference with TFLite

https://github.com/ultralytics/ultralytics/issues/4827

In [None]:
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display

tflite_model_path = r"D:\Programming\cybertruck\conversions\best_hands_detector_saved_model\best_hands_detector_float32.tflite"
image_path = r"D:\Programming\cybertruck\conversions\bus.jpg"

def run_model_and_draw_results(model_path, image_path, threshold=0.3):
    """Run a TFLite detector on one image and display the boxes it predicts.

    The image is resized to the model's input resolution, scaled to [0, 1],
    and passed through the interpreter. Every candidate whose best class score
    is at least `threshold` is drawn on the resized image, which is then shown
    with `IPython.display.display`. No non-maximum suppression is applied, so
    overlapping boxes for the same object may be drawn.

    Args:
        model_path: Path to the `.tflite` detector.
        image_path: Path to the image to run detection on.
        threshold: Minimum class score for a box to be drawn. Defaults to 0.3,
            the value this function previously hard-coded.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # The input tensor is channels-last: (batch, height, width, channels).
    image_height = int(input_details[0]['shape'][1])  # 640
    image_width = int(input_details[0]['shape'][2])  # 640

    # Image preparation. Convert to RGB so grayscale / RGBA / palette images
    # still produce the 3 channels the model expects, and close the source file
    # once the resized copy exists.
    with Image.open(image_path) as image:
        image_resized = image.convert('RGB').resize((image_width, image_height))

    image_np = np.array(image_resized)
    image_np = np.true_divide(image_np, 255, dtype=np.float32)  # scale pixels to [0, 1]
    image_np = image_np[np.newaxis, :]  # add the batch dimension

    # Inference
    interpreter.set_tensor(input_details[0]['index'], image_np)
    interpreter.invoke()

    # Drop the batch dimension and transpose so each row is one candidate box:
    # (4 + num_classes, num_candidates) -> (num_candidates, 4 + num_classes).
    output = interpreter.get_tensor(output_details[0]['index'])
    output = output[0].T

    boxes_xywh = output[..., :4]  # centre x, centre y, width, height
    # Class scores start right after the 4 box values (there is no objectness
    # column). Slicing from 5 drops the first class and is empty for a
    # single-class model.
    class_scores = output[..., 4:]
    scores = np.max(class_scores, axis=1)
    classes = np.argmax(class_scores, axis=1)

    # Bounding boxes, scores, and classes are drawn on the resized image
    draw = ImageDraw.Draw(image_resized)

    for box, score, cls in zip(boxes_xywh, scores, classes):
        if score < threshold:
            continue

        # Box values are treated as fractions of the input size, so scale them
        # to pixels and convert centre/size to corner coordinates.
        x_center, y_center, width, height = box
        x1 = int((x_center - width / 2) * image_width)
        y1 = int((y_center - height / 2) * image_height)
        x2 = int((x_center + width / 2) * image_width)
        y2 = int((y_center + height / 2) * image_height)

        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
        text = f"Class: {cls}, Score: {score:.2f}"
        draw.text((x1, y1), text, fill="red")

    display(image_resized)


run_model_and_draw_results(tflite_model_path, image_path)

# Converting Model for Pytorch Lite (PTL) Android deployment

https://pytorch.org/mobile/android/

For the YOLO detector, we use the Ultralytics export function to convert the model to TorchScript, and then convert the TorchScript module into a `.ptl` file for Android deployment.

In [3]:
import torch, yaml
from ultralytics import YOLO
import torchvision
from torch.utils.mobile_optimizer import optimize_for_mobile

# Load the face-detector weights.
# NOTE(review): absolute, machine-specific path — adjust for your checkout.
model = YOLO(r"/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights/yolov8n-face.pt")  # load a custom trained model

# Export to TorchScript; per the logged output, the `.torchscript` file is
# saved next to the weights file.
model.export(format='torchscript')

Ultralytics YOLOv8.0.208 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i7-12700H)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from '/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights/yolov8n-face.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 5, 8400) (6.0 MB)

[34m[1mTorchScript:[0m starting export with torch 2.1.0+cu121...
[34m[1mTorchScript:[0m export success ✅ 0.8s, saved as '/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights/yolov8n-face.torchscript' (11.9 MB)

Export complete (2.2s)
Results saved to [1m/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights[0m
Predict:         yolo predict task=detect model=/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights/yolov8n-face.torchscript imgsz=640  
Validate:        yolo val task=detect model=/home/ron/Classes/CV-Systems/cybertruck/detection/f

'/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights/yolov8n-face.torchscript'

In [4]:
# Load the TorchScript module produced by the export cell above.
torchscript_model = torch.jit.load('/home/ron/Classes/CV-Systems/cybertruck/detection/face_detection/weights/yolov8n-face.torchscript')

# Switch to inference mode *before* optimizing: the mobile optimizer's
# Conv-BatchNorm folding and dropout removal only apply to eval-mode modules.
torchscript_model.eval()
torchscript_model_optimized = optimize_for_mobile(torchscript_model)

# The optimizer already returns a scripted module, so it is saved directly in
# the lite-interpreter (.ptl) format without another torch.jit.script pass.
torchscript_model_optimized._save_for_lite_interpreter("./best_face_detector.ptl")

In [5]:
# Sanity-check an exported .ptl file by loading it and running one forward pass.
# NOTE(review): the previous cell saves "./best_face_detector.ptl", while this
# cell loads './best_hands_detector.ptl' — confirm which file is meant to be
# checked here.
torchscript_lite_model = torch.jit.load('./best_hands_detector.ptl')
torchscript_lite_model.eval()

# Random dummy batch shaped like the (1, 3, 640, 640) BCHW input logged by the exporter.
x = torch.rand(1, 3, 640, 640)

output = torchscript_lite_model(x)

# Last expression of the cell: display the output shape.
output.shape

torch.Size([1, 6, 8400])

# Converting CNNs to Pytorch Lite

In [1]:
import sys
import os 
import torch
from torch.utils.mobile_optimizer import optimize_for_mobile


# Make the parent of the current working directory importable so the
# `cnn.face_cnn` import below resolves. os.path.dirname is separator-aware, so
# this also works on Windows, where splitting the cwd on '/' would leave
# nothing and append an empty path.
root_dir = os.path.dirname(os.getcwd())
sys.path.append(root_dir)

from cnn.face_cnn import Face_CNN

In [None]:
# class HandArgs:
#     def __init__(self):
#         self.freeze = True
#         self.num_frozen_params = 30
#         self.dropout = 0.35

# hand_args = HandArgs()

In [2]:
# Build and export on the CPU: the .ptl file targets mobile (CPU) inference, and
# this keeps the conversion runnable on machines without a GPU.
face_model = Face_CNN(None).to('cpu')
x = torch.randn(5, 3, 299, 299)  # example input batch; not needed by the export itself

# map_location lets a checkpoint saved from a GPU run load on any machine.
state_dict = torch.load(r"/home/ron/Classes/CV-Systems/cybertruck/cnn/face_models/face/SGD/epoch10_11-28_10:50:06_66acc.pt", map_location='cpu')
face_model.load_state_dict(state_dict)
face_model.eval()  # inference mode must be set before scripting/optimizing

scripted_module = torch.jit.script(face_model)
torchscript_model_optimized = optimize_for_mobile(scripted_module)
# Export lite interpreter version model (compatible with lite interpreter).
# Save the *optimized* module; saving `scripted_module` would discard the
# result of optimize_for_mobile.
torchscript_model_optimized._save_for_lite_interpreter("Face_CNN.ptl")

  model = create_fn(
