In [1]:
import tflite_runtime

In [4]:
import numpy as np
import tflite_runtime.interpreter as tflite

# Build a 4-thread TFLite interpreter for the float32 model. No delegate is
# passed to the constructor, so nothing is offloaded to Arm NN in this cell.
interpreter = tflite.Interpreter(
    model_path="/home/pi/exp/exp1/models/yolov5nu_float32.tflite",
    num_threads=4,
)
interpreter.allocate_tensors()

# Look up the tensor metadata needed to feed the model and read its result.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Smoke test: run uniformly random float32 values of the expected input shape.
input_shape = input_details[0]['shape']
input_data = np.random.random_sample(input_shape).astype(np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Show the shape of the first output tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data.shape)

(1, 84, 8400)


In [135]:
# Ultralytics YOLO 🚀, AGPL-3.0 license

import argparse

import cv2
import numpy as np
from tflite_runtime import interpreter as tflite
from letterletterbox import LLetterBox


class Yolov8TFLite:
    """
    Runs a YOLO detection model exported to TFLite and draws the detections on the image.

    Typical use: construct once, then call ``main(image)`` for every image.
    """

    def __init__(self, tflite_model, size, confidence_thres, iou_thres):
        """
        Loads the TFLite model and allocates its tensors.

        Args:
            tflite_model (str): Path to the .tflite model file.
            size: Target shape handed to LetterBox as ``new_shape`` in ``preprocess``.
            confidence_thres (float): Score threshold passed to ``cv2.dnn.NMSBoxes``.
            iou_thres (float): NMS threshold passed to ``cv2.dnn.NMSBoxes``.
        """
        self.size = size
        self.tflite_model = tflite_model
        self.confidence_thres = confidence_thres
        self.iou_thres = iou_thres

        # Build the interpreter once so repeated main() calls reuse the allocated tensors.
        self.model = tflite.Interpreter(model_path=self.tflite_model, num_threads=4)
        self.model.allocate_tensors()

        # Random colour table indexed by class id (100 rows of 3 channel values).
        self.color_palette = np.random.uniform(0, 255, size=(100, 3))

    def draw_detections(self, img, box, score, class_id):
        """
        Draws one bounding box and its score label on ``img`` in place.

        Args:
            img (numpy.ndarray): Image to draw on.
            box: Sequence ``(x, y, w, h)`` with the top-left corner and size of the box.
            score (float): Detection score shown in the label.
            class_id (int): Class index, used only to pick the colour.
        """
        # Extract the coordinates of the bounding box
        x1, y1, w, h = box

        # Retrieve the color for the class ID
        color = self.color_palette[class_id]

        # Draw the bounding box on the image
        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

        # The label shows only the score; no class names are loaded in this class
        label = f"det: {score:.2f}"

        # Calculate the dimensions of the label text
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Put the label above the box, or below its top edge when there is no room above
        label_x = x1
        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

        # Draw a filled rectangle as the background for the label text
        cv2.rectangle(
            img,
            (int(label_x), int(label_y - label_height)),
            (int(label_x + label_width), int(label_y + label_height)),
            color,
            cv2.FILLED,
        )

        # Draw the label text on the image
        cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

    def preprocess(self):
        """
        Loads ``self.input_image`` (a path or an already decoded array), letterboxes it and
        converts it to a float32 batch scaled to [0, 1].

        Side effects: sets ``self.img``, ``self.img_height`` and ``self.img_width``.

        Returns:
            numpy.ndarray: Batch of one image in (1, C, H, W) layout with the channel order reversed.
        """
        self.img = cv2.imread(self.input_image) if not isinstance(self.input_image, np.ndarray) else self.input_image
        self.img_height, self.img_width = self.img.shape[:2]
        # NOTE(review): this cell imports `LLetterBox` from `letterletterbox`, not `LetterBox`;
        # confirm where `LetterBox` is expected to come from on a fresh kernel.
        letterbox = LetterBox(new_shape=self.size, auto=False, stride=32)
        image = np.stack([letterbox(image=self.img)])
        # Reverse the channel axis and move it in front of the spatial axes.
        image = image[..., ::-1].transpose((0, 3, 1, 2))
        image = np.ascontiguousarray(image).astype(np.float32)
        return image / 255

    def _letterbox_transform(self):
        """Returns ``(gain, (pad_x, pad_y))`` relating the original image to the letterboxed model input."""
        gain = min(self.input_width / self.img_width, self.input_height / self.img_height)
        pad = (
            round((self.input_width - self.img_width * gain) / 2 - 0.1),
            round((self.input_height - self.img_height * gain) / 2 - 0.1),
        )
        return gain, pad

    def postprocess(self, input_image, output):
        """
        Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.

        Requires ``self.input_width``/``self.input_height`` (set by ``main``) and
        ``self.img_width``/``self.img_height`` (set by ``preprocess``).

        Args:
            input_image (numpy.ndarray): The input image; detections are drawn on it in place.
            output (numpy.ndarray): The output of the model, with box coordinates in model-input pixels.

        Returns:
            numpy.ndarray: The input image with detections drawn on it.
        """
        # One row per candidate: centre x, centre y, width, height, then the per-class scores.
        pred = np.transpose(output[0])
        x = pred[:, 0] - pred[:, 2] / 2
        y = pred[:, 1] - pred[:, 3] / 2
        w = pred[:, 2]
        h = pred[:, 3]
        boxes = np.vstack([x, y, w, h]).T
        class_ids = np.argmax(pred[:, 4:], axis=1)
        scores = np.max(pred[:, 4:], axis=1)

        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)
        # Some OpenCV builds return an (N, 1) array and an empty tuple when nothing is kept;
        # flatten so every element is a scalar index.
        indices = np.asarray(indices).reshape(-1)

        # The letterbox geometry is the same for every detection, so compute it once.
        gain, pad = self._letterbox_transform()

        import time
        start = time.time()
        for i in indices:
            # Map the box from letterboxed model-input coordinates back to the original image.
            left, top, width, height = boxes[i]
            box = [(left - pad[0]) / gain, (top - pad[1]) / gain, width / gain, height / gain]
            score = scores[i]
            class_id = class_ids[i]

            # Fixed score floor applied on top of the confidence threshold already given to NMSBoxes.
            if score > 0.25:
                # Draw the detection on the input image
                self.draw_detections(input_image, box, score, class_id)
        print(time.time() - start)
        return input_image

    def main(self, input_image):
        """
        Performs inference using a TFLite model and returns the output image with drawn detections.

        Args:
            input_image: Image path or already decoded image array.

        Returns:
            output_img: The output image with drawn detections.
        """
        self.input_image = input_image

        interpreter = self.model
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

        # The tensor is fed channels-last (see the transpose below), so axis 1 is the
        # height and axis 2 is the width of the model input.
        input_shape = input_details[0]["shape"]
        self.input_height = int(input_shape[1])
        self.input_width = int(input_shape[2])

        # Preprocess the image data
        img_data = self.preprocess()
        self.img_data = img_data
        print(img_data.shape)
        # preprocess() returns (1, C, H, W); the interpreter is fed (1, H, W, C).
        img_data = img_data.transpose((0, 2, 3, 1))

        interpreter.set_tensor(input_details[0]["index"], img_data)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details[0]["index"])
        # Scale the x/width and y/height rows to model-input pixels.
        # NOTE(review): presumably the export emits box coordinates normalised to [0, 1] — confirm.
        output[:, [0, 2]] *= self.input_width
        output[:, [1, 3]] *= self.input_height
        return self.postprocess(self.img, output)


# Build the detector once: letterbox target (640, 640), confidence and IoU thresholds both 0.5.
detection = Yolov8TFLite("/home/pi/exp/exp1/models/yolov5nu_float32.tflite", (640, 640), 0.5, 0.5)

# Load the test image used by the timing cell below. cv2.imread returns None instead of
# raising when the file cannot be read, so fail here with a clear message.
IMAGE_PATH = "../data/images/coco_bike.jpg"
img = cv2.imread(IMAGE_PATH)
if img is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")


In [136]:
%%timeit
# Times the full pipeline per call (preprocess, inference, postprocess and drawing).
# main() draws onto the array it is given, so `img` accumulates boxes across the repeated runs.
output_image = detection.main(img)

(1, 3, 640, 640)
(1, 3, 640, 640)
(1, 3, 640, 640)
(1, 3, 640, 640)
(1, 3, 640, 640)
(1, 3, 640, 640)
(1, 3, 640, 640)
(1, 3, 640, 640)
178 ms ± 9.71 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
import onnxruntime as ort
# IPython help lookup for the session constructor (development aid; shows signature and docstring).
ort.InferenceSession?

[0;31mInit signature:[0m
[0mort[0m[0;34m.[0m[0mInferenceSession[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mpath_or_bytes[0m[0;34m:[0m [0;34m'str | bytes | os.PathLike'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msess_options[0m[0;34m:[0m [0;34m'Sequence[onnxruntime.SessionOptions] | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mproviders[0m[0;34m:[0m [0;34m'Sequence[str | tuple[str, dict[Any, Any]]] | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprovider_options[0m[0;34m:[0m [0;34m'Sequence[dict[Any, Any]] | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'None'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      This is the main class used to run a model.
[0;31mInit docstring:[0m
:param path_or_bytes: Filename or serialized ONNX or ORT format model in a byte

In [4]:
# Print the working directory that relative paths in this notebook resolve against.
!pwd

/home/pi/exp/exp1/src


In [6]:
from ultralytics import YOLO

# Load the yolov5nu checkpoint (downloaded on first use, as the log below shows)
model = YOLO("yolov5nu.pt")

# Export to ONNX; as the last expression, the returned file name is shown as the cell result
model.export(
    format="onnx",
    optimize=True,
    simplify=False,
)



# Run inference


Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov5nu.pt to 'yolov5nu.pt'...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5.27M/5.27M [00:00<00:00, 16.8MB/s]


Ultralytics YOLOv8.1.42 🚀 Python-3.11.2 torch-2.2.2 CPU (Cortex-A76)
YOLOv5n summary (fused): 193 layers, 2649200 parameters, 0 gradients, 7.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov5nu.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.3 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.0 opset 17...
[34m[1mONNX:[0m export success ✅ 3.5s, saved as 'yolov5nu.onnx' (10.3 MB)

Export complete (9.0s)
Results saved to [1m/home/pi/exp/exp1/src[0m
Predict:         yolo predict task=detect model=yolov5nu.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov5nu.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov5nu.onnx'

In [2]:
%%timeit
# NOTE(review): `ncnn_model` is not defined in any cell visible in this part of the notebook —
# presumably a YOLO model loaded from an NCNN export; confirm it is created before this cell runs.
results = ncnn_model('https://ultralytics.com/images/bus.jpg')


Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /home/pi/exp/exp1/src/bus.jpg: 640x640 5 persons, 1 bus, 279.5ms
Speed: 10.3ms preprocess, 279.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)

Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /home/pi/exp/exp1/src/bus.jpg: 640x640 5 persons, 1 bus, 252.2ms
Speed: 8.4ms preprocess, 252.2ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)

Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /home/pi/exp/exp1/src/bus.jpg: 640x640 5 persons, 1 bus, 247.7ms
Speed: 8.4ms preprocess, 247.7ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)

Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /home/pi/exp/exp1/src/bus.jpg: 640x640 5 persons, 1 bus, 175.6ms
Speed: 8.1ms preprocess, 175.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)

Found https://ultralytics.com/images/bus.jpg l