In [21]:
import cv2
import numpy as np
import tensorflow as tf

# Load the TFLite model
model_path = "best-169_float16.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load and preprocess the image
image_path = "images/2.png"
image = cv2.imread(image_path)
height, width, _ = image.shape

# Resize the image to match the model's input size (e.g., 640x640)
input_size = (1408, 1408)
resized_image = cv2.resize(image, input_size)
resized_image = resized_image / 255.0  # Normalize to [0, 1]
input_image = np.expand_dims(resized_image, axis=0)

# Set the input tensor to the preprocessed image
interpreter.set_tensor(input_details[0]['index'], input_image)

# Run the inference
interpreter.invoke()

# Get the output tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

# Assuming YOLOv9 output format (adjust based on actual output shape)
# For simplicity, let's assume output is a single tensor with shape (1, num_boxes, 6)
# where each box is represented as [x, y, w, h, confidence, class_id]

# Interpret the output
class_ids = []
scores = []
boxes = []

# Assuming output_data is a numpy array
for detection in output_data[0]:
    scores.append(detection[4])  # Confidence
    class_ids.append(int(detection[5]))  # Class ID
    # Convert bounding box coordinates to original image scale
    x, y, w, h = detection[0:4]
    x = int((x - w / 2) * width)
    y = int((y - h / 2) * height)
    x2 = int((x + w) * width)
    y2 = int((y + h) * height)
    
    # Ensure bounding box coordinates are within image bounds
    x = max(0, min(x, width))
    y = max(0, min(y, height))
    x2 = max(0, min(x2, width))
    y2 = max(0, min(y2, height))
    
    boxes.append([x, y, x2, y2])

# Apply Non-Maximum Suppression (NMS) if needed
# For simplicity, this step is omitted here. You can use OpenCV's NMSBoxes function.

# Draw bounding boxes on the original image
for i in range(len(boxes)):
    if scores[i] > 0.5:  # Filter by confidence threshold
        x, y, x2, y2 = boxes[i]
        cv2.rectangle(image, (x, y), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, f'Class {class_ids[i]} - {scores[i]:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Display the image with bounding boxes
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()


ValueError: Cannot set tensor: Got value of type FLOAT64 but expected type FLOAT32 for input 0, name: images 

In [1]:
import tensorflow as tf
import numpy as np
import cv2  # Import OpenCV

def load_tflite_model(tflite_file):
    """Loads a TensorFlow Lite model."""
    interpreter = tf.lite.Interpreter(model_path=tflite_file)
    interpreter.allocate_tensors()
    return interpreter

def preprocess_image(image_path, input_size):
    """
    Reads, resizes, and preprocesses an image for TFLite inference.
    """
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
    img = cv2.resize(img, input_size)
    img = img.astype(np.float32) / 255.0  # Normalize
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img, cv2.resize(cv2.cvtColor(img[0], cv2.COLOR_RGB2BGR), input_size)  # Return original image for drawing

def run_inference(interpreter, image):
    """Runs inference on a TFLite model."""
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()
    output_data = [interpreter.get_tensor(detail['index']) for detail in output_details]
    return output_data

def draw_bounding_boxes(image, output_data, input_size):
    """
    Draws bounding boxes on the image based on the output data.
    
    Note: This function assumes a simplified output format. You may need to adjust it based on your model's actual output structure.
    """
    # Assuming output_data contains bounding box coordinates and class probabilities
    # Adjust indices and processing based on your model's output format
    for detection in output_data[0][0]:  # Adjust index based on model output
        scores = detection[5:]  # Class probabilities
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        
        if confidence > 0.5:  # Confidence threshold
            # Extract bounding box coordinates
            x, y, w, h = detection[0:4] * np.array([input_size[1], input_size[0], input_size[1], input_size[0]])
            x, y, w, h = int(x), int(y), int(w), int(h)
            
            # Draw bounding box
            cv2.rectangle(image, (x - w // 2, y - h // 2), (x + w // 2, y + h // 2), (0, 255, 0), 2)
            cv2.putText(image, f"Class {class_id}", (x - w // 2, y - h // 2 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    
    return image

# Example usage:
tflite_file = "best-136_float16.tflite"
image_path = "images/8.png"  # Replace with your image path

interpreter = load_tflite_model(tflite_file)

input_details = interpreter.get_input_details()
input_size = input_details[0]['shape'][1:3]  # Get input height and width

preprocessed_image, original_image = preprocess_image(image_path, input_size)
output_data = run_inference(interpreter, preprocessed_image)

# Draw bounding boxes
image_with_boxes = draw_bounding_boxes(original_image, output_data, input_size)

# Display or save the image
# cv2.imshow("Image with Bounding Boxes", image_with_boxes)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Save the image
cv2.imwrite("output.jpg", image_with_boxes)


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
[ WARN:0@1.941] global loadsave.cpp:848 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.


True

In [10]:
from ultralytics import YOLO

# Load a model
model = YOLO("best-136.pt")  # pretrained YOLO11n model

# Run batched inference on a list of images
results = model("images/10.png")  # return a list of Results objects

# Process results list
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    obb = result.obb  # Oriented boxes object for OBB outputs
    result.show()  # display to screen
    result.save(filename="result.jpg")  # save to disk


image 1/1 /Users/silunikeerthiratne/Documents/IoT/inference/images/10.png: 768x1408 2 4s, 2 8s, 493.7ms
Speed: 23.5ms preprocess, 493.7ms inference, 21.0ms postprocess per image at shape (1, 3, 768, 1408)


In [11]:
print(boxes)

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([8., 4., 4., 8.])
conf: tensor([0.8955, 0.8592, 0.8157, 0.8007])
data: tensor([[6.1821e+02, 5.8083e+02, 1.0708e+03, 9.1033e+02, 8.9547e-01, 8.0000e+00],
        [6.5660e+02, 2.4301e+02, 1.0107e+03, 5.8521e+02, 8.5920e-01, 4.0000e+00],
        [1.7884e+03, 2.3505e+02, 2.1244e+03, 5.9381e+02, 8.1566e-01, 4.0000e+00],
        [1.7539e+03, 6.0869e+02, 2.1892e+03, 9.2581e+02, 8.0068e-01, 8.0000e+00]])
id: None
is_track: False
orig_shape: (1500, 2800)
shape: torch.Size([4, 6])
xywh: tensor([[ 844.4902,  745.5806,  452.5611,  329.5087],
        [ 833.6587,  414.1104,  354.1232,  342.2008],
        [1956.4309,  414.4302,  335.9688,  358.7605],
        [1971.5364,  767.2517,  435.2762,  317.1246]])
xywhn: tensor([[0.3016, 0.4971, 0.1616, 0.2197],
        [0.2977, 0.2761, 0.1265, 0.2281],
        [0.6987, 0.2763, 0.1200, 0.2392],
        [0.7041, 0.5115, 0.1555, 0.2114]])
xyxy: tensor([[ 618.2097,  580.8262, 1070.7708,  910.33

In [12]:
import cv2
import numpy as np
import tensorflow as tf

# Load the TFLite model
model_path = "best-136_float16.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load and preprocess the image
image_path = "images/8.png"
image = cv2.imread(image_path)
height, width, _ = image.shape

# Resize the image to match the model's input size (e.g., 640x640)
input_size = (1408, 1408)
resized_image = cv2.resize(image, input_size)
resized_image = resized_image / 255.0  # Normalize to [0, 1]

# Ensure input is FLOAT32
input_image = np.expand_dims(resized_image, axis=0).astype(np.float32)

# Set the input tensor to the preprocessed image
interpreter.set_tensor(input_details[0]['index'], input_image)

# Run the inference
interpreter.invoke()

# Get the output tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

# Assuming YOLOv9 output format (adjust based on actual output shape)
# For simplicity, let's assume output is a single tensor with shape (1, num_boxes, 6)
# where each box is represented as [x, y, w, h, confidence, class_id]

# Interpret the output
class_ids = []
scores = []
boxes = []

# Assuming output_data is a numpy array
for detection in output_data[0]:
    scores.append(detection[4])  # Confidence
    class_ids.append(int(detection[5]))  # Class ID
    # Convert bounding box coordinates to original image scale
    x, y, w, h = detection[0:4]
    x = int((x - w / 2) * width)
    y = int((y - h / 2) * height)
    x2 = int((x + w) * width)
    y2 = int((y + h) * height)
    
    # Ensure bounding box coordinates are within image bounds
    x = max(0, min(x, width))
    y = max(0, min(y, height))
    x2 = max(0, min(x2, width))
    y2 = max(0, min(y2, height))
    
    boxes.append([x, y, x2, y2])

# Apply Non-Maximum Suppression (NMS) if needed
# For simplicity, this step is omitted here. You can use OpenCV's NMSBoxes function.

# Draw bounding boxes on the original image
for i in range(len(boxes)):
    if scores[i] > 0:  # Filter by confidence threshold
        x, y, x2, y2 = boxes[i]
        cv2.rectangle(image, (x, y), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, f'Class {class_ids[i]} - {scores[i]:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Display the image with bounding boxes
# cv2.imshow('Image', image)
cv2.imwrite("image2.jpeg", image)


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


True

In [16]:
print(output_data)

[[[   0.009024    0.020137    0.021167 ...     0.92981     0.94922     0.96323]
  [   0.018884    0.023064    0.020685 ...     0.96085     0.95678     0.96026]
  [   0.023198    0.046142     0.04789 ...     0.13757     0.10063    0.074538]
  ...
  [ 7.3438e-06  1.4184e-05  1.2957e-05 ...  0.00017561  0.00019366  0.00015047]
  [ 5.7905e-06  9.9166e-06  6.6689e-06 ...  0.00010213  0.00021677  0.00019057]
  [ 8.7898e-06  1.5511e-05  1.3333e-05 ...  4.1668e-05  8.0066e-05  5.5776e-05]]]


In [None]:
import cv2
import numpy as np
import tensorflow as tf

# Load the TFLite model
model_path = "best-169_float16.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load and preprocess the image
image_path = "images/10.png"
image = cv2.imread(image_path)
height, width, _ = image.shape

# Resize the image to match the model's input size (e.g., 640x640)
input_size = (1408, 1408)
resized_image = cv2.resize(image, input_size)
resized_image = resized_image / 255.0  # Normalize to [0, 1]

# Ensure input is FLOAT32
input_image = np.expand_dims(resized_image, axis=0).astype(np.float32)

# Set the input tensor to the preprocessed image
interpreter.set_tensor(input_details[0]['index'], input_image)

# Run the inference
interpreter.invoke()

# Get the output tensor
output_data = interpreter.get_tensor(output_details[0]['index'])

# Assuming YOLOv9 output format (adjust based on actual output shape)
# For simplicity, let's assume output is a tensor with shape (1, num_anchors, num_candidate_detections)
# where each detection is represented as [x, y, w, h, confidence, class_probabilities]

# Interpret the output
class_ids = []
scores = []
boxes = []

# Assuming output_data is a numpy array
for anchor in output_data[0]:
    for detection in anchor:
        # Assuming detection format: [x, y, w, h, confidence, class_probabilities]
        x, y, w, h, confidence, *class_probabilities = detection
        
        # Convert bounding box coordinates to original image scale
        x = int(x * width)
        y = int(y * height)
        w = int(w * width)
        h = int(h * height)
        
        # Calculate bounding box coordinates
        x1 = max(0, int(x - w / 2))
        y1 = max(0, int(y - h / 2))
        x2 = min(width, int(x + w / 2))
        y2 = min(height, int(y + h / 2))
        
        # Find class ID with highest probability
        class_id = np.argmax(class_probabilities)
        
        # Append detection if confidence is above threshold
        if confidence > 0.5:
            boxes.append([x1, y1, x2, y2])
            scores.append(confidence)
            class_ids.append(class_id)

# Draw bounding boxes on the original image
for i in range(len(boxes)):
    x1, y1, x2, y2 = boxes[i]
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(image, f'Class {class_ids[i]} - {scores[i]:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Display the image with bounding boxes
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [8]:
print(type(output_data))
print(output_data.shape)
# print(output_data[0])
print(output_data[0][0].shape)

<class 'numpy.ndarray'>
(1, 14, 40656)
(40656,)


In [15]:
import tensorflow as tf
import numpy as np
import cv2
from PIL import Image

# Load the TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path="best-136_float16.tflite")
interpreter.allocate_tensors()

# Get input and output tensor details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load class labels (adjust based on your model)
class_labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] # Replace with your actual class names

# Function to preprocess the image for inference
def preprocess_image(image_path, input_shape):
    """
    Preprocess the image to match the model's input requirements.
    Args:
        image_path: Path to the input image.
        input_shape: Shape of the model's input tensor.
    Returns:
        Preprocessed image as a numpy array.
    """
    image = Image.open(image_path).convert("RGB")  # Ensure RGB format
    image = image.resize((input_shape[1], input_shape[2]))  # Resize to model input size
    image_array = np.array(image, dtype=np.float32) / 255.0  # Normalize pixel values
    image_array = np.expand_dims(image_array, axis=0)  # Add batch dimension
    return image_array

# Function to post-process inference results
def postprocess_output(output_data, original_image_shape):
    """
    Post-process the raw output data from the model.
    Args:
        output_data: Raw output data from the TensorFlow Lite model.
        original_image_shape: Shape of the original input image (height, width).
    Returns:
        Bounding boxes in [xmin, ymin, xmax, ymax] format, class names, and confidence scores.
    """
    boxes = []  # Bounding box coordinates
    class_names = []  # Predicted class names
    confidences = []  # Confidence scores
    
    # Example decoding logic (adjust based on your model's output format)
    for detection in output_data[0]:  # Assuming output_data[0] contains bounding boxes
        confidence = detection[4]  # Confidence score (adjust index based on your model)
        if confidence > 0.5:  # Threshold for valid detections
            x_min, y_min, x_max, y_max = detection[:4] * np.array(
                [original_image_shape[1], original_image_shape[0], original_image_shape[1], original_image_shape[0]]
            )
            boxes.append([x_min, y_min, x_max, y_max])
            class_id = int(detection[5])  # Class ID (adjust index based on your model)
            class_names.append(class_labels[class_id])  # Map ID to class name
            confidences.append(confidence)
    
    return boxes, class_names, confidences

# Function to draw bounding boxes on an image
def draw_bounding_boxes(image, boxes, class_names=None, confidences=None, color=(0, 255, 0), thickness=2):
    """
    Draw bounding boxes on an image.
    Args:
        image: Input image (numpy array).
        boxes: List of bounding boxes in [xmin, ymin, xmax, ymax] format.
        class_names: List of predicted class names corresponding to each box.
        confidences: List of confidence scores corresponding to each box.
        color: Color of the bounding box (default is green).
        thickness: Thickness of the box lines (default is 2).
    Returns:
        Annotated image with bounding boxes.
    """
    for i, box in enumerate(boxes):
        # Draw rectangle
        cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, thickness)
        
        # Add label and confidence if provided
        if class_names and confidences:
            label = f"{class_names[i]} ({confidences[i]:.2f})"
            cv2.putText(image, label, (int(box[0]), int(box[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
    
    return image

# Path to your input image
image_path = "/Users/silunikeerthiratne/Documents/IoT/inference/images/10.png"  # Replace with your actual image path

# Load and preprocess the image for inference
input_shape = input_details[0]['shape']
input_data = preprocess_image(image_path, input_shape)

# Perform inference
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Get raw output data from the model
output_data = interpreter.get_tensor(output_details[0]['index'])

# Post-process output data to extract bounding boxes and labels
original_image = cv2.imread(image_path)  # Load original image with OpenCV for drawing
original_image_shape = original_image.shape[:2]  # Get height and width of original image

bounding_boxes, predicted_classes, confidence_scores = postprocess_output(output_data, original_image_shape)

# Draw bounding boxes on the original image
annotated_image = draw_bounding_boxes(original_image.copy(), bounding_boxes, predicted_classes, confidence_scores)

# Display and save the annotated image


cv2.imwrite("output_image.jpg", annotated_image)  # Save annotated image as a file


True

In [16]:
print(bounding_boxes)
print(predicted_classes)
print(confidence_scores)

[]
[]
[]


In [13]:
print(output_data[0][0].shape)

(40656,)


In [22]:
import tensorflow as tf
import cv2
import numpy as np

# Load the TFLite model
model_path = 'best-136_float16.tflite'
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Image preprocessing class
class LetterBox:
    """Resize image and padding for detection."""

    def __init__(self, new_shape=(1408, 1408), auto=False, scaleFill=False, scaleup=True, stride=32):
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.stride = stride

    def __call__(self, labels=None, image=None):
        """Return updated labels and image with added border."""
        if labels is None:
            labels = {}
        img = labels.get('img') if image is None else image
        shape = img.shape[:2]  # current shape [height, width]
        new_shape = labels.pop('rect_shape', self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if self.auto:  # minimum rectangle
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)  # wh padding
        elif self.scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

        dw /= 2  # divide padding into 2 sides
        dh /= 2
        if labels.get('ratio_pad'):
            labels['ratio_pad'] = (labels['ratio_pad'], (dw, dh))  # for evaluation

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                 value=(114, 114, 114))  # add border

        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
            labels['img'] = img
            labels['resized_shape'] = new_shape
            return labels
        else:
            return img
    
    def _update_labels(self, labels, ratio, dw, dh):
        # Placeholder for _update_labels function (Implement if needed)
        return labels

# Load image
image_path = "images/10.png"
img = cv2.imread(image_path)

# Preprocess image
letterbox = LetterBox(1408, auto=False, stride=32)
im = letterbox(image=img)
im = np.expand_dims(im, axis=0)
im = im[..., ::-1].transpose((0, 1, 2, 3))  # BGR to RGB, BHWC to BCHW
im = np.ascontiguousarray(im)
im = im.astype(np.float32)
im /= 255

# Set input tensor
input_data = im
interpreter.set_tensor(input_details[0]['index'], input_data)

# Run inference
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

# Process the output_data as needed
print(output_data.shape)

import numpy as np

nc = 0
conf_thres = 0.25

bs = output_data.shape[0]  # batch size
nc = nc or (output_data.shape[1] - 4)  # number of classes
nm = output_data.shape[1] - nc - 4
mi = 4 + nc  # mask start index
xc = np.amax(output_data[:, 4:mi], 1) > conf_thres  # candidates

multi_label=False
multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

prediction = np.transpose(output_data, (0, 2, 1))

def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y

prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy

output = [np.zeros((0, 6))] * bs

import cv2

max_nms=30000
agnostic=False
max_wh=7680
iou_thres = 0.45
max_det = 300

nc = 10 # Number of classes

for xi, x in enumerate(prediction):  # image index, image inference
    # Filter based on confidence
    conf = np.max(x[:, 4:4 + nc], axis=1)  # Confidence scores
    confidence_mask = conf > conf_thres
    x_filtered = x[confidence_mask]  # Apply confidence threshold

    if x_filtered.shape[0] == 0:
        continue

    # Apply NMS
    boxes = x_filtered[:, :4]
    scores = np.max(x_filtered[:, 4:4 + nc], axis=1)
    class_ids = np.argmax(x_filtered[:, 4:4 + nc], axis=1)

    nms_indices = cv2.dnn.NMSBoxes(boxes.astype(np.float32), scores, score_threshold=0.4, nms_threshold=iou_thres)

    if nms_indices is not None and len(nms_indices) > 0:
        nms_indices = nms_indices.flatten()
        
        # Extract the boxes that survive NMS
        xyxy = boxes[nms_indices]
        confidences = scores[nms_indices]
        class_ids = class_ids[nms_indices]
                
        # Create output array
        detections = np.concatenate([xyxy, confidences[:, None], class_ids[:, None]], axis=1) # Corrected axis
        
        output[xi] = detections  # Assign the results
    else:
        output[xi] = np.array([]) # No detections found
        
def clip_boxes(boxes, shape):
    """
    It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the
    shape... Args:
      boxes (torch.Tensor): the bounding boxes to clip
      shape (tuple): the shape of the image
    """
    boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
    boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2

def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """
    Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
    (img1_shape) to the shape of a different image (img0_shape).

    Args:
      img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
      boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
      img0_shape (tuple): the shape of the target image, in the format of (height, width).
      ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
                         calculated based on the size difference between the two images.

    Returns:
      boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
            (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    boxes[..., [0, 2]] -= pad[0]  # x padding
    boxes[..., [1, 3]] -= pad[1]  # y padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes

results = []

for i, pred in enumerate(output):
  if pred.size > 0:
    pred[:, :4] = scale_boxes((1408, 1408), pred[:, :4], img.shape) # scale for each image
  results.append(pred)

# Drawing the bounding boxes and labels
for detection in results:
    if detection.size > 0:
        for xmin, ymin, xmax, ymax, conf, class_id in detection:
            # Convert coordinates to integers
            xmin, ymin, xmax, ymax = map(int, [xmin, ymin, xmax, ymax])

            # Draw rectangle and label
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            label = f"Class {int(class_id)}: {conf:.2f}"
            cv2.putText(img, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Save the image to a directory
output_path = 'output_image.jpg'
cv2.imwrite(output_path, img)
print(f"Image saved to {output_path}")



(1, 14, 40656)
Image saved to output_image.jpg


In [19]:
print(results)

[array([[4.3926221e-01, 0.0000000e+00, 7.6024002e-01, 0.0000000e+00,
        2.4129582e-08, 5.8059495e-06, 4.3060288e-07, 1.2242452e-06,
        1.8564424e-04, 1.9738722e-05, 1.8518211e-04, 2.0621889e-04,
        8.9767706e-01, 3.2575769e-04],
       [4.6863478e-01, 0.0000000e+00, 7.1508098e-01, 0.0000000e+00,
        5.6986668e-08, 5.7254629e-06, 7.0962611e-05, 8.2790095e-05,
        8.5769618e-01, 6.4922460e-05, 3.1337936e-07, 1.8626173e-07,
        2.0498404e-04, 6.1291698e-06],
       [1.2708871e+00, 0.0000000e+00, 1.5083759e+00, 0.0000000e+00,
        5.3224773e-07, 1.2056274e-05, 1.2673342e-04, 2.1024402e-04,
        8.1394333e-01, 2.1741482e-04, 1.2471883e-06, 2.4431215e-06,
        1.0089860e-03, 3.3942022e-05],
       [1.2457570e+00, 0.0000000e+00, 1.5545688e+00, 0.0000000e+00,
        3.9000167e-08, 3.6509023e-06, 3.4076655e-07, 3.7922376e-07,
        1.8515845e-04, 6.2184913e-06, 4.2668878e-04, 4.6090828e-04,
        8.0123442e-01, 1.4105260e-04]], dtype=float32)]


In [23]:
import tensorflow as tf
import numpy as np
import cv2
from PIL import Image

# Load the TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path="/Users/silunikeerthiratne/Documents/IoT/inference/best-136_float16.tflite")
interpreter.allocate_tensors()

# Get input and output tensor details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load class labels (adjust based on your model)
class_labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]  # Replace with your actual class names
# class_labels = ["Class1"]

# Function to preprocess the image for inference
def preprocess_image(image_path, input_shape):
    """
    Preprocess the image to match the model's input requirements.
    Args:
        image_path: Path to the input image.
        input_shape: Shape of the model's input tensor.
    Returns:
        Preprocessed image as a numpy array.
    """
    image = Image.open(image_path).convert("RGB")  # Ensure RGB format
    image = image.resize((input_shape[1], input_shape[2]))  # Resize to model input size
    image_array = np.array(image, dtype=np.float32) / 255.0  # Normalize pixel values
    image_array = np.expand_dims(image_array, axis=0)  # Add batch dimension
    return image_array

# Function to post-process inference results
def postprocess_output(output_data, original_image_shape):
    """
    Post-process the raw output data from the model.
    Args:
        output_data: Raw output data from the TensorFlow Lite model.
        original_image_shape: Shape of the original input image (height, width).
    Returns:
        Bounding boxes in [xmin, ymin, xmax, ymax] format, class names, and confidence scores.
    """
    boxes = []  # Bounding box coordinates
    class_names = []  # Predicted class names
    confidences = []  # Confidence scores

    # Example decoding logic (adjust based on your model's output format)
    for detection in output_data[0]:  # Assuming output_data[0] contains bounding boxes
        confidence = detection[4]  # Confidence score (adjust index based on your model)
        if confidence > 0.5:  # Threshold for valid detections
            x_min, y_min, x_max, y_max = detection[:4] * np.array(
                [original_image_shape[1], original_image_shape[0], original_image_shape[1], original_image_shape[0]]
            )
            boxes.append([x_min, y_min, x_max, y_max])
            class_id = int(detection[5])  # Class ID (adjust index based on your model)
            class_names.append(class_labels[class_id])  # Map ID to class name
            confidences.append(confidence)

    return boxes, class_names, confidences

# Function to draw bounding boxes on an image
def draw_bounding_boxes(image, boxes, class_names=None, confidences=None, color=(0, 255, 0), thickness=2):
    """
    Draw bounding boxes on an image.
    Args:
        image: Input image (numpy array).
        boxes: List of bounding boxes in [xmin, ymin, xmax, ymax] format.
        class_names: List of predicted class names corresponding to each box.
        confidences: List of confidence scores corresponding to each box.
        color: Color of the bounding box (default is green).
        thickness: Thickness of the box lines (default is 2).
    Returns:
        Annotated image with bounding boxes.
    """
    for i, box in enumerate(boxes):
        # Draw rectangle
        cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, thickness)

        # Add label and confidence if provided
        if class_names and confidences:
            label = f"{class_names[i]} ({confidences[i]:.2f})"
            cv2.putText(image, label, (int(box[0]), int(box[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)

    return image

# Path to your input image
image_path = r"/Users/silunikeerthiratne/Documents/IoT/inference/images/10.png"  # Replace with your actual image path

# Load and preprocess the image for inference
input_shape = input_details[0]['shape']
input_data = preprocess_image(image_path, input_shape)

# Perform inference
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Get raw output data from the model
output_data = interpreter.get_tensor(output_details[0]['index'])

# Post-process output data to extract bounding boxes and labels
original_image = cv2.imread(image_path)  # Load original image with OpenCV for drawing
original_image_shape = original_image.shape[:2]  # Get height and width of original image

bounding_boxes, predicted_classes, confidence_scores = postprocess_output(output_data, original_image_shape)

# Draw bounding boxes on the original image
annotated_image = draw_bounding_boxes(original_image.copy(), bounding_boxes, predicted_classes, confidence_scores)

# Display and save the annotated image


cv2.imwrite("output_image.jpg", annotated_image)  # Save annotated image as a file

 

True

In [26]:
print(bounding_boxes)
print(predicted_classes)
print(confidence_scores)


[]
[]
[]


In [25]:
print(confidence)

NameError: name 'confidence' is not defined

In [1]:
import tensorflow as tf
import cv2
import numpy as np

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path='best-136_float16.tflite')  # Update path
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

class LetterBox:
    def __init__(self, new_shape=(1408, 1408), auto=False, scaleFill=False, scaleup=True, stride=32):
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.stride = stride

    def __call__(self, labels=None, image=None):
        if labels is None:
            labels = {}
        img = labels.get('img') if image is None else image
        shape = img.shape[:2]
        new_shape = labels.pop('rect_shape', self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not self.scaleup:
            r = min(r, 1.0)

        ratio = r, r
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
        if self.auto:
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)
        elif self.scaleFill:
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]

        dw /= 2
        dh /= 2

        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                 value=(114, 114, 114))

        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
            labels['img'] = img
            labels['resized_shape'] = new_shape
            return labels
        else:
            return img

def xywh2xyxy(x):
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y

def clip_boxes(boxes, shape):
    boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])
    boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])

def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    if ratio_pad is None:
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
            (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    boxes[..., [0, 2]] -= pad[0]
    boxes[..., [1, 3]] -= pad[1]
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes

def inference(image_path, conf_thres=0.25, iou_thres = 0.45, max_det = 300):
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError("Image not found at the specified path.")

    # Preprocess the image
    letterbox = LetterBox(1408, auto=False, stride=32)
    im = letterbox(image=img.copy())
    im = np.expand_dims(im, axis=0)
    im = im[..., ::-1].transpose((0, 1, 2, 3))  # BGR to RGB, BHWC to BCHW
    im = np.ascontiguousarray(im)  # contiguous
    im = im.astype(np.float32)
    im /= 255

    # Set input tensor
    input_data = im
    interpreter.set_tensor(input_details[0]['index'], input_data)

    # Run inference
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])

    # Process output
    prediction = np.transpose(output_data, (0, -1, -2))
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])

    bs = prediction.shape[0]
    nc = prediction.shape[2] - 4  # Number of classes
    xc = np.amax(prediction[:, :, 4:4+nc], 2) > conf_thres  # candidates

    output = [np.zeros((0, 6))] * bs  # Assuming no mask

    max_nms=30000
    agnostic=False
    max_wh=7680

    for xi, x in enumerate(prediction):
        x = x[xc[xi]]

        if not x.shape[0]:
            continue

        box = x[:, :4]
        cls = x[:, 4:4+nc]

        conf = np.max(cls, axis=1, keepdims=True)
        j = np.argmax(cls, axis=1, keepdims=True)
        x = np.concatenate((box, conf, j), axis=1)

        conf_flat = conf.flatten()
        filtered_x = x[conf_flat > conf_thres]

        n = filtered_x.shape[0]
        if not n:
            continue

        if n > max_nms:
            sorted_indices = np.argsort(x[:, 4])[::-1]
            x = x[sorted_indices[:max_nms]]

        c = x[:, 5:6] * (0 if agnostic else max_wh)
        boxes, scores = x[:, :4] + c, x[:, 4]

        i = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_thres, nms_threshold=iou_thres)
        if i is not None:
            i = i[:max_det]
            output[xi] = x[i.flatten()]
        else:
            output[xi] = np.array([])

    # Scale boxes to original image size
    for i, pred in enumerate(output):
        if pred.size > 0:
            pred[:, :4] = scale_boxes((1408, 1408), pred[:, :4], img.shape)
            output[i] = pred

    return img, output[0]

def draw_boxes(img, results):
    for *xyxy, conf, class_id in results:
        xmin, ymin, xmax, ymax = map(int, xyxy)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        label = f"Class {int(class_id)}: {conf:.2f}"
        cv2.putText(img, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    return img

# Example usage:
image_path = 'images/10.png'  # Specify your image path
try:
    img, results = inference(image_path)

    # Draw bounding boxes on the image
    img_with_boxes = draw_boxes(img.copy(), results)

    # Display the image
    cv2.imwrite('YOLOv8 Inference.jpg', img_with_boxes)
    
except Exception as e:
    print(f"An error occurred: {e}")


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [2]:
print(results)

[[0.43926221 0.         0.76024001 0.         0.89767706 8.        ]
 [0.46794514 0.         0.71700969 0.         0.86015636 4.        ]]
