# Supervision Integration with DeGirum PySDK (Hailo8L/HAILO8)

This notebook demonstrates how to visualize DeGirum PySDK inference outputs with the Supervision library. It focuses purely on post-inference rendering: AI server setup is not covered, and only the connection settings passed to `dg.load_model(...)` are defined. It follows our modular guide style: every example loads its model through the same `dg.load_model(...)` call and initializes its own image path.

## What’s covered
- **Object Detection**: boxes + labels (optional scores) using `BoxAnnotator` and `LabelAnnotator`
- **Instance Segmentation**: masks + boxes + labels using `MaskAnnotator`, `BoxAnnotator`, `LabelAnnotator`
- **Pose Estimation**: keypoints + skeleton edges using `VertexAnnotator` and `EdgeAnnotator`

## What you’ll use
- `degirum`, `degirum_tools` and `supervision` annotators
- Lightweight adapters that return native Supervision objects:
  - `degirum_to_sv_detections(...)` → `sv.Detections` + labels
  - `degirum_to_sv_keypoints(...)` → `list[sv.KeyPoints]`

## Tested with
- Hailo8L / HAILO8 targets via DeGirum PySDK
- `supervision==0.22.0`

For additional visualization options, see the Supervision detection annotators documentation:
https://supervision.roboflow.com/0.22.0/detection/annotators/

In [None]:
import degirum as dg
import degirum_tools
import numpy as np
import supervision as sv
from supervision.detection.core import Detections
from supervision.draw.color import Color
from supervision.geometry.core import Position
from supervision.annotators.core import LabelAnnotator, BoxAnnotator, MaskAnnotator

# -------------------------------
# 1) Detection / Segmentation
# -------------------------------
def _mask_to_bool(mask):
    """
    Convert one raw mask (bool or numeric array-like) into a 2D boolean array.

    Raises:
      - ValueError if the mask is not 2D after dropping singleton channel dims.
    """
    m = np.asarray(mask)

    # Replace NaNs/Infs so the max-based threshold below is well defined
    if np.issubdtype(m.dtype, np.floating):
        m = np.nan_to_num(m, nan=0.0, posinf=1.0, neginf=0.0)

    # Threshold to bool if needed
    if m.dtype != bool:
        mmax = float(m.max()) if m.size else 0.0
        if mmax <= 0.0:
            # No positive value anywhere → empty mask. Without this guard the
            # adaptive threshold would be 0 and `m >= 0` would mark every
            # zero pixel of an all-zero mask as foreground.
            m = np.zeros(m.shape, dtype=bool)
        else:
            if mmax <= 1.0:
                # Probability mask in [0,1] → adaptive threshold at half the observed max
                thr = 0.5 * mmax
            else:
                # Assume 0..255 (or similar) → binary around the midpoint
                thr = 128.0 if m.dtype == np.uint8 else 0.5 * mmax
            m = m >= thr

    # Handle singleton channel dims in either position
    if m.ndim == 3 and 1 in m.shape:
        m = np.squeeze(m)
    if m.ndim != 2:
        raise ValueError(f"Mask must be 2D (H,W); got shape {m.shape}")
    return m.astype(bool, copy=False)


def degirum_to_sv_detections(results):
    """
    Convert DeGirum detection/segmentation results to Supervision Detections + labels.

    Input:
      - `results`: list[dict] OR an object with `.results` (list[dict])
        Each dict must include:
          "bbox": [x1,y1,x2,y2], "category_id": int, "label": str, "score": float
        and can include:
          "mask": array-like (optional; bool or numeric)

    Returns:
      - dets: supervision.Detections (with .mask if at least one item has a mask;
        items without a mask get an all-False mask of the first mask's shape)
      - labels: list[str] like "class 0.97"

    Raises:
      - KeyError if an item lacks "bbox", "category_id", "score" or "label"
      - ValueError if a mask is not 2D (H,W) after squeezing singleton dims
    """
    items = getattr(results, "results", results) or []
    if not items:
        # Empty but well-formed Detections so annotators can be called unconditionally
        return (
            Detections(
                xyxy=np.empty((0, 4), dtype=np.float32),
                class_id=np.array([], dtype=int),
                confidence=np.array([], dtype=np.float32),
            ),
            [],
        )

    bboxes, class_ids, confidences, labels = [], [], [], []
    masks_collected = []

    for det in items:
        bboxes.append(det["bbox"])
        class_ids.append(det["category_id"])
        confidences.append(det["score"])
        labels.append(f'{det["label"]} {det["score"]:.2f}')

        raw_mask = det.get("mask", None)
        # Keep a None placeholder so masks stay aligned with the boxes
        masks_collected.append(None if raw_mask is None else _mask_to_bool(raw_mask))

    xyxy = np.array(bboxes, dtype=np.float32)
    cls = np.array(class_ids, dtype=int)
    conf = np.array(confidences, dtype=np.float32)

    if any(m is not None for m in masks_collected):
        # Fill missing masks with empty ones sized like the first real mask
        first_mask = next(m for m in masks_collected if m is not None)
        H, W = first_mask.shape
        masks = [(m if m is not None else np.zeros((H, W), dtype=bool)) for m in masks_collected]
        dets = Detections(xyxy=xyxy, class_id=cls, confidence=conf, mask=np.stack(masks, axis=0))
    else:
        dets = Detections(xyxy=xyxy, class_id=cls, confidence=conf)

    return dets, labels

# -------------------------------
# 2) Keypoints (Pose)
# -------------------------------
def degirum_to_sv_keypoints(results, *, min_kp_score: float = 0.3) -> list[sv.KeyPoints]:
    """
    Build one Supervision KeyPoints object per DeGirum pose result that has landmarks.

    Input:
      - `results`: list[dict] OR an object with `.results` (list[dict])
        Each dict may include:
          "landmarks": list of {"landmark": [x, y], "score": float?}
      - `min_kp_score`: landmarks scoring below this value are written as (0, 0);
        a landmark without a "score" is treated as score 1.0

    Returns:
      - List[sv.KeyPoints]; each entry wraps an array of shape (1, K, 2).
        Items with missing or empty "landmarks" are skipped.
    """
    entries = getattr(results, "results", results) or []
    converted: list[sv.KeyPoints] = []

    for entry in entries:
        landmarks = entry.get("landmarks", None)
        if landmarks:
            points = []
            for landmark in landmarks:
                px, py = landmark["landmark"]
                if landmark.get("score", 1.0) >= min_kp_score:
                    points.append([float(px), float(py)])
                else:
                    # Low-confidence landmark: zero it out instead of dropping it,
                    # so the K positions keep their order
                    points.append([0.0, 0.0])

            xy = np.array(points, dtype=np.float32).reshape((1, -1, 2))  # (1, K, 2)
            converted.append(sv.KeyPoints(xy=xy))

    return converted

In [None]:
# --- Connection settings (read by every dg.load_model(...) call in this notebook) ---

# Where inference runs.
# NOTE(review): the earlier hint here read "or None for cloud"; the PySDK docs
# appear to name "@cloud" for cloud inference — confirm before switching.
inference_host_address = "@local"

# Model zoo the example models are loaded from (earlier hint: "or None for cloud").
zoo_url = "degirum/hailo"

# Cloud token if needed; empty by default.
token = ""

# Device type(s) handed to dg.load_model as `device_type`.
device_type = ["HAILORT/HAILO8L"]

In [None]:
# --- Object Detection Example (DeGirum → Supervision) ---

# Inputs for this example: the image to analyze and the model to run on it
image_detection = "../assets/ThreePersons.jpg"
detection_model_name = "yolov8n_relu6_face--640x640_quant_hailort_multidevice_1"

# Load the model using the connection settings defined earlier in the notebook
model_det = dg.load_model(
    model_name=detection_model_name,
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token,
    device_type=device_type,
)
print(f"Loaded detection model: {detection_model_name}")

# Run inference on the image
print(f"Running detection on: {image_detection}")
res_det = model_det(image_detection)

# Adapt the result to Supervision objects (Detections + one label string per box)
det_dets, det_labels = degirum_to_sv_detections(res_det)

# Annotators: green boxes, white label text on a black background
box_annotator = sv.BoxAnnotator(color=Color(0, 255, 0), thickness=2)
label_annotator = sv.LabelAnnotator(color=Color(0, 0, 0), text_color=Color(255, 255, 255))

# Draw on a copy so the image stored in the result stays untouched
img_det = box_annotator.annotate(scene=res_det.image.copy(), detections=det_dets)
img_det = label_annotator.annotate(scene=img_det, detections=det_dets, labels=det_labels)

# Show the annotated image
print("Press 'x' or 'q' to stop.")
with degirum_tools.Display("Object Detection Example") as output_display:
    output_display.show_image(img_det)

In [None]:
# --- Instance Segmentation (clean, single-pass annotators) ---

# Inputs for this example: the image to analyze and the model to run on it
image_segmentation = "../assets/ThreePersons.jpg"
seg_model_name = "yolov8n_coco_seg--640x640_quant_hailort_multidevice_1"

# Load the model using the connection settings defined earlier in the notebook
model_seg = dg.load_model(
    model_name=seg_model_name,
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token,
    device_type=device_type,
)
print(f"Loaded segmentation model: {seg_model_name}")

# Run inference on the image
print(f"Running segmentation on: {image_segmentation}")
res_seg = model_seg(image_segmentation)

# Adapt the result to Supervision objects (Detections + labels; .mask is set when masks are present)
seg_dets, seg_labels = degirum_to_sv_detections(res_seg)

# Annotators: masks (no color given, so the annotator's defaults apply), boxes, labels
mask_annotator = sv.MaskAnnotator()
box_annotator = sv.BoxAnnotator(color=Color(0, 255, 0), thickness=2)
label_annotator = sv.LabelAnnotator(
    color=Color(0, 0, 0),
    text_color=Color(255, 255, 255),
    text_position=Position.TOP_LEFT,
    text_scale=0.5,
    text_thickness=1,
    text_padding=4,
    border_radius=2,
)

# One pass per annotator on a copy of the image: masks first, then boxes, then labels
img_seg = mask_annotator.annotate(scene=res_seg.image.copy(), detections=seg_dets)
img_seg = box_annotator.annotate(scene=img_seg, detections=seg_dets)
img_seg = label_annotator.annotate(scene=img_seg, detections=seg_dets, labels=seg_labels)

# Show the annotated image
print("Press 'x' or 'q' to stop.")
with degirum_tools.Display("Instance Segmentation Example") as output_display:
    output_display.show_image(img_seg)

In [None]:
# --- Pose Estimation Example (DeGirum → Supervision) ---

# Inputs for this example: the image to analyze and the model to run on it
image_pose = "../assets/ThreePersons.jpg"
pose_model_name = "yolov8n_relu6_coco_pose--640x640_quant_hailort_hailo8l_1"

# Load the model using the connection settings defined earlier in the notebook
model_pose = dg.load_model(
    model_name=pose_model_name,
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token,
    device_type=device_type,
)
print(f"Loaded pose model: {pose_model_name}")

# Run inference and take a copy of the image to draw on
print(f"Running pose estimation on: {image_pose}")
res_pose = model_pose(image_pose)
img_pose = res_pose.image.copy()

# Adapt the result to Supervision KeyPoints (one object per result that has landmarks)
keypoints_list = degirum_to_sv_keypoints(res_pose, min_kp_score=0.3)

# Skeleton edges as pairs of keypoint indices.
# NOTE(review): the indices look 1-based (17 is used and COCO pose has 17
# keypoints); the joint names below assume that reading — confirm against the
# Supervision EdgeAnnotator docs.
COCO_EDGES = [
    (6, 7),    # left shoulder – right shoulder
    (6, 8),    # left shoulder – left elbow
    (8, 10),   # left elbow – left wrist
    (7, 9),    # right shoulder – right elbow
    (6, 12),   # left shoulder – left hip
    (7, 13),   # right shoulder – right hip
    (12, 14),  # left hip – left knee
    (14, 16),  # left knee – left ankle
    (13, 15),  # right hip – right knee
    (15, 17),  # right knee – right ankle
    (12, 13),  # left hip – right hip
    (9, 11),   # right elbow – right wrist
]

# Annotators for the keypoints and the skeleton edges
vertex_annotator = sv.VertexAnnotator(color=Color(0, 255, 0), radius=5)
edge_annotator = sv.EdgeAnnotator(color=Color(255, 0, 0), thickness=2, edges=COCO_EDGES)

# Draw each skeleton: edges first, then vertices on top of them
if not keypoints_list:
    print("No keypoints found.")
for kpts in keypoints_list:
    img_pose = edge_annotator.annotate(scene=img_pose, key_points=kpts)
    img_pose = vertex_annotator.annotate(scene=img_pose, key_points=kpts)

# Show the annotated image
print("Press 'x' or 'q' to stop.")
with degirum_tools.Display("Pose Estimation Example") as output_display:
    output_display.show_image(img_pose)