feat: add ultralytics image instance segmentation
Showing 2 changed files with 111 additions and 0 deletions.
hyko_toolkit/models/computer_vision/ultralytics/image_instance_segmentation/main.py (68 additions, 0 deletions)
import math

import cv2
import cvzone
import numpy as np
from fastapi import HTTPException
from metadata import Inputs, Outputs, Params, StartupParams, func
from ultralytics import YOLO

from hyko_sdk.io import Image


@func.on_startup
async def load(startup_params: StartupParams):
    global model, device_map
    device_map = startup_params.device_map
    # The enum member *name* (e.g. "yolov8n") selects the segmentation weights
    model = YOLO(f"{startup_params.model.name}-seg.pt")
    if device_map == "auto":
        raise HTTPException(
            status_code=500, detail="AUTO not available as device_map in this Tool."
        )


@func.on_execute
async def main(inputs: Inputs, params: Params) -> Outputs:
    img = inputs.input_image.to_ndarray()
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # Use the model to predict the objects in the image
    res = model.predict(source=img, conf=params.threshold, device=device_map)
    # Get the masks and the original image from the prediction results.
    # masks is None when nothing is detected, but then boxes is empty and
    # the loop below is simply skipped.
    masks = res[0].masks
    orig_img = res[0].orig_img
    overlay = np.zeros_like(orig_img)
    for mask_index, box in enumerate(res[0].boxes):
        # Get the xy polygon coordinates of the current mask
        selected_mask_xy = masks.xy[mask_index]
        # Create a blank mask with the same dimensions as the original image
        selected_mask = np.zeros(orig_img.shape[:2], dtype=np.uint8)
        # Fill the polygon of the current mask
        cv2.fillPoly(
            selected_mask, [np.array(selected_mask_xy).astype(np.int32)], color=255
        )
        # Color the overlay green wherever the mask is set
        overlay[selected_mask != 0] = (0, 255, 0)
        # Look up the class name of the current box
        object_name = res[0].names[int(box.cls[0])]
        # Get the coordinates of the bounding box
        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
        # Round the confidence up to two decimal places
        conf = math.ceil(box.conf[0] * 100) / 100
        w, h = x2 - x1, y2 - y1
        # Draw a corner-styled bounding box around the object
        cvzone.cornerRect(orig_img, (x1, y1, w, h), l=3, rt=1)
        # Draw the class name and confidence score above the box
        cvzone.putTextRect(
            orig_img,
            f"{object_name} {conf}",
            (max(0, x1), max(20, y1)),
            thickness=1,
            colorR=(0, 0, 255),
            scale=0.9,
            offset=3,
        )
    # Blend the original image with the overlay to highlight the detected objects
    masked_image = cv2.addWeighted(orig_img, 0.7, overlay, 0.3, 0)
    # Convert back from BGR to RGB before returning
    return Outputs(image=Image.from_ndarray(masked_image[:, :, ::-1]))
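For quick local verification, the same fillPoly/addWeighted overlay technique can be exercised outside the Hyko runtime. A minimal sketch, assuming yolov8n-seg.pt weights (downloaded by ultralytics on first use) and a placeholder input file example.jpg:

import cv2
import numpy as np
from ultralytics import YOLO

model = YOLO("yolov8n-seg.pt")  # hypothetical local weights; downloaded on first use
img = cv2.imread("example.jpg")  # OpenCV loads BGR, matching main.py after cvtColor
res = model.predict(source=img, conf=0.5, device="cpu")

overlay = np.zeros_like(img)
if res[0].masks is not None:  # masks is None when nothing is detected
    for polygon in res[0].masks.xy:  # one (N, 2) float array per instance
        mask = np.zeros(img.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [polygon.astype(np.int32)], color=255)
        overlay[mask != 0] = (0, 255, 0)  # tint segmented pixels green

blended = cv2.addWeighted(img, 0.7, overlay, 0.3, 0)  # same 0.7/0.3 blend as main.py
cv2.imwrite("segmented.jpg", blended)

The 0.7/0.3 blend keeps the original image dominant while visibly tinting the segmented regions, which is why the tool draws boxes and labels on orig_img before blending.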
hyko_toolkit/models/computer_vision/ultralytics/image_instance_segmentation/metadata.py (43 additions, 0 deletions)
from enum import Enum

from pydantic import Field

from hyko_sdk.definitions import ToolkitModel
from hyko_sdk.io import Image
from hyko_sdk.models import CoreModel

func = ToolkitModel(
    name="ultralytics_image_instance_segmentation",
    task="computer_vision",
    description="Ultralytics instance segmentation using YOLOv8 segmentation models.",
)


class SupportedModels(str, Enum):
    # The member name (left) builds the weights filename in main.py;
    # the value (right) is only a human-readable label.
    yolov8n = "yolov8_Nano"
    yolov8m = "yolov8_Medium"
    yolov8l = "yolov8_Large"


@func.set_startup_params
class StartupParams(CoreModel):
    model: SupportedModels = Field(..., description="YOLOv8 segmentation model size.")
    device_map: str = Field(
        ..., description="Device map (cpu or cuda; auto is rejected by this tool)."
    )


@func.set_input
class Inputs(CoreModel):
    input_image: Image = Field(..., description="Input image.")


@func.set_param
class Params(CoreModel):
    threshold: float = Field(
        default=0.5,
        description="The minimum confidence required to keep a prediction (default: 0.5).",
    )


@func.set_output
class Outputs(CoreModel):
    image: Image = Field(..., description="Labeled image.")
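One subtlety worth noting: load() in main.py derives the weights filename from the enum member name, not its display value. A small sketch, assuming CoreModel behaves like a plain pydantic model so StartupParams can be instantiated directly (illustrative only, not part of the toolkit):

# Hypothetical local check; imports mirror those used by main.py
from metadata import StartupParams, SupportedModels

params = StartupParams(model=SupportedModels.yolov8n, device_map="cpu")
print(f"{params.model.name}-seg.pt")  # -> "yolov8n-seg.pt" (what YOLO() loads)
print(params.model.value)             # -> "yolov8_Nano" (label only, never a filename)

Renaming an enum member would therefore change which weights file is requested, while changing its value would only affect the label shown to users.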