In [1]:
import os
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from ultralytics import YOLO

In [None]:
# All inputs are looked up relative to the directory the kernel was started in.
cwd = os.getcwd()

# Segmentation weights to load (the larger "yolo11l_segment.pt" is an alternative).
model_path = os.path.join(cwd, "yolo11n_segment.pt")

# Photo that the rest of the notebook runs inference on.
image = os.path.join(cwd, "4C89E3D0-BE91-4503-8215-D55185D7AAA9_1_102_o.jpeg")

In [3]:
# Pick the best available accelerator instead of hard-coding Apple's Metal
# backend, so the notebook also runs on machines without MPS support.
# On a Mac with MPS available this still selects "mps", as before.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the weights from `model_path` and move the model to the chosen device.
model = YOLO(model=model_path).to(device)

In [4]:
# Run the model on the configured image; `show=True` is passed through to
# Ultralytics (presumably to display the annotated prediction — confirm in its docs).
results = model.predict(source=image, show=True)


image 1/1 /Users/ciprianbangu/smart_tag/python/4C89E3D0-BE91-4503-8215-D55185D7AAA9_1_102_o.jpeg: 640x480 12 persons, 2 pizzas, 2 chairs, 1 tv, 385.4ms
Speed: 11.6ms preprocess, 385.4ms inference, 1740.8ms postprocess per image at shape (1, 3, 640, 480)


In [8]:
# Inspect the first entry of `results`; as the last expression of the cell,
# its repr becomes the cell output.
results[0]

ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: ultralytics.engine.results.Masks object
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair

In [9]:
# Walk over every prediction result and every detection inside it.
# When the loops finish, `img`, `img_name`, `c` and `label` hold the values of
# the LAST result / LAST detection; the mask-building cell reads `img` and `c`.
for result in results:
    # File name of the source image without directory or extension.
    img_name = Path(result.path).stem
    # Private copy of the image array stored on the result, so later drawing
    # does not modify the result's own data.
    img = np.copy(result.orig_img)

    for ci, c in enumerate(result):
        # `cls` holds the class id(s) of this detection; the last one is used
        # as the key into the id -> name mapping.
        class_ids = c.boxes.cls.tolist()
        label = c.names[class_ids.pop()]

In [10]:
# All-black single-channel canvas with the same height and width as `img`.
binary_mask = np.zeros(img.shape[:2], np.uint8)

# Take the outline polygon of the detection `c`.
# It is read with [-1] instead of pop(): pop() removes the entry from the list
# returned by `xy`, and if that list is cached on the Masks object (as appears
# to be the case in Ultralytics — confirm), re-running this cell would fail
# with "pop from empty list". Indexing returns the same element without
# mutating anything, so the cell can be run repeatedly.
contour = c.masks.xy[-1]

# OpenCV's contour functions expect int32 points shaped (N, 1, 2).
contour = contour.astype(np.int32).reshape(-1, 1, 2)

# Fill the polygon with white on the black canvas; the return value is
# assigned to `_` only to keep it out of the cell output.
_ = cv2.drawContours(binary_mask, [contour], -1, (255, 255, 255), cv2.FILLED)

In [11]:
# Expand the single-channel mask to three channels so it can be combined
# with `img` (assumed to be a 3-channel BGR image — confirm).
mask3ch = cv2.cvtColor(binary_mask, cv2.COLOR_GRAY2BGR)

# Isolate the object: pixels outside the mask become black, pixels inside
# keep their original values (bitwise AND with 255 is the identity).
isolated = cv2.bitwise_and(img, mask3ch)

In [12]:
# Blank black single-channel canvas with the height and width of the original
# image. The size is taken from the prediction result itself rather than from
# `binary_mask`, so this cell no longer depends on the mask cell having run.
contour_image = np.zeros(results[0].orig_img.shape[:2], dtype=np.uint8)

# Collect and draw the outline of every detection classified as "person".
contour_array = []
for result in results:
    for c in result:
        # Skip everything that is not a person.
        if c.names[c.boxes.cls.tolist()[-1]] != "person":
            continue
        # Read the polygon with [-1] instead of pop() so the list returned by
        # `xy` is not mutated; convert to the int32 (N, 1, 2) layout OpenCV
        # expects for contours.
        contour = c.masks.xy[-1].astype(np.int32).reshape(-1, 1, 2)
        contour_array.append(contour)
        # Fill the contour in white on the blank canvas.
        cv2.drawContours(contour_image, [contour], -1, 255, thickness=cv2.FILLED)

# Write each contour's index at its centroid, in mid-grey so it is visible
# on top of the white fill.
for idx, contour in enumerate(contour_array):
    M = cv2.moments(contour)
    # m00 is the contour area; a zero area has no centroid, so skip it.
    if M["m00"] != 0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
        cv2.putText(contour_image, str(idx), (cX, cY),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=5, color=(127), thickness=5, lineType=cv2.LINE_AA)

# Save the result. cv2.imwrite reports failure through its return value,
# so check it instead of announcing success unconditionally.
output_path = os.path.join(cwd, "contour_image.png")
if not cv2.imwrite(output_path, contour_image):
    raise OSError(f"Could not write contour image to {output_path}")
print(f"Contour image saved at {output_path}")

Contour image saved at /Users/ciprianbangu/smart_tag/python/contour_image.png


In [13]:
# Load the original image from disk (OpenCV returns it in BGR order).
# cv2.imread signals failure by returning None, so fail early with a clear error.
base_image = cv2.imread(image)
if base_image is None:
    raise FileNotFoundError(f"Could not read image at {image}")

# Opacity of the red person fill: 0 = invisible, 1 = fully opaque.
OVERLAY_ALPHA = 0.5

# Draw the filled person contours on a separate copy so they can be blended
# with the untouched image afterwards.
filled_image = base_image.copy()
contour_array = []
for result in results:
    for c in result:
        if c.names[c.boxes.cls.tolist()[-1]] != "person":
            continue
        # Read the polygon with [-1] instead of pop() so the list returned by
        # `xy` is not mutated; convert to OpenCV's int32 (N, 1, 2) layout.
        contour = c.masks.xy[-1].astype(np.int32).reshape(-1, 1, 2)
        contour_array.append(contour)
        # Solid red fill (BGR); transparency comes from the blend below.
        cv2.drawContours(filled_image, [contour], -1, color=(0, 0, 255), thickness=cv2.FILLED)

# Blend the filled copy with the original so the fill is actually
# semi-transparent; pixels outside the contours are identical in both
# images and therefore stay unchanged.
overlay_image = cv2.addWeighted(filled_image, OVERLAY_ALPHA, base_image, 1 - OVERLAY_ALPHA, 0)

# Write each contour's index at its centroid, after blending so the text
# stays fully opaque white.
for idx, contour in enumerate(contour_array):
    M = cv2.moments(contour)
    # m00 is the contour area; a zero area has no centroid, so skip it.
    if M["m00"] != 0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
        cv2.putText(overlay_image, str(idx), (cX, cY),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=5, color=(255, 255, 255),
                    thickness=5, lineType=cv2.LINE_AA)

# Save the image; cv2.imwrite reports failure through its return value.
output_path = os.path.join(cwd, "contour_overlay.png")
if not cv2.imwrite(output_path, overlay_image):
    raise OSError(f"Could not write overlay image to {output_path}")
print(f"Overlay image saved at {output_path}")

Overlay image saved at /Users/ciprianbangu/smart_tag/python/contour_overlay.png


In [14]:
# Inspect the first mask of `c`, i.e. the detection left behind by the most
# recently executed loop over the results.
c.masks[0]

ultralytics.engine.results.Masks object with attributes:

data: tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]], device='mps:0')
orig_shape: (2048, 1536)
shape: torch.Size([1, 640, 480])
xy: [array([[       1024,       662.4],
       [     1020.8,       665.6],
       [     1011.2,       665.6],
       [       1008,       668.8],
       [       1008,       710.4],
       [     1004.8,       713.6],
       [     1004.8,       726.4],
       [     1001.6,       729.6],
       [     1001.6,       732.8],
       [      998.4,         736],
       [      998.4,       742.4],
       [      995.2,       745.6],
       [      995.2,         752],
       [      988.8,       758.4],
       [      988.8,       761.6],
       [      982.4,         768],
       [      982.4,       777.6],


In [15]:
# Print the boxes of the first detection only. `result` is not defined in this
# cell: it is whatever the last executed `for result in results` loop left behind.
for ci, c in enumerate(result):
    print(c.boxes)
    break  # stop after the first detection

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0.], device='mps:0')
conf: tensor([0.9322], device='mps:0')
data: tensor([[7.1396e+02, 5.8304e+02, 1.4371e+03, 1.9588e+03, 9.3221e-01, 0.0000e+00]], device='mps:0')
id: None
is_track: False
orig_shape: (2048, 1536)
shape: torch.Size([1, 6])
xywh: tensor([[1075.5232, 1270.9373,  723.1202, 1375.7883]], device='mps:0')
xywhn: tensor([[0.7002, 0.6206, 0.4708, 0.6718]], device='mps:0')
xyxy: tensor([[ 713.9630,  583.0432, 1437.0833, 1958.8314]], device='mps:0')
xyxyn: tensor([[0.4648, 0.2847, 0.9356, 0.9565]], device='mps:0')


In [None]:
# Export the loaded model in Core ML format.
# NOTE(review): the saved output of this cell mentions yolo11l_segment.pt, while
# the path cell currently selects yolo11n_segment.pt — presumably stale output
# from an earlier run; re-run to confirm.
model.export(format='coreml')

Ultralytics 8.3.96 🚀 Python-3.12.9 torch-2.5.0 CPU (Apple M2 Pro)

[34m[1mPyTorch:[0m starting from '/Users/ciprianbangu/smart_tag/python/yolo11l_segment.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (53.5 MB)

[34m[1mCoreML:[0m starting export with coremltools 8.2...


Tuple detected at graph output. This will be flattened in the converted model.
Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 1465/1467 [00:00<00:00, 7866.31 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 86.45 passes/s]
Running MIL default pipeline: 100%|██████████| 89/89 [00:02<00:00, 41.47 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 86.34 passes/s]


[34m[1mCoreML:[0m export success âœ… 18.4s, saved as '/Users/ciprianbangu/smart_tag/python/yolo11l_segment.mlpackage' (53.1 MB)

Export complete (19.2s)
Results saved to [1m/Users/ciprianbangu/smart_tag/python[0m
Predict:         yolo predict task=segment model=/Users/ciprianbangu/smart_tag/python/yolo11l_segment.mlpackage imgsz=640  
Validate:        yolo val task=segment model=/Users/ciprianbangu/smart_tag/python/yolo11l_segment.mlpackage imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco.yaml  
Visualize:       https://netron.app


'/Users/ciprianbangu/smart_tag/python/yolo11l_segment.mlpackage'

: 