In [2]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.108-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [3]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
import random

def detect_vehicles_with_segmentation(input_image_path, output_image_path, model_path="yolov8m-seg.pt"):
    """Segment cars and trucks in an image and save an annotated copy.

    Runs a YOLOv8 segmentation model on the image, tints every car/truck mask
    with a random color, draws the matching bounding boxes with confidence
    labels, and writes the result to disk.

    Args:
        input_image_path: Path of the image to analyse.
        output_image_path: Path the annotated image is written to.
        model_path: YOLOv8 segmentation checkpoint name or path.

    Returns:
        The annotated image (a NumPy array with the same shape as the input).

    Raises:
        ValueError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    vehicle_class_ids = (2, 7)  # COCO class IDs: 2 = car, 7 = truck

    # Load the YOLOv8-seg model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")
    height, width = image.shape[:2]

    # retina_masks=True asks Ultralytics for masks at the original image
    # resolution. The default low-resolution masks still contain the letterbox
    # padding added for inference, so stretching them straight onto the image
    # misplaces the overlay.
    results = model.predict(source=input_image_path, conf=0.3, iou=0.45,
                            retina_masks=True, verbose=True)
    print(f"Prediction completed. Number of results: {len(results)}")

    annotated_image = image.copy()
    detections_found = False
    combined_mask_color = np.zeros_like(image, dtype=np.uint8)  # Colored overlay
    mask_region = np.zeros((height, width), dtype=bool)  # Union of all vehicle masks
    boxes_to_draw = []

    for result in results:
        boxes = getattr(result, 'boxes', None)
        masks = getattr(result, 'masks', None)

        # Collect vehicle boxes first; they are drawn after blending so the
        # outlines stay crisp on top of the tinted masks.
        if boxes is not None:
            print(f"Boxes detected: {len(boxes)}")
            for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                if int(cls) in vehicle_class_ids:
                    detections_found = True
                    x1, y1, x2, y2 = map(int, box)
                    label = f"Vehicle {conf:.2f} (Class: {int(cls)})"
                    boxes_to_draw.append((x1, y1, x2, y2, label))
                    print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        # Masks are paired with boxes by position, so boxes must be present too.
        if masks is not None and boxes is not None:
            print(f"Masks detected: {len(masks)}")
            for mask, cls in zip(masks.data, boxes.cls):
                if int(cls) not in vehicle_class_ids:
                    continue
                mask = mask.cpu().numpy()
                # Safety net in case the masks still differ from the image size.
                if mask.shape[:2] != (height, width):
                    mask = cv2.resize(mask.astype(np.float32), (width, height))
                mask = mask > 0.5  # Binarize the mask
                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                combined_mask_color[mask] = color
                mask_region |= mask

    # Blend only the masked pixels. Blending the whole frame against the mostly
    # black overlay would darken every non-vehicle pixel.
    if mask_region.any():
        alpha = 0.3  # Opacity of the mask tint
        blended = cv2.addWeighted(image, 1.0 - alpha, combined_mask_color, alpha, 0.0)
        annotated_image[mask_region] = blended[mask_region]

    # Draw bounding boxes and labels on top of the blended image
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box when there is room, otherwise below it
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # cv2.imwrite reports failure through its return value; do not claim success blindly.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")

    return annotated_image

if __name__ == "__main__":
    # Demo run: segment vehicles in one image using the medium checkpoint.
    model_path = "yolov8m-seg.pt"  # Medium YOLOv8 segmentation weights
    input_path = "input.jpeg"  # Source image; point this at your own file
    output_path = "parking_lot_detected_seg.jpg"  # Destination of the annotated image

    # Run the pipeline only when the source image is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path, model_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Loading model: yolov8m-seg.pt
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-seg.pt to 'yolov8m-seg.pt'...


100%|██████████| 52.4M/52.4M [00:00<00:00, 58.6MB/s]


Input image loaded: input.jpeg, shape: (1698, 3566, 3)

image 1/1 /content/input.jpeg: 320x640 1 person, 22 cars, 1277.2ms
Speed: 15.7ms preprocess, 1277.2ms inference, 100.2ms postprocess per image at shape (1, 3, 320, 640)
Prediction completed. Number of results: 1
Boxes detected: 23
Detection: Vehicle 0.93 (Class: 2), Box: (1, 1115, 524, 1684)
Detection: Vehicle 0.89 (Class: 2), Box: (810, 920, 1241, 1289)
Detection: Vehicle 0.89 (Class: 2), Box: (484, 415, 911, 709)
Detection: Vehicle 0.88 (Class: 2), Box: (1266, 467, 1552, 707)
Detection: Vehicle 0.87 (Class: 2), Box: (2928, 525, 3251, 775)
Detection: Vehicle 0.87 (Class: 2), Box: (0, 847, 340, 1258)
Detection: Vehicle 0.86 (Class: 2), Box: (2566, 320, 2844, 602)
Detection: Vehicle 0.85 (Class: 2), Box: (917, 459, 1239, 704)
Detection: Vehicle 0.85 (Class: 2), Box: (355, 872, 780, 1281)
Detection: Vehicle 0.84 (Class: 2), Box: (1950, 466, 2210, 736)
Detection: Vehicle 0.83 (Class: 2), Box: (3088, 908, 3557, 1372)
Detection: Vehicl

In [4]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
import random

def detect_vehicles_with_segmentation(input_image_path, output_image_path, model_path):
    """Segment cars and trucks in an image and save an annotated copy.

    Runs a YOLOv8 segmentation model on the image, tints every car/truck mask
    with a random color, draws the matching bounding boxes with confidence
    labels, and writes the result to disk.

    Args:
        input_image_path: Path of the image to analyse.
        output_image_path: Path the annotated image is written to.
        model_path: YOLOv8 segmentation checkpoint name or path
            (e.g. "yolov8n-seg.pt").

    Returns:
        The annotated image (a NumPy array with the same shape as the input).

    Raises:
        ValueError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    vehicle_class_ids = (2, 7)  # COCO class IDs: 2 = car, 7 = truck

    # Load the chosen YOLOv8 segmentation model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")
    height, width = image.shape[:2]

    # retina_masks=True asks Ultralytics for masks at the original image
    # resolution. The default low-resolution masks still contain the letterbox
    # padding added for inference, so stretching them straight onto the image
    # misplaces the overlay.
    results = model.predict(source=input_image_path, conf=0.3, iou=0.45,
                            retina_masks=True, verbose=True)
    print(f"Prediction completed. Number of results: {len(results)}")

    annotated_image = image.copy()
    detections_found = False
    combined_mask_color = np.zeros_like(image, dtype=np.uint8)  # Colored overlay
    mask_region = np.zeros((height, width), dtype=bool)  # Union of all vehicle masks
    boxes_to_draw = []

    # Loop over each result (could be multiple if processing a batch)
    for result in results:
        boxes = getattr(result, 'boxes', None)
        masks = getattr(result, 'masks', None)

        # Collect vehicle boxes first; they are drawn after blending so the
        # outlines stay crisp on top of the tinted masks.
        if boxes is not None:
            print(f"Boxes detected: {len(boxes)}")
            for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                if int(cls) in vehicle_class_ids:
                    detections_found = True
                    x1, y1, x2, y2 = map(int, box)
                    label = f"Vehicle {conf:.2f} (Class: {int(cls)})"
                    boxes_to_draw.append((x1, y1, x2, y2, label))
                    print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        # Masks are paired with boxes by position, so boxes must be present too.
        if masks is not None and boxes is not None:
            print(f"Masks detected: {len(masks)}")
            for mask, cls in zip(masks.data, boxes.cls):
                if int(cls) not in vehicle_class_ids:
                    continue
                mask = mask.cpu().numpy()  # Convert from tensor to NumPy array
                # Safety net in case the masks still differ from the image size.
                if mask.shape[:2] != (height, width):
                    mask = cv2.resize(mask.astype(np.float32), (width, height))
                mask = mask > 0.5  # Binarize mask with threshold
                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                combined_mask_color[mask] = color
                mask_region |= mask

    # Blend only the masked pixels. Blending the whole frame against the mostly
    # black overlay would darken every non-vehicle pixel.
    if mask_region.any():
        alpha = 0.3  # Opacity of the mask tint
        blended = cv2.addWeighted(image, 1.0 - alpha, combined_mask_color, alpha, 0.0)
        annotated_image[mask_region] = blended[mask_region]

    # Draw bounding boxes and labels on top of the blended image
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box when there is room, otherwise below it
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # cv2.imwrite reports failure through its return value; do not claim success blindly.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")
    return annotated_image

if __name__ == "__main__":
    # Demo run: swap model_path to compare the different checkpoint sizes.
    # Available checkpoints: "yolov8n-seg.pt", "yolov8s-seg.pt", "yolov8m-seg.pt",
    # "yolov8l-seg.pt", "yolov8x-seg.pt"
    model_path = "yolov8n-seg.pt"  # Nano variant

    input_path = "input.jpeg"   # Source image; point this at your own file
    output_path = "output_detected_seg.jpg"  # Destination of the annotated image

    # Run the pipeline only when the source image is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path, model_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")


Loading model: yolov8n-seg.pt
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt to 'yolov8n-seg.pt'...


100%|██████████| 6.74M/6.74M [00:00<00:00, 19.1MB/s]


Input image loaded: input.jpeg, shape: (1698, 3566, 3)

image 1/1 /content/input.jpeg: 320x640 21 cars, 1 truck, 199.1ms
Speed: 3.8ms preprocess, 199.1ms inference, 56.9ms postprocess per image at shape (1, 3, 320, 640)
Prediction completed. Number of results: 1
Boxes detected: 22
Detection: Vehicle 0.88 (Class: 2), Box: (2926, 520, 3255, 767)
Detection: Vehicle 0.85 (Class: 2), Box: (3154, 410, 3436, 624)
Detection: Vehicle 0.85 (Class: 2), Box: (915, 462, 1240, 700)
Detection: Vehicle 0.83 (Class: 2), Box: (0, 1112, 526, 1682)
Detection: Vehicle 0.81 (Class: 2), Box: (0, 846, 341, 1262)
Detection: Vehicle 0.81 (Class: 2), Box: (354, 872, 779, 1285)
Detection: Vehicle 0.80 (Class: 2), Box: (807, 916, 1241, 1291)
Detection: Vehicle 0.80 (Class: 2), Box: (1265, 469, 1547, 706)
Detection: Vehicle 0.74 (Class: 2), Box: (485, 415, 909, 705)
Detection: Vehicle 0.74 (Class: 2), Box: (2578, 327, 2848, 594)
Detection: Vehicle 0.73 (Class: 2), Box: (1629, 300, 1877, 556)
Detection: Vehicle 0.72

In [5]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
import random

def detect_vehicles_with_segmentation(input_image_path, output_image_path, model_path):
    """Segment cars and trucks in an image and save an annotated copy.

    Runs a YOLOv8 segmentation model on the image, tints every car/truck mask
    with a random color, draws the matching bounding boxes with confidence
    labels, and writes the result to disk.

    Args:
        input_image_path: Path of the image to analyse.
        output_image_path: Path the annotated image is written to.
        model_path: YOLOv8 segmentation checkpoint name or path
            (e.g. "yolov8s-seg.pt").

    Returns:
        The annotated image (a NumPy array with the same shape as the input).

    Raises:
        ValueError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    vehicle_class_ids = (2, 7)  # COCO class IDs: 2 = car, 7 = truck

    # Load the chosen YOLOv8 segmentation model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")
    height, width = image.shape[:2]

    # retina_masks=True asks Ultralytics for masks at the original image
    # resolution. The default low-resolution masks still contain the letterbox
    # padding added for inference, so stretching them straight onto the image
    # misplaces the overlay.
    results = model.predict(source=input_image_path, conf=0.3, iou=0.45,
                            retina_masks=True, verbose=True)
    print(f"Prediction completed. Number of results: {len(results)}")

    annotated_image = image.copy()
    detections_found = False
    combined_mask_color = np.zeros_like(image, dtype=np.uint8)  # Colored overlay
    mask_region = np.zeros((height, width), dtype=bool)  # Union of all vehicle masks
    boxes_to_draw = []

    # Loop over each result (could be multiple if processing a batch)
    for result in results:
        boxes = getattr(result, 'boxes', None)
        masks = getattr(result, 'masks', None)

        # Collect vehicle boxes first; they are drawn after blending so the
        # outlines stay crisp on top of the tinted masks.
        if boxes is not None:
            print(f"Boxes detected: {len(boxes)}")
            for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                if int(cls) in vehicle_class_ids:
                    detections_found = True
                    x1, y1, x2, y2 = map(int, box)
                    label = f"Vehicle {conf:.2f} (Class: {int(cls)})"
                    boxes_to_draw.append((x1, y1, x2, y2, label))
                    print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        # Masks are paired with boxes by position, so boxes must be present too.
        if masks is not None and boxes is not None:
            print(f"Masks detected: {len(masks)}")
            for mask, cls in zip(masks.data, boxes.cls):
                if int(cls) not in vehicle_class_ids:
                    continue
                mask = mask.cpu().numpy()  # Convert from tensor to NumPy array
                # Safety net in case the masks still differ from the image size.
                if mask.shape[:2] != (height, width):
                    mask = cv2.resize(mask.astype(np.float32), (width, height))
                mask = mask > 0.5  # Binarize mask with threshold
                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                combined_mask_color[mask] = color
                mask_region |= mask

    # Blend only the masked pixels. Blending the whole frame against the mostly
    # black overlay would darken every non-vehicle pixel.
    if mask_region.any():
        alpha = 0.3  # Opacity of the mask tint
        blended = cv2.addWeighted(image, 1.0 - alpha, combined_mask_color, alpha, 0.0)
        annotated_image[mask_region] = blended[mask_region]

    # Draw bounding boxes and labels on top of the blended image
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box when there is room, otherwise below it
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # cv2.imwrite reports failure through its return value; do not claim success blindly.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")
    return annotated_image

if __name__ == "__main__":
    # Demo run: swap model_path to compare the different checkpoint sizes.
    # Available checkpoints: "yolov8n-seg.pt", "yolov8s-seg.pt", "yolov8m-seg.pt",
    # "yolov8l-seg.pt", "yolov8x-seg.pt"
    model_path = "yolov8s-seg.pt"  # Small variant

    input_path = "input.jpeg"   # Source image; point this at your own file
    output_path = "output_detected_seg.jpg"  # Destination of the annotated image

    # Run the pipeline only when the source image is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path, model_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")


Loading model: yolov8s-seg.pt
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s-seg.pt to 'yolov8s-seg.pt'...


100%|██████████| 22.8M/22.8M [00:00<00:00, 46.7MB/s]


Input image loaded: input.jpeg, shape: (1698, 3566, 3)

image 1/1 /content/input.jpeg: 320x640 1 person, 17 cars, 7 trucks, 497.6ms
Speed: 3.3ms preprocess, 497.6ms inference, 97.1ms postprocess per image at shape (1, 3, 320, 640)
Prediction completed. Number of results: 1
Boxes detected: 25
Detection: Vehicle 0.90 (Class: 2), Box: (0, 1114, 525, 1690)
Detection: Vehicle 0.88 (Class: 2), Box: (3155, 409, 3435, 621)
Detection: Vehicle 0.86 (Class: 2), Box: (917, 462, 1236, 705)
Detection: Vehicle 0.86 (Class: 2), Box: (3086, 909, 3561, 1374)
Detection: Vehicle 0.85 (Class: 2), Box: (2929, 521, 3255, 769)
Detection: Vehicle 0.85 (Class: 2), Box: (1267, 465, 1553, 707)
Detection: Vehicle 0.82 (Class: 2), Box: (811, 919, 1241, 1293)
Detection: Vehicle 0.79 (Class: 2), Box: (0, 851, 345, 1265)
Detection: Vehicle 0.77 (Class: 2), Box: (2568, 319, 2849, 602)
Detection: Vehicle 0.73 (Class: 7), Box: (1233, 99, 1457, 286)
Detection: Vehicle 0.70 (Class: 2), Box: (2251, 428, 2554, 740)
Detection

In [6]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
import random

def detect_vehicles_with_segmentation(input_image_path, output_image_path, model_path):
    """Segment cars and trucks in an image and save an annotated copy.

    Runs a YOLOv8 segmentation model on the image, tints every car/truck mask
    with a random color, draws the matching bounding boxes with confidence
    labels, and writes the result to disk.

    Args:
        input_image_path: Path of the image to analyse.
        output_image_path: Path the annotated image is written to.
        model_path: YOLOv8 segmentation checkpoint name or path
            (e.g. "yolov8l-seg.pt").

    Returns:
        The annotated image (a NumPy array with the same shape as the input).

    Raises:
        ValueError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    vehicle_class_ids = (2, 7)  # COCO class IDs: 2 = car, 7 = truck

    # Load the chosen YOLOv8 segmentation model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")
    height, width = image.shape[:2]

    # retina_masks=True asks Ultralytics for masks at the original image
    # resolution. The default low-resolution masks still contain the letterbox
    # padding added for inference, so stretching them straight onto the image
    # misplaces the overlay.
    results = model.predict(source=input_image_path, conf=0.3, iou=0.45,
                            retina_masks=True, verbose=True)
    print(f"Prediction completed. Number of results: {len(results)}")

    annotated_image = image.copy()
    detections_found = False
    combined_mask_color = np.zeros_like(image, dtype=np.uint8)  # Colored overlay
    mask_region = np.zeros((height, width), dtype=bool)  # Union of all vehicle masks
    boxes_to_draw = []

    # Loop over each result (could be multiple if processing a batch)
    for result in results:
        boxes = getattr(result, 'boxes', None)
        masks = getattr(result, 'masks', None)

        # Collect vehicle boxes first; they are drawn after blending so the
        # outlines stay crisp on top of the tinted masks.
        if boxes is not None:
            print(f"Boxes detected: {len(boxes)}")
            for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                if int(cls) in vehicle_class_ids:
                    detections_found = True
                    x1, y1, x2, y2 = map(int, box)
                    label = f"Vehicle {conf:.2f} (Class: {int(cls)})"
                    boxes_to_draw.append((x1, y1, x2, y2, label))
                    print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        # Masks are paired with boxes by position, so boxes must be present too.
        if masks is not None and boxes is not None:
            print(f"Masks detected: {len(masks)}")
            for mask, cls in zip(masks.data, boxes.cls):
                if int(cls) not in vehicle_class_ids:
                    continue
                mask = mask.cpu().numpy()  # Convert from tensor to NumPy array
                # Safety net in case the masks still differ from the image size.
                if mask.shape[:2] != (height, width):
                    mask = cv2.resize(mask.astype(np.float32), (width, height))
                mask = mask > 0.5  # Binarize mask with threshold
                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                combined_mask_color[mask] = color
                mask_region |= mask

    # Blend only the masked pixels. Blending the whole frame against the mostly
    # black overlay would darken every non-vehicle pixel.
    if mask_region.any():
        alpha = 0.3  # Opacity of the mask tint
        blended = cv2.addWeighted(image, 1.0 - alpha, combined_mask_color, alpha, 0.0)
        annotated_image[mask_region] = blended[mask_region]

    # Draw bounding boxes and labels on top of the blended image
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box when there is room, otherwise below it
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # cv2.imwrite reports failure through its return value; do not claim success blindly.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")
    return annotated_image

if __name__ == "__main__":
    # Demo run: swap model_path to compare the different checkpoint sizes.
    # Available checkpoints: "yolov8n-seg.pt", "yolov8s-seg.pt", "yolov8m-seg.pt",
    # "yolov8l-seg.pt", "yolov8x-seg.pt"
    model_path = "yolov8l-seg.pt"  # Large variant

    input_path = "input.jpeg"   # Source image; point this at your own file
    output_path = "output_detected_seg.jpg"  # Destination of the annotated image

    # Run the pipeline only when the source image is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path, model_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")


Loading model: yolov8l-seg.pt
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-seg.pt to 'yolov8l-seg.pt'...


100%|██████████| 88.1M/88.1M [00:02<00:00, 34.2MB/s]


Input image loaded: input.jpeg, shape: (1698, 3566, 3)

image 1/1 /content/input.jpeg: 320x640 2 persons, 22 cars, 2020.5ms
Speed: 3.2ms preprocess, 2020.5ms inference, 50.8ms postprocess per image at shape (1, 3, 320, 640)
Prediction completed. Number of results: 1
Boxes detected: 24
Detection: Vehicle 0.91 (Class: 2), Box: (0, 1113, 522, 1687)
Detection: Vehicle 0.89 (Class: 2), Box: (487, 414, 910, 708)
Detection: Vehicle 0.89 (Class: 2), Box: (0, 849, 340, 1259)
Detection: Vehicle 0.88 (Class: 2), Box: (813, 918, 1238, 1287)
Detection: Vehicle 0.86 (Class: 2), Box: (2570, 320, 2844, 600)
Detection: Vehicle 0.86 (Class: 2), Box: (2932, 522, 3253, 773)
Detection: Vehicle 0.86 (Class: 2), Box: (193, 417, 547, 682)
Detection: Vehicle 0.86 (Class: 2), Box: (916, 455, 1237, 701)
Detection: Vehicle 0.86 (Class: 2), Box: (1264, 467, 1548, 703)
Detection: Vehicle 0.86 (Class: 2), Box: (350, 870, 778, 1280)
Detection: Vehicle 0.82 (Class: 2), Box: (3158, 410, 3432, 619)
Detection: Vehicle 0.

In [7]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
import random

def detect_vehicles_with_segmentation(input_image_path, output_image_path, model_path):
    """Segment cars and trucks in an image and save an annotated copy.

    Runs a YOLOv8 segmentation model on the image, tints every car/truck mask
    with a random color, draws the matching bounding boxes with confidence
    labels, and writes the result to disk.

    Args:
        input_image_path: Path of the image to analyse.
        output_image_path: Path the annotated image is written to.
        model_path: YOLOv8 segmentation checkpoint name or path
            (e.g. "yolov8x-seg.pt").

    Returns:
        The annotated image (a NumPy array with the same shape as the input).

    Raises:
        ValueError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    vehicle_class_ids = (2, 7)  # COCO class IDs: 2 = car, 7 = truck

    # Load the chosen YOLOv8 segmentation model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")
    height, width = image.shape[:2]

    # retina_masks=True asks Ultralytics for masks at the original image
    # resolution. The default low-resolution masks still contain the letterbox
    # padding added for inference, so stretching them straight onto the image
    # misplaces the overlay.
    results = model.predict(source=input_image_path, conf=0.3, iou=0.45,
                            retina_masks=True, verbose=True)
    print(f"Prediction completed. Number of results: {len(results)}")

    annotated_image = image.copy()
    detections_found = False
    combined_mask_color = np.zeros_like(image, dtype=np.uint8)  # Colored overlay
    mask_region = np.zeros((height, width), dtype=bool)  # Union of all vehicle masks
    boxes_to_draw = []

    # Loop over each result (could be multiple if processing a batch)
    for result in results:
        boxes = getattr(result, 'boxes', None)
        masks = getattr(result, 'masks', None)

        # Collect vehicle boxes first; they are drawn after blending so the
        # outlines stay crisp on top of the tinted masks.
        if boxes is not None:
            print(f"Boxes detected: {len(boxes)}")
            for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                if int(cls) in vehicle_class_ids:
                    detections_found = True
                    x1, y1, x2, y2 = map(int, box)
                    label = f"Vehicle {conf:.2f} (Class: {int(cls)})"
                    boxes_to_draw.append((x1, y1, x2, y2, label))
                    print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        # Masks are paired with boxes by position, so boxes must be present too.
        if masks is not None and boxes is not None:
            print(f"Masks detected: {len(masks)}")
            for mask, cls in zip(masks.data, boxes.cls):
                if int(cls) not in vehicle_class_ids:
                    continue
                mask = mask.cpu().numpy()  # Convert from tensor to NumPy array
                # Safety net in case the masks still differ from the image size.
                if mask.shape[:2] != (height, width):
                    mask = cv2.resize(mask.astype(np.float32), (width, height))
                mask = mask > 0.5  # Binarize mask with threshold
                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                combined_mask_color[mask] = color
                mask_region |= mask

    # Blend only the masked pixels. Blending the whole frame against the mostly
    # black overlay would darken every non-vehicle pixel.
    if mask_region.any():
        alpha = 0.3  # Opacity of the mask tint
        blended = cv2.addWeighted(image, 1.0 - alpha, combined_mask_color, alpha, 0.0)
        annotated_image[mask_region] = blended[mask_region]

    # Draw bounding boxes and labels on top of the blended image
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box when there is room, otherwise below it
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # cv2.imwrite reports failure through its return value; do not claim success blindly.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")
    return annotated_image

if __name__ == "__main__":
    # Demo run: swap model_path to compare the different checkpoint sizes.
    # Available checkpoints: "yolov8n-seg.pt", "yolov8s-seg.pt", "yolov8m-seg.pt",
    # "yolov8l-seg.pt", "yolov8x-seg.pt"
    model_path = "yolov8x-seg.pt"  # Extra-large variant

    input_path = "input.jpeg"   # Source image; point this at your own file
    output_path = "output_detected_seg.jpg"  # Destination of the annotated image

    # Run the pipeline only when the source image is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path, model_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")


Loading model: yolov8x-seg.pt
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt to 'yolov8x-seg.pt'...


100%|██████████| 137M/137M [00:03<00:00, 36.1MB/s]


Input image loaded: input.jpeg, shape: (1698, 3566, 3)

image 1/1 /content/input.jpeg: 320x640 1 person, 22 cars, 3471.4ms
Speed: 3.0ms preprocess, 3471.4ms inference, 63.5ms postprocess per image at shape (1, 3, 320, 640)
Prediction completed. Number of results: 1
Boxes detected: 23
Detection: Vehicle 0.92 (Class: 2), Box: (811, 919, 1239, 1287)
Detection: Vehicle 0.90 (Class: 2), Box: (486, 417, 910, 707)
Detection: Vehicle 0.90 (Class: 2), Box: (0, 1114, 524, 1685)
Detection: Vehicle 0.89 (Class: 2), Box: (0, 851, 340, 1258)
Detection: Vehicle 0.87 (Class: 2), Box: (2930, 522, 3251, 773)
Detection: Vehicle 0.87 (Class: 2), Box: (349, 871, 780, 1280)
Detection: Vehicle 0.86 (Class: 2), Box: (916, 457, 1236, 702)
Detection: Vehicle 0.85 (Class: 2), Box: (1264, 467, 1551, 707)
Detection: Vehicle 0.85 (Class: 2), Box: (2569, 318, 2847, 599)
Detection: Vehicle 0.83 (Class: 2), Box: (1631, 280, 1886, 553)
Detection: Vehicle 0.83 (Class: 2), Box: (3158, 412, 3435, 619)
Detection: Vehicle 0