In [1]:
# Install the Ultralytics package into the notebook environment.
# NOTE(review): consider `%pip install` with a pinned version so the install
# targets the running kernel and the notebook is reproducible — confirm.
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.108-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [5]:
import os
from ultralytics import YOLO
import cv2
import numpy as np
import random

def detect_vehicles_with_segmentation(
    input_image_path,
    output_image_path,
    model_path="yolo11n-seg.pt",
    conf_threshold=0.3,
    iou_threshold=0.45,
    vehicle_class_ids=(2, 7),
):
    """
    Detect vehicles in an image using YOLOv11 segmentation model and save the annotated output.

    Args:
        input_image_path (str): Path to the input image.
        output_image_path (str): Path where the annotated image will be saved.
        model_path (str): Path or name of the YOLOv11 segmentation model (default: 'yolo11n-seg.pt').
        conf_threshold (float): Minimum confidence passed to ``model.predict`` (default: 0.3).
        iou_threshold (float): NMS IoU threshold passed to ``model.predict`` (default: 0.45).
        vehicle_class_ids (iterable of int): Class IDs kept as vehicles.
            Defaults to COCO IDs 2 (car) and 7 (truck).

    Returns:
        numpy.ndarray: The annotated BGR image that was written to ``output_image_path``.

    Raises:
        FileNotFoundError: If ``input_image_path`` does not exist.
        ValueError: If the file exists but OpenCV cannot decode it.
        OSError: If the annotated image could not be written.
    """
    # Validate the input first so a bad path fails fast, before any model
    # weights are loaded (or downloaded).
    if not os.path.exists(input_image_path):
        raise FileNotFoundError(f"Input image {input_image_path} not found.")
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")

    # Load the YOLOv11 segmentation model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)  # Automatically downloads weights if not present locally

    # Perform inference with the requested thresholds
    results = model.predict(
        source=input_image_path,
        conf=conf_threshold,
        iou=iou_threshold,
        verbose=True,
    )
    print(f"Prediction completed. Number of results: {len(results)}")

    wanted_ids = frozenset(int(class_id) for class_id in vehicle_class_ids)

    annotated_image = image.copy()
    mask_overlay = np.zeros_like(image, dtype=np.uint8)       # per-instance colours
    mask_region = np.zeros(image.shape[:2], dtype=np.uint8)   # union of all vehicle masks
    boxes_to_draw = []

    for result in results:
        boxes = getattr(result, "boxes", None)
        if boxes is None:
            continue
        print(f"Boxes detected: {len(boxes)}")
        class_ids = [int(class_id) for class_id in boxes.cls]

        # Bounding boxes for the selected vehicle classes
        for box, score, class_id in zip(boxes.xyxy, boxes.conf, class_ids):
            if class_id not in wanted_ids:
                continue
            x1, y1, x2, y2 = (int(value) for value in box)
            label = f"Vehicle {float(score):.2f} (Class: {class_id})"
            boxes_to_draw.append((x1, y1, x2, y2, label))
            print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        masks = getattr(result, "masks", None)
        if masks is None:
            continue
        print(f"Masks detected: {len(masks)}")

        # Use the polygon outlines (masks.xy), which are expressed in
        # original-image pixel coordinates. Resizing masks.data directly would
        # stretch the letterbox padding of the inference tensor into the image
        # and misalign the overlay.
        for polygon, class_id in zip(masks.xy, class_ids):
            if class_id not in wanted_ids:
                continue
            points = np.asarray(polygon, dtype=np.float32).round().astype(np.int32)
            if points.ndim != 2 or len(points) < 3:
                continue  # degenerate outline, nothing to fill
            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            cv2.fillPoly(mask_overlay, [points], color)
            cv2.fillPoly(mask_region, [points], 255)

    # Blend colours only where a vehicle mask exists; blending the whole frame
    # against the black canvas would darken every non-vehicle pixel.
    inside_mask = mask_region.astype(bool)
    if inside_mask.any():
        alpha = 0.3  # Transparency for the mask
        blended = cv2.addWeighted(image, 1.0 - alpha, mask_overlay, alpha, 0.0)
        annotated_image[inside_mask] = blended[inside_mask]

    # Draw bounding boxes and labels
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box unless that would run off the top edge.
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not boxes_to_draw:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # Save the annotated image; imwrite signals failure through its return value.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")
    return annotated_image

if __name__ == "__main__":
    # Image to analyse and destination for the annotated copy.
    input_path = "input1.jpeg"  # Replace with your actual input image path
    output_path = "output_detected_seg.jpg"  # Output image path

    # Only run the detector when the input file is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")

Loading model: yolo11n-seg.pt
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt to 'yolo11n-seg.pt'...


100%|██████████| 5.90M/5.90M [00:00<00:00, 44.3MB/s]


Input image loaded: input1.jpeg, shape: (1698, 3566, 3)

image 1/1 /content/input1.jpeg: 320x640 1 person, 21 cars, 5 trucks, 380.7ms
Speed: 16.3ms preprocess, 380.7ms inference, 211.2ms postprocess per image at shape (1, 3, 320, 640)
Prediction completed. Number of results: 1
Boxes detected: 27
Detection: Vehicle 0.86 (Class: 2), Box: (0, 852, 342, 1279)
Detection: Vehicle 0.84 (Class: 2), Box: (0, 1107, 528, 1682)
Detection: Vehicle 0.82 (Class: 2), Box: (349, 873, 779, 1286)
Detection: Vehicle 0.82 (Class: 2), Box: (805, 923, 1235, 1290)
Detection: Vehicle 0.81 (Class: 2), Box: (2928, 521, 3255, 772)
Detection: Vehicle 0.73 (Class: 2), Box: (484, 411, 910, 707)
Detection: Vehicle 0.72 (Class: 2), Box: (1265, 459, 1547, 708)
Detection: Vehicle 0.70 (Class: 2), Box: (3089, 905, 3559, 1378)
Detection: Vehicle 0.66 (Class: 2), Box: (3159, 407, 3436, 623)
Detection: Vehicle 0.66 (Class: 2), Box: (916, 461, 1239, 704)
Detection: Vehicle 0.57 (Class: 2), Box: (1477, 108, 1686, 291)
Detecti

In [7]:
import os
from ultralytics import YOLO
import cv2
import numpy as np
import random
import torch

def detect_vehicles_with_segmentation(
    input_image_path,
    output_image_path,
    model_path="yolo11n-seg.pt",
    conf_threshold=0.3,
    iou_threshold=0.45,
    imgsz=1280,
    vehicle_class_ids=(2, 7),
):
    """
    Detect vehicles in an image using YOLOv11 segmentation model and save the annotated output.

    Args:
        input_image_path (str): Path to the input image.
        output_image_path (str): Path where the annotated image will be saved.
        model_path (str): Path or name of the YOLOv11 segmentation model (default: 'yolo11n-seg.pt').
        conf_threshold (float): Minimum confidence passed to ``model.predict`` (default: 0.3).
        iou_threshold (float): NMS IoU threshold passed to ``model.predict`` (default: 0.45).
        imgsz (int): Inference image size passed to ``model.predict`` (default: 1280).
        vehicle_class_ids (iterable of int): Class IDs kept as vehicles.
            Defaults to COCO IDs 2 (car) and 7 (truck).

    Returns:
        numpy.ndarray: The annotated BGR image that was written to ``output_image_path``.

    Raises:
        FileNotFoundError: If ``input_image_path`` does not exist.
        ValueError: If the file exists but OpenCV cannot decode it.
        OSError: If the annotated image could not be written.
    """
    # Validate the input first so a bad path fails fast, before any model
    # weights are loaded (or downloaded).
    if not os.path.exists(input_image_path):
        raise FileNotFoundError(f"Input image {input_image_path} not found.")
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not load image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")

    # Load the YOLOv11 segmentation model
    print(f"Loading model: {model_path}")
    model = YOLO(model_path)  # Automatically downloads weights if not present locally

    # Run on the GPU when one is available, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Perform inference on the already-decoded image with the requested settings.
    results = model.predict(
        source=image,
        conf=conf_threshold,
        iou=iou_threshold,
        verbose=True,
        device=device,
        imgsz=imgsz,
    )
    print(f"Prediction completed. Number of results: {len(results)}")

    wanted_ids = frozenset(int(class_id) for class_id in vehicle_class_ids)

    annotated_image = image.copy()
    mask_overlay = np.zeros_like(image, dtype=np.uint8)       # per-instance colours
    mask_region = np.zeros(image.shape[:2], dtype=np.uint8)   # union of all vehicle masks
    boxes_to_draw = []

    for result in results:
        boxes = getattr(result, "boxes", None)
        if boxes is None:
            continue
        print(f"Boxes detected: {len(boxes)}")
        class_ids = [int(class_id) for class_id in boxes.cls]

        # Bounding boxes for the selected vehicle classes
        for box, score, class_id in zip(boxes.xyxy, boxes.conf, class_ids):
            if class_id not in wanted_ids:
                continue
            x1, y1, x2, y2 = (int(value) for value in box)
            label = f"Vehicle {float(score):.2f} (Class: {class_id})"
            boxes_to_draw.append((x1, y1, x2, y2, label))
            print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

        masks = getattr(result, "masks", None)
        if masks is None:
            continue
        print(f"Masks detected: {len(masks)}")

        # Use the polygon outlines (masks.xy), which are expressed in
        # original-image pixel coordinates. Resizing masks.data directly would
        # stretch the letterbox padding of the inference tensor into the image
        # and misalign the overlay.
        for polygon, class_id in zip(masks.xy, class_ids):
            if class_id not in wanted_ids:
                continue
            points = np.asarray(polygon, dtype=np.float32).round().astype(np.int32)
            if points.ndim != 2 or len(points) < 3:
                continue  # degenerate outline, nothing to fill
            color = (
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255),
            )
            cv2.fillPoly(mask_overlay, [points], color)
            cv2.fillPoly(mask_region, [points], 255)

    # Blend colours only where a vehicle mask exists; blending the whole frame
    # against the black canvas would darken every non-vehicle pixel.
    inside_mask = mask_region.astype(bool)
    if inside_mask.any():
        alpha = 0.3  # Transparency for the mask
        blended = cv2.addWeighted(image, 1.0 - alpha, mask_overlay, alpha, 0.0)
        annotated_image[inside_mask] = blended[inside_mask]

    # Draw bounding boxes and labels
    for x1, y1, x2, y2, label in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the label above the box unless that would run off the top edge.
        label_y = y1 - 10 if y1 - 10 > 10 else y2 + 20
        cv2.putText(
            annotated_image, label, (x1, label_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
        )

    if not boxes_to_draw:
        print("No vehicles detected. Try lowering the confidence threshold or fine-tuning the model.")

    # Save the annotated image; imwrite signals failure through its return value.
    if not cv2.imwrite(output_image_path, annotated_image):
        raise OSError(f"Could not write output image to {output_image_path}")
    print(f"Output image saved to {output_image_path}")
    return annotated_image


if __name__ == "__main__":
    # Image to analyse and destination for the annotated copy.
    input_path = "input1.jpeg"  # Replace with your actual input image path
    output_path = "output_detected_seg_1.jpg"  # Output image path

    # Weights file or model name; change this for another YOLOv11 variant.
    model_path = "yolo11n-seg.pt"

    # Only run the detector when the input file is actually present.
    if os.path.exists(input_path):
        detect_vehicles_with_segmentation(input_path, output_path, model_path)
    else:
        print(f"Input image {input_path} not found. Please provide a valid image.")


Loading model: yolo11n-seg.pt
Using device: cpu
Input image loaded: input1.jpeg, shape: (1698, 3566, 3)

0: 640x1280 2 persons, 22 cars, 703.2ms
Speed: 10.0ms preprocess, 703.2ms inference, 577.6ms postprocess per image at shape (1, 3, 640, 1280)
Prediction completed. Number of results: 1
Boxes detected: 24
Detection: Vehicle 0.92 (Class: 2), Box: (0, 1120, 522, 1688)
Detection: Vehicle 0.89 (Class: 2), Box: (487, 422, 910, 716)
Detection: Vehicle 0.89 (Class: 2), Box: (1951, 474, 2210, 740)
Detection: Vehicle 0.88 (Class: 2), Box: (813, 925, 1238, 1293)
Detection: Vehicle 0.88 (Class: 2), Box: (2932, 527, 3254, 777)
Detection: Vehicle 0.86 (Class: 2), Box: (349, 877, 778, 1283)
Detection: Vehicle 0.86 (Class: 2), Box: (2569, 325, 2842, 606)
Detection: Vehicle 0.85 (Class: 2), Box: (2252, 436, 2553, 745)
Detection: Vehicle 0.85 (Class: 2), Box: (0, 858, 341, 1263)
Detection: Vehicle 0.83 (Class: 2), Box: (3088, 910, 3557, 1374)
Detection: Vehicle 0.82 (Class: 2), Box: (3159, 418, 3432,