In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.108-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [2]:
# prompt: install PyTorch and have a YOLOv5 repository (or a fork with segmentation support)
#         so that the torch.hub.load call works.

!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
!pip install -r requirements.txt
!pip install ultralytics


Looking in indexes: https://download.pytorch.org/whl/cu118
Cloning into 'yolov5'...
remote: Enumerating objects: 17372, done.[K
remote: Counting objects: 100% (59/59), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 17372 (delta 42), reused 20 (delta 20), pack-reused 17313 (from 3)[K
Receiving objects: 100% (17372/17372), 16.24 MiB | 22.15 MiB/s, done.
Resolving deltas: 100% (11904/11904), done.
/content/yolov5
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2209072238


In [12]:
import torch
import cv2
import os
import numpy as np
import random

# Import non_max_suppression from YOLOv5 repository.
# Ensure that YOLOv5 repo is cloned and its 'utils' folder is in your PYTHONPATH.
from utils.general import non_max_suppression

def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """
    Resize image with unchanged aspect ratio using padding (letterbox).

    Args:
      im (numpy.ndarray): Input image (BGR).
      new_shape (tuple): Desired image shape (height, width).
      color (tuple): Border color.

    Returns:
      numpy.ndarray: Resized and padded image.
    """
    shape = im.shape[:2]  # current height, width
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Compute scaling factor and new unpadded dimensions
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))

    # Compute padding
    dw = new_shape[1] - new_unpad[0]
    dh = new_shape[0] - new_unpad[1]
    dw /= 2  # divide padding into 2 sides
    dh /= 2

    # Resize image
    im_resized = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # Add border (padding)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im_padded = cv2.copyMakeBorder(im_resized, top, bottom, left, right,
                                   cv2.BORDER_CONSTANT, value=color)
    return im_padded

def detect_vehicles_yolov5_seg(input_image_path, output_image_path, model_path):
    """
    Detect vehicles using a YOLOv5 segmentation model loaded via torch.hub.

    Notes:
      - You must have PyTorch installed and a YOLOv5 repository (or fork) with segmentation support.
      - This example filters for COCO class IDs 2 (car) and 7 (truck). Adjust as needed.
      - Because YOLOv5-Seg is not AutoShape compatible, we manually preprocess the image and apply NMS.
      - Extraction of segmentation masks from the raw tensor output is not implemented here.

    Args:
      input_image_path (str): Path to the input image.
      output_image_path (str): Path to save the annotated output image.
      model_path (str): Path to the YOLOv5-Seg model weights (e.g., "yolov5s-seg.pt").

    Returns:
      annotated_image (numpy.ndarray): The annotated image with bounding boxes.
    """

    print(f"Loading YOLOv5-Seg model from {model_path}")
    model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path, force_reload=True)
    model.eval()  # Set model to evaluation mode

    # Load input image (BGR)
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Error: Cannot read image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")

    # Resize (letterbox) image to a fixed size (e.g. 640x640) for compatibility
    image_letterboxed = letterbox(image, new_shape=(640, 640))
    print(f"Letterboxed image shape: {image_letterboxed.shape}")

    # Convert BGR to RGB and prepare tensor: [1, 3, H, W]
    img_rgb = cv2.cvtColor(image_letterboxed, cv2.COLOR_BGR2RGB)
    img_tensor = torch.from_numpy(img_rgb).permute(2, 0, 1).float() / 255.0  # [3, H, W]
    img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension -> [1, 3, H, W]

    # Run inference; note that, since the model is not AutoShape-compatible,
    # the output is the raw prediction tensor.
    raw_preds = model(img_tensor)[0]

    # Apply non-max suppression to get final detections.
    # The non_max_suppression function returns a list; we take the first element.
    preds = non_max_suppression(raw_preds, conf_thres=0.3, iou_thres=0.45)[0]
    print(f"Number of detections after NMS: {len(preds)}")

    # Prepare an image for annotation based on the letterboxed image
    annotated_image = image_letterboxed.copy()
    boxes_to_draw = []
    detections_found = False

    # Process bounding box detections: each detection is [x1, y1, x2, y2, conf, cls]
    for det in preds:
        x1, y1, x2, y2, conf, cls = det.cpu().numpy()
        # Filter for vehicles (COCO class IDs: 2 for car, 7 for truck)
        if int(cls) in [2, 7]:
            detections_found = True
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            label = f"Vehicle {conf:.2f} (cls: {int(cls)})"
            boxes_to_draw.append((x1, y1, x2, y2, label))
            print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

    # Note: For segmentation masks, additional decoding from the raw predictions is needed.
    # This demo focuses on bounding box extraction.

    # Draw bounding boxes and labels on the annotated image
    for (x1, y1, x2, y2, label) in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Place label above the bounding box if possible; otherwise below.
        label_y = y1 - 10 if (y1 - 10) > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Consider adjusting thresholds or checking your model.")

    cv2.imwrite(output_image_path, annotated_image)
    print(f"Annotated image saved to {output_image_path}")

    return annotated_image

if __name__ == "__main__":
    input_path = "/content/input1.jpeg"          # Replace with your input image path
    output_path = "yolov5_seg_output.jpg"          # Desired output image file path
    model_path = "yolov5s-seg.pt"                  # Path to your YOLOv5-Seg model weights

    if not os.path.exists(input_path):
        print(f"Error: Input image {input_path} not found. Please check the file path.")
    else:
        detect_vehicles_yolov5_seg(input_path, output_path, model_path)


Loading YOLOv5-Seg model from yolov5s-seg.pt


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2025-4-15 Python-3.11.12 torch-2.6.0+cu124 CPU

Fusing layers... 
YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs


Input image loaded: /content/input1.jpeg, shape: (1698, 3566, 3)
Letterboxed image shape: (640, 640, 3)
Number of detections after NMS: 34
Detection: Vehicle 0.73 (cls: 2), Box: (0, 367, 94, 469)
Detection: Vehicle 0.63 (cls: 2), Box: (471, 339, 549, 408)
Detection: Vehicle 0.61 (cls: 2), Box: (554, 329, 638, 416)
Annotated image saved to yolov5_seg_output.jpg


In [13]:
import torch
import cv2
import os
import numpy as np
import random

# Import non_max_suppression from the YOLOv5 repository.
# Make sure the YOLOv5 repository is cloned and its 'utils' directory is in your PYTHONPATH.
from utils.general import non_max_suppression


def letterbox(im, new_shape=(1280, 1280), color=(114, 114, 114)):
    """
    Resize an image with unchanged aspect ratio using padding (letterbox).

    Args:
      im (numpy.ndarray): Input image in BGR format.
      new_shape (tuple): Desired shape as (height, width). Increase for higher resolution.
      color (tuple): Padding color.

    Returns:
      numpy.ndarray: The resized and padded image.
    """
    shape = im.shape[:2]  # current height and width
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Compute scale factor (r) and new unpadded dimensions
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))

    # Compute padding
    dw = new_shape[1] - new_unpad[0]
    dh = new_shape[0] - new_unpad[1]
    dw /= 2  # divide padding into two sides
    dh /= 2

    # Resize image
    im_resized = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # Add padding (borders)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im_padded = cv2.copyMakeBorder(im_resized, top, bottom, left, right,
                                   cv2.BORDER_CONSTANT, value=color)
    return im_padded


def detect_vehicles_yolov5_seg(input_image_path, output_image_path, model_path,
                               conf_threshold=0.3, iou_threshold=0.45,
                               new_shape=(1280, 1280)):
    """
    Detect vehicles using a YOLOv5 segmentation model.

    Notes:
      - Uses torch.hub to load a custom YOLOv5 segmentation model.
      - The model is manually preprocessed (letterbox, normalization) since it is not AutoShape-compatible.
      - Applies non-max suppression (NMS) to raw predictions.
      - Filters for vehicle classes (COCO class IDs 2 for car and 7 for truck).
      - Moves processing to GPU if available.

    Args:
      input_image_path (str): Path to the input image.
      output_image_path (str): Path to save the output annotated image.
      model_path (str): Path to the YOLOv5 segmentation model weights (e.g., "yolov5s-seg.pt").
      conf_threshold (float): Confidence threshold for NMS.
      iou_threshold (float): IoU threshold for NMS.
      new_shape (tuple): Desired input size for the network (height, width).

    Returns:
      numpy.ndarray: The annotated image with bounding boxes.
    """

    print(f"Loading YOLOv5-Seg model from {model_path}")
    # Load model using torch.hub; force_reload is used for demonstration.
    model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path, force_reload=True)
    model.eval()

    # Use GPU if available:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    # Read input image (BGR)
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Error: Unable to read image at {input_image_path}")
    print(f"Input image loaded: {input_image_path}, shape: {image.shape}")

    # Letterbox the image to a larger size for improved performance on small objects
    image_letterboxed = letterbox(image, new_shape=new_shape)
    print(f"Letterboxed image shape: {image_letterboxed.shape}")

    # Convert BGR to RGB, then to tensor and normalize to [0,1]
    img_rgb = cv2.cvtColor(image_letterboxed, cv2.COLOR_BGR2RGB)
    img_tensor = torch.from_numpy(img_rgb).permute(2, 0, 1).float() / 255.0  # [3, H, W]
    img_tensor = img_tensor.unsqueeze(0)  # [1, 3, H, W]
    img_tensor = img_tensor.to(device)

    # Run inference on the preprocessed image tensor
    raw_preds = model(img_tensor)[0]

    # Apply non-max suppression to the raw outputs
    preds = non_max_suppression(raw_preds, conf_threshold, iou_threshold)[0]
    print(f"Number of detections after NMS: {len(preds)}")

    # Prepare the image for annotation (we are annotating the letterboxed image)
    annotated_image = image_letterboxed.copy()
    boxes_to_draw = []
    detections_found = False

    # Process bounding box detections (each detection is [x1, y1, x2, y2, conf, cls])
    for det in preds:
        x1, y1, x2, y2, conf, cls = det.cpu().numpy()
        # Filter detections for vehicles: COCO class ID 2 (car) and 7 (truck)
        if int(cls) in [2, 7]:
            detections_found = True
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            label = f"Vehicle {conf:.2f} (cls: {int(cls)})"
            boxes_to_draw.append((x1, y1, x2, y2, label))
            print(f"Detection: {label}, Box: ({x1}, {y1}, {x2}, {y2})")

    # (Segmentation mask decoding not implemented here; expand as needed)

    # Draw bounding boxes and labels on the annotated image
    for (x1, y1, x2, y2, label) in boxes_to_draw:
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label_y = y1 - 10 if (y1 - 10) > 10 else y2 + 20
        cv2.putText(annotated_image, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if not detections_found:
        print("No vehicles detected. Consider adjusting thresholds or checking your model.")

    cv2.imwrite(output_image_path, annotated_image)
    print(f"Annotated image saved to {output_image_path}")

    return annotated_image


if __name__ == "__main__":
    # Update the following paths to your files
    input_path = "/content/input1.jpeg"         # Replace with your input image path
    output_path = "yolov5_seg_output.jpg"         # Desired output image path
    model_path = "yolov5s-seg.pt"                 # You can replace with "yolov5m-seg.pt", "yolov5l-seg.pt", etc.

    if not os.path.exists(input_path):
        print(f"Error: Input image {input_path} not found. Please check the path.")
    else:
        detect_vehicles_yolov5_seg(input_path, output_path, model_path,
                                   conf_threshold=0.3, iou_threshold=0.45,
                                   new_shape=(1280, 1280))


Loading YOLOv5-Seg model from yolov5s-seg.pt


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2025-4-15 Python-3.11.12 torch-2.6.0+cu124 CPU

Fusing layers... 
YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs


Using device: cpu
Input image loaded: /content/input1.jpeg, shape: (1698, 3566, 3)
Letterboxed image shape: (1280, 1280, 3)
Number of detections after NMS: 39
Detection: Vehicle 0.90 (cls: 2), Box: (922, 452, 1021, 552)
Detection: Vehicle 0.87 (cls: 2), Box: (584, 440, 677, 535)
Detection: Vehicle 0.85 (cls: 2), Box: (808, 491, 914, 603)
Detection: Vehicle 0.83 (cls: 2), Box: (699, 503, 793, 599)
Detection: Vehicle 0.35 (cls: 2), Box: (328, 498, 441, 588)
Annotated image saved to yolov5_seg_output.jpg
