Take Home Assignment 3

In [None]:
from ultralytics import YOLO
import cv2
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import time
import supervision as sv

3.1.1 Draw the segmentation mask of the largest car: Use ultralytics library only

In [None]:
# Load the input image (OpenCV returns it in BGR channel order).
IMAGE_PATH = "cars1.jpg"
img = cv2.imread(IMAGE_PATH)

# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of a confusing error in a later cell.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {IMAGE_PATH}")

In [None]:
# Load YOLOv8 segmentation model
model = YOLO("yolov8m-seg.pt")

# Keep the original dimensions (height, width) and an RGB copy for display.
# Inference itself runs on `img`: Ultralytics expects NumPy images in
# OpenCV's BGR channel order, so the RGB copy is only used for matplotlib.
original_height, original_width = img.shape[:2]
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Run the segmentation model on the image.
# retina_masks=True asks for masks at the original image resolution, which
# avoids misalignment from stretching a letterboxed (padded) mask.
results = model(img, retina_masks=True)
result = results[0]

# Class ids whose label is "car" -- the task is about the largest *car*,
# not the largest detection of any class.
car_class_ids = {class_id for class_id, name in result.names.items() if name == "car"}

# Initialize variables for the largest detected car box
largest_area = 0
largest_index = -1

# Iterate through the boxes (and their class ids) in the results object
for i, (box, class_id) in enumerate(zip(result.boxes.xyxy, result.boxes.cls)):
    if int(class_id) not in car_class_ids:
        continue  # Skip everything that is not a car

    # Obtain the coordinates of the box
    x_min, y_min, x_max, y_max = box.tolist()
    box_area = (x_max - x_min) * (y_max - y_min)  # Calculate area

    # Remember the index of the car box with the largest area
    if box_area > largest_area:
        largest_area = box_area
        largest_index = i

# Draw the mask of the car with the largest box
if largest_index != -1 and result.masks is not None:
    mask = result.masks.data[largest_index].cpu().numpy()  # Get mask

    # Safety net: make sure the mask matches the image size before indexing
    if mask.shape[:2] != (original_height, original_width):
        mask = cv2.resize(mask, (original_width, original_height), interpolation=cv2.INTER_NEAREST)

    car_pixels = mask > 0.5  # Convert to a boolean mask

    # Apply the mask overlay: 60 % image + 40 % red
    blended = img_rgb[car_pixels] * 0.6 + np.array([255, 0, 0]) * 0.4
    img_rgb[car_pixels] = blended.astype(np.uint8)
else:
    print("No car was detected in the image.")

# Display the result
plt.figure(figsize=(10, 6))
plt.imshow(img_rgb)
plt.axis("off")
plt.title("Largest Car Segmentation Highlighted")
plt.show()

3.1.2 Draw the segmentation mask of the largest car : Use ultralytics + supervision libraries

In [None]:
# Load YOLOv8 segmentation model
model = YOLO("yolov8m-seg.pt")

# Run a segmentation model on the image using ultralytics libraries & a model of your choice
# NOTE: the image loaded earlier in this notebook is named `img`; the name
# `image` was never defined and raised a NameError on a fresh kernel.
# Inference runs on the BGR array (the channel order Ultralytics expects for
# NumPy input); the RGB copy is only used for visualization.
image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = model(img)  # Run YOLOv8 segmentation

# Transfer ultralytics results to supervision
detections = sv.Detections.from_ultralytics(results[0])  # Convert YOLO results to Supervision format

# Keep only the detections labelled "car" -- the task is the largest *car*
car_class_ids = [class_id for class_id, name in results[0].names.items() if name == "car"]
detections = detections[np.isin(detections.class_id, car_class_ids)]

# Iterate through the masks & find the largest
largest_area = 0
largest_mask = None

# detections.mask is None when nothing was segmented; iterate over nothing then
car_masks = detections.mask if detections.mask is not None else []

for mask in car_masks:
    # Convert mask to NumPy array
    mask_np = mask.astype(np.uint8)

    # Calculate the area of the mask
    area = np.sum(mask_np)  # Count the number of pixels in the mask

    # Find the largest mask
    if area > largest_area:
        largest_area = area
        largest_mask = mask_np  # Store the largest detected mask

# Display the largest mask
if largest_mask is not None:
    # Overlay the largest mask on the image
    mask_overlay = np.zeros_like(image_rgb)
    mask_overlay[largest_mask == 1] = [255, 0, 0]  # Apply red color to the mask

    # Blend the mask with the original image
    image_rgb = cv2.addWeighted(image_rgb, 1, mask_overlay, 0.6, 0)
else:
    print("No car was detected in the image.")

# Show the image
plt.figure(figsize=(10, 6))
plt.imshow(image_rgb)
plt.axis("off")
plt.title("Largest Car Segmentation Mask Highlighted")
plt.show()

3.2 Estimate distance to centroid of chair 

In [None]:
# Detect the chair in the image  - A detection model of your choice can be used

# Calculate the centroid of the chair - Access box coordinates from the detection model and calculate the centroid

# Obtain the depth map - Depth Anything V2 can be used 

# Query the value of the depth map at the centroid's location

3.3.1  Draw bounding boxes around the "cars" using supervision. 

In [None]:
# Downloads the video file from the google drive link, you'll have to "pip install gdown" first
import os

import gdown

file_id = "1zcKvnDDEdyFF4B0B3eYud6DHU19nl0o4"
direct_url = f"https://drive.google.com/uc?id={file_id}"
video_path = "vehicles_video.mp4"

# The video is ~35 MB; skip the download when the file is already present so
# that "Restart Kernel -> Run All" does not fetch it again on every run.
if os.path.exists(video_path):
    print(f"{video_path} already exists, skipping download.")
else:
    gdown.download(direct_url, video_path, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1zcKvnDDEdyFF4B0B3eYud6DHU19nl0o4
To: d:\01_FOT_USJP_TeachingMaterial\06_MachineVisionSystems\02_LectureDemonstrations_2024\B5-Repo\ETM4272\video.mp4
100%|██████████| 35.3M/35.3M [04:34<00:00, 129kB/s]


'video.mp4'

In [None]:
model = YOLO("yolo11n.pt")

# Created once and reused for every frame
box_annotator = sv.BoxAnnotator()


def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect cars in one video frame and draw their bounding boxes.

    Passed as the `callback` argument of `sv.process_video` below.

    Args:
        frame: The current video frame as a NumPy image array.
        _: The index of the frame in the video (unused here).

    Returns:
        A copy of the frame with a bounding box drawn around each detected car.
    """
    # Run the detector on this frame (verbose=False avoids one log line per frame)
    result = model(frame, verbose=False)[0]

    # Convert the ultralytics result to supervision detections
    detections = sv.Detections.from_ultralytics(result)

    # Keep only detections whose class label is "car"
    car_class_ids = [class_id for class_id, name in result.names.items() if name == "car"]
    detections = detections[np.isin(detections.class_id, car_class_ids)]

    # Annotate a copy so the input frame is left untouched
    return box_annotator.annotate(scene=frame.copy(), detections=detections)


sv.process_video(
    source_path="vehicles_video.mp4",
    target_path="result_tracking.mp4",
    callback=callback
)

3.3.2 Track the cars. Add the tracker id to the bounding boxes around the cars. Use supervision

3.3.3 Draw the track traces for the tracked cars. Use supervision

Questions:

Explain what "def callback(frame: np.ndarray, _: int) -> np.ndarray:" does. 

Answers: Complete this section