This code performs real-time object detection using the YOLO model, displaying bounding boxes and labels for different classes with specific colors on a video feed streamed from a mobile device.

This command resolves version issues by installing specific versions of torch, torchvision, and torchaudio with CUDA 11.8 support:

* pip install torch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 --index-url https://download.pytorch.org/whl/cu118

In [None]:
import cv2
from ultralytics import YOLO
import numpy as np

# Per-class drawing colors. OpenCV drawing functions take colors in BGR order,
# so every tuple below is (blue, green, red).
class_colors = {
    "Person": (255, 0, 0),          # Blue
    "Car": (0, 255, 0),             # Green
    "Motorcycle": (0, 0, 255),      # Red
    "Bicycle": (255, 255, 0),       # Cyan
    "Truck": (255, 0, 255),         # Magenta
    "Bus": (0, 165, 255),           # Orange (was given in RGB order before)
    "Sign": (0, 255, 255),          # Yellow
    "TrafficLight": (255, 255, 0),  # Cyan
}
DEFAULT_COLOR = (255, 255, 255)     # White, used for classes not listed above
CONFIDENCE_THRESHOLD = 0.5          # Detections at or below this are not drawn
LABEL_OFFSET = 10                   # Pixels between the box top and the label baseline
MIN_LABEL_BASELINE = 15             # Smallest baseline that keeps the label on screen
WINDOW_NAME = "YOLO Object Detection"
STREAM_URL = "http://192.168.15.3:8080/video"  # Video feed served by the mobile device


def label_origin(x1, y1):
    """Return the (x, y) anchor for a box label.

    The label normally sits ``LABEL_OFFSET`` pixels above the box. When the
    box touches the top of the frame that position would be cut off, so the
    label is moved just inside the box instead.

    Args:
        x1: Left edge of the box, in pixels.
        y1: Top edge of the box, in pixels.

    Returns:
        Tuple ``(x, y)`` to pass as the ``org`` argument of ``cv2.putText``.
    """
    baseline = y1 - LABEL_OFFSET
    if baseline < MIN_LABEL_BASELINE:
        baseline = y1 + 2 * LABEL_OFFSET
    return x1, baseline


def draw_detections(frame, results, names):
    """Draw a box and a "<class>: <confidence>" label for each confident detection.

    Args:
        frame: Image to draw on; it is modified in place.
        results: Iterable returned by calling the YOLO model on ``frame``.
        names: Mapping from class id to class name (``model.names``).
    """
    for result in results:
        for box in result.boxes:
            confidence = float(box.conf[0])
            if confidence <= CONFIDENCE_THRESHOLD:
                continue

            class_name = names[int(box.cls[0])]
            color = class_colors.get(class_name, DEFAULT_COLOR)
            # Coordinates are (xmin, ymin, xmax, ymax); OpenCV needs ints.
            x1, y1, x2, y2 = (int(value) for value in box.xyxy[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f"{class_name}: {confidence:.2f}",
                        label_origin(x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def main():
    """Run detection on the mobile video stream until it ends or 'q' is pressed.

    Raises:
        SystemExit: If the video stream cannot be opened.
    """
    # Load the trained YOLO weights (last.pt)
    model = YOLO("last.pt")

    cap = cv2.VideoCapture(STREAM_URL)
    if not cap.isOpened():
        # Without this check an unreachable stream just ends the loop silently.
        raise SystemExit(f"Error: Could not open video stream {STREAM_URL}")

    try:
        # Resizable window, switched to full screen
        cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
        cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            draw_detections(frame, model(frame), model.names)
            cv2.imshow(WINDOW_NAME, frame)

            # Press 'q' to exit the loop
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the stream and close the window even if inference fails.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

This code performs real-time object detection using the YOLO model on a video file, saving the processed video with bounding boxes and labels for different classes with specific colors. Unlike the previous code, this version processes a video file and saves the output to a new video file.

In [None]:
import cv2
from ultralytics import YOLO
import numpy as np

# Per-class drawing colors. OpenCV drawing functions take colors in BGR order,
# so every tuple below is (blue, green, red).
class_colors = {
    "Person": (255, 0, 0),          # Blue
    "Car": (0, 255, 0),             # Green
    "Motorcycle": (0, 0, 255),      # Red
    "Bicycle": (255, 255, 0),       # Cyan
    "Truck": (255, 0, 255),         # Magenta
    "Bus": (0, 165, 255),           # Orange (was given in RGB order before)
    "Sign": (0, 255, 255),          # Yellow
    "TrafficLight": (255, 255, 0),  # Cyan
}
DEFAULT_COLOR = (255, 255, 255)     # White, used for classes not listed above
CONFIDENCE_THRESHOLD = 0.5          # Detections at or below this are not drawn
LABEL_OFFSET = 10                   # Pixels between the box top and the label baseline
MIN_LABEL_BASELINE = 15             # Smallest baseline that keeps the label on screen
INPUT_PATH = "video_file.mp4"       # Use the path to your video file
OUTPUT_PATH = "output_video.mp4"
FALLBACK_FPS = 30.0                 # Used when the source does not report a frame rate
SHOW_PREVIEW = False                # Set to True to watch frames while they are processed
WINDOW_NAME = "YOLO Object Detection"


def resolve_fps(reported, fallback=FALLBACK_FPS):
    """Return the frame rate to write the output video with.

    Args:
        reported: Value of ``cv2.CAP_PROP_FPS`` read from the source video.
        fallback: Rate to use when ``reported`` is not a positive number.

    Returns:
        ``reported`` if it is greater than zero, otherwise ``fallback``.
    """
    # The comparison is False for 0, negative values and NaN alike.
    return reported if reported > 0 else fallback


def label_origin(x1, y1):
    """Return the (x, y) anchor for a box label.

    The label normally sits ``LABEL_OFFSET`` pixels above the box. When the
    box touches the top of the frame that position would be cut off, so the
    label is moved just inside the box instead.

    Args:
        x1: Left edge of the box, in pixels.
        y1: Top edge of the box, in pixels.

    Returns:
        Tuple ``(x, y)`` to pass as the ``org`` argument of ``cv2.putText``.
    """
    baseline = y1 - LABEL_OFFSET
    if baseline < MIN_LABEL_BASELINE:
        baseline = y1 + 2 * LABEL_OFFSET
    return x1, baseline


def draw_detections(frame, results, names):
    """Draw a box and a "<class>: <confidence>" label for each confident detection.

    Args:
        frame: Image to draw on; it is modified in place.
        results: Iterable returned by calling the YOLO model on ``frame``.
        names: Mapping from class id to class name (``model.names``).
    """
    for result in results:
        for box in result.boxes:
            confidence = float(box.conf[0])
            if confidence <= CONFIDENCE_THRESHOLD:
                continue

            class_name = names[int(box.cls[0])]
            color = class_colors.get(class_name, DEFAULT_COLOR)
            # Coordinates are (xmin, ymin, xmax, ymax); OpenCV needs ints.
            x1, y1, x2, y2 = (int(value) for value in box.xyxy[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f"{class_name}: {confidence:.2f}",
                        label_origin(x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def main():
    """Run detection on every frame of the input video and save the result.

    Raises:
        SystemExit: If the input video cannot be opened or the output video
            cannot be created.
    """
    # Load the trained YOLO weights (last.pt)
    model = YOLO("last.pt")

    cap = cv2.VideoCapture(INPUT_PATH)
    if not cap.isOpened():
        raise SystemExit("Error: Could not open video file.")

    output_video = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Reuse the source frame rate so the saved video plays at the original speed.
        fps = resolve_fps(cap.get(cv2.CAP_PROP_FPS))

        output_video = cv2.VideoWriter(OUTPUT_PATH,
                                       cv2.VideoWriter_fourcc(*'mp4v'),
                                       fps,
                                       (frame_width, frame_height))
        if not output_video.isOpened():
            raise SystemExit("Error: Could not open output video for writing.")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            draw_detections(frame, model(frame), model.names)
            output_video.write(frame)

            if SHOW_PREVIEW:
                cv2.imshow(WINDOW_NAME, frame)
                # Press 'q' to stop the process early. Key presses are only
                # delivered while a preview window exists, so the check lives here.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release resources even if inference or writing fails, so the output
        # file is finalized.
        cap.release()
        if output_video is not None:
            output_video.release()
        if SHOW_PREVIEW:
            cv2.destroyAllWindows()


if __name__ == "__main__":
    main()