In [1]:
### Object Detection and Tracking using YOLO
# Step 1: Import necessary libraries
import cv2
import numpy as np
import time
from ultralytics import YOLO


In [2]:
# Step 2: Load YOLO model
model = YOLO('yolov8n.pt')  # Ensure you have downloaded the latest YOLO model file

In [3]:
 # Step 3: Define a function for object detection
def detect_and_track_objects(frame, model):
    results = model.predict(frame, conf=0.5)  # Perform prediction with confidence threshold
    detections = []
    # Parse the results
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0]
            class_id = int(box.cls[0])
            detections.append((x1, y1, x2, y2, conf, class_id))

    return detections

In [4]:
# Step 4: Initialize Video Capture
video_path = 0  # Use 0 for webcam or provide path to video
cap = cv2.VideoCapture(video_path)


In [5]:
# Step 5: Define class labels (COCO dataset labels)
class_labels = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
    "hairbrush", "wallet", "watch", "backpack", "bottle", "cell phone", "skateboard", "bicycle", "piano", "keyboard",
    "drum", "violin", "guitar", "bathtub", "shower", "bathtub", "mirror", "lamp", "desk", "bookcase", "candle", "bin",
    "lawnmower", "scissors", "headphones", "television", "camera", "binoculars", "projector", "trash can", "fireplace",
    "dumbbell", "treadmill", "surfboard", "glasses", "frying pan", "spatula", "chopsticks", "matchstick", "ice cream",
    "cake", "wrench", "hammer", "pliers", "screwdriver", "nail", "lightbulb", "clock", "fan", "radiator", "speaker",
    "fire extinguisher", "sewing machine", "flashlight", "thermometer", "stethoscope", "microscope", "telescope",
    "cupboard", "bookend", "umbrella", "shovel", "scooter", "rocket", "skateboard", "snowboard", "luggage"
]



In [None]:
# Step 6: Loop through each frame in the video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Detect objects in the frame
    detections = detect_and_track_objects(frame, model)

    # Draw bounding boxes on the frame
    for x1, y1, x2, y2, conf, class_id in detections:
        label = f"{class_labels[class_id]}: {conf:.2f}" if class_id < len(class_labels) else f"Class {class_id}: {conf:.2f}"
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Add a stop message on the screen
    stop_message = "Press 'q' to Stop Detection"
    cv2.putText(frame, stop_message, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # Display the processed frame
    cv2.imshow('Object Detection and Tracking - AI-Geek-Aryan', frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()



0: 384x640 1 person, 65.4ms
Speed: 6.2ms preprocess, 65.4ms inference, 8.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 45.7ms
Speed: 2.2ms preprocess, 45.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.5ms
Speed: 1.7ms preprocess, 62.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.2ms
Speed: 1.6ms preprocess, 54.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 49.2ms
Speed: 2.3ms preprocess, 49.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 64.5ms
Speed: 1.9ms preprocess, 64.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 51.3ms
Speed: 1.7ms preprocess, 51.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 58.1ms
Speed: 1.7ms preprocess, 58.1ms inference, 0.8ms postprocess per image at shape (1, 3, 38

In [None]:
#GitHUb - AI-Geek-Aryan#