In [1]:
#Importing Libraries
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
# Model assets; the bare file names are resolved against the current working directory.
names_path = "coco.names"  # text file with one class label per line
config_path = "yolov7-tiny.cfg"  # Darknet network definition
weights_path = "yolov7-tiny.weights"  # trained weights matching that definition

In [3]:
# Build the OpenCV DNN network from the Darknet config/weights pair defined above.
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

In [4]:
# Load class names: one label per line; the list index is later used as the class id.
classNames = []
if names_path:
    # Explicit encoding so decoding does not depend on the platform's locale default.
    with open(names_path, "r", encoding="utf-8") as f:
        # Iterate the file directly rather than materialising readlines() first.
        classNames = [line.strip() for line in f]

In [5]:
# Sanity check: show the label list built in the previous cell.
print(classNames)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [6]:
# Load YOLO model using OpenCV's dnn module.
# NOTE(review): an earlier cell already makes this exact readNetFromDarknet call with the
# same arguments, so `net` is simply rebuilt here; one of the two loads looks redundant.
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

In [7]:
# Function to detect objects in a frame
def detect_objects(frame, conf_threshold=0.5, nms_thresh=0.4):
    """Run the YOLO network on ``frame`` and draw the surviving detections on it.

    Relies on two module-level names: ``net`` (the loaded network) and
    ``classNames`` (label list indexed by class id).

    Args:
        frame: Image array whose first two dimensions are (height, width). It is
            fed to ``cv2.dnn.blobFromImage`` with ``swapRB=True``.
        conf_threshold: A detection is kept only if its best class score is
            strictly greater than this value. Also passed to NMS as its score
            threshold.
        nms_thresh: Overlap threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns:
        The same ``frame`` object, annotated in place with boxes and labels.
    """
    # Only the spatial size is used below.
    height, width = frame.shape[:2]

    # Blob from the frame for feeding into the YOLO model
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Ask the network for its output layer names directly instead of indexing
    # getLayerNames() by hand; the manual form depends on the index layout that
    # getUnconnectedOutLayers() returns, which is not the same in every OpenCV release.
    outputs = net.forward(net.getUnconnectedOutLayersNames())

    # Parallel lists: entry k of each describes the same candidate detection.
    boxes = []
    confidences = []
    class_ids = []

    # Multiplier that turns the four box values into pixel units for this frame.
    frame_scale = np.array([width, height, width, height])

    for output in outputs:
        for detection in output:
            # Elements 0-3 are the box (centre x, centre y, width, height);
            # element 4 is not used here; per-class scores start at index 5.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])

            if confidence > conf_threshold:
                center_x, center_y, w, h = (detection[0:4] * frame_scale).astype(int)

                # Convert the centre-based box to a top-left-based one.
                x = int(center_x - (w / 2))
                y = int(center_y - (h / 2))

                boxes.append([x, y, int(w), int(h)])
                confidences.append(confidence)
                class_ids.append(class_id)

    # Apply non-maxima suppression (NMS) to suppress weak, overlapping bounding boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_thresh)

    # The result can be a flat array, an N x 1 array, or an empty tuple depending
    # on the OpenCV build and on whether anything survived; flatten to plain ints.
    kept = np.asarray(indices, dtype=int).reshape(-1)

    color = (0, 255, 0)  # Green color for bounding box
    for idx in kept:
        idx = int(idx)
        x, y, w, h = boxes[idx]
        class_id = class_ids[idx]

        # Fall back to the numeric id when no name is available for it.
        label = str(classNames[class_id]) if class_id < len(classNames) else str(class_id)
        caption = label + " " + str(round(confidences[idx] * 100, 2)) + "%"

        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
        # Keep the caption baseline inside the image when the box touches the top edge.
        text_y = max(y - 5, 15)
        cv2.putText(frame, caption, (x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return frame

In [8]:
# Start video capture: 0 selects the default webcam; pass a file path to read a video instead.
# cap = cv2.VideoCapture(r'D:\OBJECT DETECTION\species.mp4')  # raw string keeps backslashes literal
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the video source")

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # A failed read (camera unplugged, end of file) yields no frame; stop cleanly
        # instead of passing None into detect_objects.
        if not ret:
            break

        # Detect objects in the frame
        detected_frame = detect_objects(frame)

        # Display the resulting frame
        cv2.imshow('Real-time Object Detection', detected_frame)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture device and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()