In [2]:
from ultralytics import YOLO
import cv2
import math 
# Open capture device 0 and ask it for 640x480 frames.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)    # same property id as the literal 3
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)   # same property id as the literal 4

# Detection model, built from the yolov8n.pt weights file.
model = YOLO("yolo-Weights/yolov8n.pt")

# Label table: the integer class id of a detection is used as an index into
# this list, so the order of the entries must not change.
classNames = [
    # people
    "person",
    # vehicles
    "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    # street objects
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports
    "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    # kitchenware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake",
    # furniture
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # electronics
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # indoor items
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]



# Label of the only object class that gets a distance estimate.
target_class = "person"

# Calibration reference. These are placeholder numbers: measure them for the
# actual object and camera before trusting the estimates.
real_width = 0.5            # physical width of the target object, in meters
known_distance = 2.0        # camera-to-object distance of the reference shot, in meters
known_width_in_image = 200  # object width in that reference shot, in pixels

# Focal length derived from the reference shot:
#   focal_length = pixel_width * distance / real_width
focal_length = known_distance * known_width_in_image / real_width


# Main loop: read a frame, run the detector on it, and for every detection
# whose label equals target_class draw its bounding box together with the
# estimated distance. The annotated frame is shown in a window named 'Webcam';
# pressing 'q' in that window ends the loop.
#
# The try/finally guarantees that the camera is released and the window is
# closed even when the loop is left through an exception or an interrupt.
try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # No frame was delivered; passing it on to the model or to
            # imshow would raise, so stop cleanly instead.
            print("Failed to read a frame from the webcam, stopping.")
            break

        results = model(img, stream=True)

        for r in results:
            for box in r.boxes:
                # Map the numeric class id to its label and skip everything
                # that is not the class of interest.
                cls = int(box.cls[0])
                class_name = classNames[cls]
                if class_name != target_class:
                    continue

                # Bounding box corners as integer pixel coordinates.
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # Width of the bounding box in pixels. A degenerate box
                # (zero or negative width after truncation) cannot be used
                # as a divisor, so it is skipped.
                width_in_image = x2 - x1
                if width_in_image <= 0:
                    continue

                # Distance is inversely proportional to the apparent width.
                distance = (real_width * focal_length) / width_in_image

                # Draw the bounding box on the frame.
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # Confidence rounded up to two decimals.
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)
                print("classnamw is  --->",class_name)
                print("Estimated distance -->", distance, "meters")

                # Label anchored at the top-left corner of the box.
                org = [x1, y1]
                font = cv2.FONT_HERSHEY_SIMPLEX
                fontScale = 1
                color = (255, 0, 0)
                thickness = 2
                cv2.putText(img, f"{class_name} {distance:.2f}m", org, font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 1 person, 1 bed, 467.9ms
Confidence ---> 0.79
classnamw is  ---> person
Estimated distance --> 0.6993006993006993 meters
Speed: 8.5ms preprocess, 467.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 189.2ms
Confidence ---> 0.86
classnamw is  ---> person
Estimated distance --> 0.631911532385466 meters
Speed: 2.2ms preprocess, 189.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 171.5ms
Confidence ---> 0.85
classnamw is  ---> person
Estimated distance --> 0.6379585326953748 meters
Speed: 5.7ms preprocess, 171.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 172.0ms
Confidence ---> 0.88
classnamw is  ---> person
Estimated distance --> 0.6589785831960461 meters
Speed: 5.3ms preprocess, 172.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 171.0ms
Confidence ---> 0.85
classnamw is  ---> person
Estimated distance --