In [1]:
import cv2
from ultralytics import YOLO
import time 
import os 



In [2]:
# --- Calibration: derive the camera focal length from one reference frame ---
# Pinhole-camera relation used below: f = P * D / W
#   P = width of the detected person in pixels
#   D = known distance from the camera to that person
#   W = assumed real-world width of a person
model = YOLO("yolov8l")

pre_cap = cv2.VideoCapture("/Users/maxkucher/pytorch/smart_camera/ex.mp4")

names = model.names
threshold = 0.5

# NOTE(review): the detection cell divides the estimated distance by 100 and
# labels it "m", so these two constants are presumably centimetres — confirm.
real_width_person = 30
known_distance = 250
focal_length = None

# Only the first frame is used for calibration; release the capture right away
# so the file handle is not leaked for the rest of the kernel session.
try:
    t_ret, t_frame = pre_cap.read()
finally:
    pre_cap.release()

# Without this check a failed read hands `None` to the model instead of a frame.
if not t_ret:
    raise RuntimeError("Could not read a calibration frame from the reference video.")

pre_results = model(t_frame)[0]

# Each row is [x1, y1, x2, y2, score, class_id]; take the first confident "person".
for result in pre_results.boxes.data.tolist():
    x1, _, x2, _, score, class_id = result
    name = names[int(class_id)]
    if score > threshold and name == "person":
        # define width in pixels of "person"
        width_in_pixels = x2 - x1
        # f = P * D / W
        focal_length = (width_in_pixels * known_distance) / real_width_person
        break

# Raise instead of calling exit(): exit() would shut down the notebook kernel,
# while an exception stops "Run All" and keeps the session inspectable.
if focal_length is None:
    raise RuntimeError("Focus destination is not detected.")




0: 640x384 1 person, 1 dog, 379.8ms
Speed: 3.8ms preprocess, 379.8ms inference, 582.0ms postprocess per image at shape (1, 3, 640, 384)


In [3]:
# Display the focal length computed by the calibration cell.
focal_length

3124.15771484375

In [4]:
# --- Detection loop: annotate each frame with class, estimated distance and count ---
# Relies on `names`, `real_width_person` and `focal_length` from the calibration cell.
model = YOLO("yolov8l")

cap = cv2.VideoCapture("/Users/maxkucher/pytorch/smart_camera/camera.mp4")

threshold = 0.5

interval = 10      # minimum number of seconds between two saved snapshots
past_time = 0      # time.time() of the last saved snapshot (0 => save on first detection)
frame_counter = 1  # incremented once per accepted detection; keeps snapshot names unique

# cv2.imwrite does not create missing directories, it just returns False.
path_to_folder = "/Users/maxkucher/pytorch/smart_camera/detected_objects"
os.makedirs(path_to_folder, exist_ok=True)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the input video.")

    while True:

        total_objects = 0

        ret, frame = cap.read()

        # End of stream (or read failure): stop processing.
        if not ret:
            break

        results = model(frame)[0]

        # Each row is [x1, y1, x2, y2, score, class_id].
        for result in results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result

            if threshold < score:

                current_time = time.time()
                label = names[int(class_id)]

                total_objects += 1

                width_in_pixels = x2 - x1

                # D = W * f / P, then / 100 to convert to the metres shown in the label.
                # True division is required here: `// 100` truncated every distance
                # to a whole number of metres.
                # NOTE(review): W is the *person* width, yet it is applied to every
                # detected class — distances for non-person objects are not meaningful.
                if width_in_pixels > 0:
                    distance = (real_width_person * focal_length) / width_in_pixels / 100
                    caption = f"{label}:{distance:.2f}m"
                else:
                    # Degenerate box: skip the distance rather than divide by zero.
                    caption = label

                cv2.putText(frame, caption, (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.rectangle(frame, (int(x1), int(y1 + 20)), (int(x2), int(y2)), (0, 255, 255), thickness=2)

                # Save at most one snapshot every `interval` seconds.
                if current_time - past_time >= interval:
                    file_name = os.path.join(path_to_folder, f"{label}_file_{frame_counter}.png")
                    # Only report success (and restart the timer) when the file was written.
                    if cv2.imwrite(file_name, frame):
                        print(f"Object {label} was filmed and saved into {path_to_folder}")
                        past_time = current_time
                    else:
                        print(f"Failed to save snapshot to {file_name}")

                frame_counter += 1

        # Black banner with the per-frame object count in the top-left corner.
        cv2.rectangle(frame, (8, 15), (350, 25), (0, 0, 0), 20)
        cv2.putText(frame, f"Total object count: {total_objects}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.imshow("Camera", frame)

        # Press "q" in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the preview window, even on an
    # exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 (no detections), 384.0ms
Speed: 1.7ms preprocess, 384.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 356.9ms
Speed: 42.2ms preprocess, 356.9ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 595.5ms
Speed: 1.8ms preprocess, 595.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 314.7ms
Speed: 1.4ms preprocess, 314.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 363.3ms
Speed: 1.4ms preprocess, 363.3ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 336.4ms
Speed: 1.6ms preprocess, 336.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 340.1ms
Speed: 1.4ms preprocess, 340.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 315.7ms
Speed: 1.4ms prep

In [None]:
# Display the class-id -> label mapping (`names` is bound to `model.names` in the calibration cell).
names

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

'bicycle'