In [10]:
# tracker
import math

class Tracker:
    """Give detections a persistent integer ID by matching box centres.

    A new detection is treated as an already-tracked object when the
    Euclidean distance between its centre and that object's last known
    centre is strictly below ``max_distance``.  Among all tracked objects
    within range, the nearest one wins.

    Attributes:
        center_points: dict mapping object id -> (cx, cy) of its last centre.
        id_count: the id that will be given to the next new object.
        max_distance: matching radius (same units as the box coordinates).
    """

    def __init__(self, max_distance=35):
        """Create an empty tracker.

        Args:
            max_distance: maximum centre-to-centre distance (exclusive) for a
                detection to be matched to an existing id.  Defaults to 35,
                the value that was previously hard-coded.
        """
        self.center_points = {}  # Store the center positions of the objects
        self.id_count = 0  # Incremented by one each time a new object id is issued
        self.max_distance = max_distance

    def update(self, objects_rect):
        """Match the detections of one frame against the tracked objects.

        Args:
            objects_rect: iterable of boxes, each unpackable as ``(x, y, w, h)``
                (top-left corner plus width and height).

        Returns:
            A list with one ``[x, y, w, h, object_id]`` entry per input box,
            in input order.  Ids that received no detection in this call are
            dropped from ``center_points``.
        """
        # Objects boxes and ids
        objects_bbs_ids = []

        for rect in objects_rect:
            x, y, w, h = rect
            # Centre of the box, using integer division as before.
            cx = (x + x + w) // 2
            cy = (y + y + h) // 2

            # Look for the closest known centre within range.  Taking the
            # nearest candidate (rather than the first one found) avoids
            # handing a detection to a neighbouring object's id.
            matched_id = None
            best_dist = self.max_distance
            for object_id, (px, py) in self.center_points.items():
                dist = math.hypot(cx - px, cy - py)
                if dist < best_dist:
                    matched_id, best_dist = object_id, dist

            # Nothing close enough: this is a new object, issue a fresh id.
            if matched_id is None:
                matched_id = self.id_count
                self.id_count += 1

            self.center_points[matched_id] = (cx, cy)
            objects_bbs_ids.append([x, y, w, h, matched_id])

        # Keep only the ids that were seen in this call.
        self.center_points = {
            object_id: self.center_points[object_id]
            for *_, object_id in objects_bbs_ids
        }
        return objects_bbs_ids

In [11]:
import cv2
import pandas as pd
import numpy as np
from ultralytics import YOLO
# from tracker import *

In [12]:
# Build the Ultralytics YOLO detector from 'yolov8s.pt'; it is used for per-frame prediction below.
model=YOLO('yolov8s.pt')

In [13]:
# Open the input video and stop right away if OpenCV could not open it.
cap = cv2.VideoCapture('traffic.mp4')
assert cap.isOpened(), "Error reading video file"

In [14]:
# Read the class names (one per line); the `with` block guarantees the file
# handle is closed as soon as its contents have been read.
with open("coco.txt", "r") as my_file:
    data = my_file.read()
class_list = data.split("\n")
print(class_list)

['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [15]:
# Open the video and read its frame size and frame rate, each truncated to int.
cap = cv2.VideoCapture('traffic.mp4')
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
w, h, fps

(1280, 720, 30)

In [16]:
# State shared with the processing loop.
count = 0
tracker = Tracker()
area_c = set()  # ids of tracked objects that have been seen inside the zone

# Corners of the rectangular counting zone, listed so that consecutive
# points are joined when the polygon is drawn.
x1, x2 = 150, 1000
y1, y2 = 650, 700
area = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]

In [17]:
# Video writer: annotated frames are written at the source size and frame rate.
video_writer = cv2.VideoWriter("output.avi",
                               cv2.VideoWriter_fourcc(*'mp4v'),
                               fps,
                               (w, h))

# The counting zone never changes, so build its contour array once.
area_contour = np.array(area, np.int32)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        results = model.predict(frame)

        # Columns 0-3 of each detection row are read as the box corners
        # (left, top, right, bottom).  Tracker.update() unpacks boxes as
        # (x, y, w, h), so convert before handing them over.
        # NOTE(review): detections are not filtered by class, yet every box
        # drawn below is labelled "car" - confirm this is intended.
        detections = []
        for row in results[0].boxes.data.tolist():
            left, top, right, bottom = (int(v) for v in row[:4])
            detections.append([left, top, right - left, bottom - top])

        bbox_idx = tracker.update(detections)

        for box_x, box_y, box_w, box_h, object_id in bbox_idx:
            # Back to corner form for drawing and for the zone test.
            x3, y3 = box_x, box_y
            x4, y4 = box_x + box_w, box_y + box_h

            # cv2.pointPolygonTest() reports where a point lies relative to a
            # contour: negative when outside, positive when inside, zero when
            # on the edge.  The bottom-right corner of the box is tested.
            inside = cv2.pointPolygonTest(area_contour, (x4, y4), False)

            if inside >= 0:
                # Draw the bounding box.
                cv2.rectangle(frame, (x3, y3), (x4, y4), (0, 255, 0), 2)
                # Overlay the label text at the top-left corner of the box.
                cv2.putText(frame, str("car"), (x3, y3), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 1)
                area_c.add(object_id)

        # Running count of distinct ids seen in the zone, plus the zone outline.
        cv2.putText(frame, str(len(area_c)), (1000, 60), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 2)
        cv2.polylines(frame, [area_contour], True, (255, 0, 0), 2)

        video_writer.write(frame)
finally:
    # Release both handles even if processing fails part-way; releasing the
    # writer is what closes the output file.
    cap.release()
    video_writer.release()
# cv2.destroyAllWindows()


0: 384x640 10 cars, 1 truck, 62.8ms
Speed: 2.0ms preprocess, 62.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 56.1ms
Speed: 1.7ms preprocess, 56.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 1 truck, 52.2ms
Speed: 1.7ms preprocess, 52.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 51.8ms
Speed: 1.9ms preprocess, 51.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 50.0ms
Speed: 1.9ms preprocess, 50.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 55.4ms
Speed: 1.8ms preprocess, 55.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 56.5ms
Speed: 1.6ms preprocess, 56.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 52.7ms
Speed: 1.7ms preprocess, 5