In [1]:
from ultralytics import YOLO
import cv2
import numpy as np
from sort import *

In [2]:
modelL = YOLO("yolov8l.pt")

In [3]:
class_names = ["person" , "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
               "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
               "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
               "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
               "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
               "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
               "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster",
               "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

In [4]:
filter = cv2.imread("filter.png")

In [5]:
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)

In [6]:
limit = [160, 500, 600, 500]  # x1, y1, x2, y2

In [10]:
countedId = []
totalCount = 0

cap = cv2.VideoCapture("video.mp4")
ret, first_frame = cap.read()
if not ret:
    raise RuntimeError("Failed to read from video.mp4")
frame_h, frame_w = first_frame.shape[:2]
if filter is None:
    filter_resized = None
else:
    if filter.shape[0] != frame_h or filter.shape[1] != frame_w:
        filter_resized = cv2.resize(filter, (frame_w, frame_h), interpolation=cv2.INTER_AREA)
    else:
        filter_resized = filter
img = first_frame
while True:
    if filter_resized is None:
        imgRegion = img.copy()
    else:
        fr = filter_resized
        if fr.ndim == 3 and fr.shape[2] == 4:
            alpha = fr[:, :, 3].astype(float) / 255.0
            overlay = fr[:, :, :3].astype(float)
            img_float = img.astype(float)
            for c in range(3):
                img_float[:, :, c] = (1 - alpha) * img_float[:, :, c] + alpha * overlay[:, :, c]
            imgRegion = img_float.astype(np.uint8)
        else:
            if fr.ndim == 2:
                fr_bgr = cv2.cvtColor(fr, cv2.COLOR_GRAY2BGR)
            else:
                fr_bgr = fr
            imgRegion = cv2.bitwise_and(img, fr_bgr)
            
            
            
    results = modelL(source=imgRegion, stream=True)
    
    detections = np.empty((0, 5))
    
    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
            conf = float(box.conf[0].cpu().numpy())
            cls = int(box.cls[0].cpu().numpy())

            if (class_names[cls] in ["car", "truck", "bus", "motorcycle"]) and conf > 0.5:

                current_array = np.array([x1, y1, x2, y2, conf])
                detections = np.vstack((detections, current_array))

    ResultsTrackers = tracker.update(detections)
    
    cv2.line(img, (limit[0], limit[1]), (limit[2], limit[3]), (0, 0, 255), 3)
    
    for result in ResultsTrackers:
        x1, y1, x2, y2, id = map(int, result)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, str(id), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cx , cy = (x1 + x2) // 2, (y1 + y2) // 2
        cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
        if limit[0] < cx < limit[2] and limit[1] - 10 < cy < limit[1] + 10:
            cv2.line(img, (160, 500), (600, 500), (0, 255, 0), 3)
            if id not in countedId:
                countedId.append(id)
                totalCount += 1
    
    cv2.putText(img, f'Total vehicles Count: {totalCount}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    # Read next frame
    ret, img = cap.read()
    if not ret:
        break
    
cap.release()
cv2.destroyAllWindows()


0: 384x640 5 cars, 9.1ms
Speed: 10.2ms preprocess, 9.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.3ms
Speed: 0.8ms preprocess, 9.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 10.3ms
Speed: 1.0ms preprocess, 10.3ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 10.2ms
Speed: 0.8ms preprocess, 10.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.8ms
Speed: 0.8ms preprocess, 9.8ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.7ms
Speed: 0.8ms preprocess, 9.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.0ms
Speed: 0.9ms preprocess, 9.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.1ms
Speed: 0.8ms preprocess, 9.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars,

In [9]:
totalCount

115