In [1]:
import math

import cv2
import cvzone
import numpy as np
from ultralytics import YOLO

from sort import *

In [12]:
# People counter: detect persons with YOLO inside a masked region of each
# processed frame, track them with SORT, and count each track once when its
# box centre enters the band around the "up" or "down" counting line.
# Keyboard controls in the preview window: p = pause, r = resume, q = quit.

# Object classes (list index = class id reported by the model)
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Line limits as [x1, y1, x2, y2]; both lines are horizontal (y1 == y2)
limitsUp = [103, 161, 296, 161]
limitsDown = [527, 489, 735, 489]
totalCountUp = set()  # Use a set to avoid duplicate counts
totalCountDown = set()  # Use a set to avoid duplicate counts

# Dictionary to map SORT IDs to new IDs starting from 1
id_map = {}
next_id = 1

# Define a speed factor to skip frames
speed_factor = 2  # Process every 2nd frame
frame_count = 0

paused = False  # To control the paused state of the video


def _in_count_zone(limits, cx, cy, tolerance=15):
    """Return True when (cx, cy) is strictly inside the line's x-extent and
    strictly within `tolerance` pixels of the line's y coordinate."""
    return limits[0] < cx < limits[2] and limits[1] - tolerance < cy < limits[1] + tolerance


# Load the static images once, up front. cv2.imread returns None instead of
# raising when a file is missing or unreadable, so fail early with a clear message.
mask = cv2.imread("mask-1.png")
if mask is None:
    raise FileNotFoundError("Could not read mask image 'mask-1.png'")

imgGraphics = cv2.imread("graphics-1.png", cv2.IMREAD_UNCHANGED)
if imgGraphics is None:
    raise FileNotFoundError("Could not read overlay image 'graphics-1.png'")

# Loading the YOLO model
model = YOLO('../Yolo-Weights/yolov8n.pt')

# Tracking
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)

# Video capture from a file
cap = cv2.VideoCapture('people.mp4')
if not cap.isOpened():
    raise IOError("Could not open video file 'people.mp4'")

try:
    while True:
        if not paused:
            success, img = cap.read()
            if not success:
                break  # end of video (or read error)

            # Skip frames based on the speed factor
            if frame_count % speed_factor == 0:
                # Resize the mask only when it does not already match the frame
                if mask.shape[:2] != img.shape[:2]:
                    mask = cv2.resize(mask, (img.shape[1], img.shape[0]))

                # Keep only the region of interest for detection; this must be
                # computed before the graphics are drawn onto the frame.
                imgRegion = cv2.bitwise_and(img, mask)
                img = cvzone.overlayPNG(img, imgGraphics, (730, 260))

                # Collect person detections as rows of [x1, y1, x2, y2, conf]
                results = model(imgRegion, stream=True)
                person_rows = []
                for r in results:
                    for box in r.boxes:
                        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                        # Confidence rounded up to two decimals
                        conf = float(math.ceil((box.conf[0] * 100)) / 100)
                        cls = int(box.cls[0])
                        if classNames[cls] == 'person' and conf > 0.3:
                            person_rows.append([x1, y1, x2, y2, conf])
                # reshape keeps the (0, 5) shape when nothing was detected
                detections = np.array(person_rows, dtype=float).reshape(-1, 5)

                resultsTracker = tracker.update(detections)
                cv2.line(img, (limitsUp[0], limitsUp[1]), (limitsUp[2], limitsUp[3]), (0, 0, 255), 5)
                cv2.line(img, (limitsDown[0], limitsDown[1]), (limitsDown[2], limitsDown[3]), (0, 0, 255), 5)

                for result in resultsTracker:
                    x1, y1, x2, y2, ID = result
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                    w, h = x2 - x1, y2 - y1

                    # Remap the tracker's ID to a compact display ID (1, 2, 3, ...)
                    if ID not in id_map:
                        id_map[ID] = next_id
                        next_id += 1
                    new_id = id_map[ID]
                    print(f"ID: {new_id}, Bounding Box: {[x1, y1, x2, y2]}")
                    cvzone.cornerRect(img, (x1, y1, w, h), l=9, rt=2, colorR=(255, 0, 255))
                    cvzone.putTextRect(img, f'{new_id}', (max(0, x1), max(35, y1)),
                                       offset=10, scale=2, thickness=3)

                    # Box centre is the point tested against the counting lines
                    cx, cy = x1 + w // 2, y1 + h // 2
                    cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

                    # Flash the line green only on the frame where a new ID is counted
                    if _in_count_zone(limitsUp, cx, cy) and new_id not in totalCountUp:
                        totalCountUp.add(new_id)
                        cv2.line(img, (limitsUp[0], limitsUp[1]), (limitsUp[2], limitsUp[3]), (0, 255, 0), 5)

                    if _in_count_zone(limitsDown, cx, cy) and new_id not in totalCountDown:
                        totalCountDown.add(new_id)
                        cv2.line(img, (limitsDown[0], limitsDown[1]), (limitsDown[2], limitsDown[3]), (0, 255, 0), 5)

                cv2.putText(img, str(len(totalCountUp)), (929, 345), cv2.FONT_HERSHEY_PLAIN, 5, (139, 195, 75), 7)
                cv2.putText(img, str(len(totalCountDown)), (1191, 345), cv2.FONT_HERSHEY_PLAIN, 5, (50, 50, 230), 7)

                cv2.imshow("Image", img)

            frame_count += 1

        # Poll the keyboard even while paused so 'r' and 'q' stay responsive
        key = cv2.waitKey(1) & 0xFF

        if key == ord('p'):  # Pause the video
            paused = True
        elif key == ord('r'):  # Resume the video
            paused = False
        elif key == ord('q'):  # Quit the video
            break
finally:
    # Always free the capture and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 3 persons, 78.5ms
Speed: 0.0ms preprocess, 78.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 1, Bounding Box: [399, 651, 516, 719]
ID: 2, Bounding Box: [400, 649, 482, 719]
ID: 3, Bounding Box: [235, 318, 354, 577]

0: 384x640 3 persons, 100.2ms
Speed: 1.0ms preprocess, 100.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 1, Bounding Box: [397, 646, 516, 718]
ID: 2, Bounding Box: [403, 645, 482, 716]
ID: 3, Bounding Box: [232, 316, 352, 576]

0: 384x640 2 persons, 94.2ms
Speed: 15.6ms preprocess, 94.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 1, Bounding Box: [396, 640, 511, 716]
ID: 3, Bounding Box: [229, 310, 351, 569]

0: 384x640 2 persons, 78.1ms
Speed: 12.5ms preprocess, 78.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 1, Bounding Box: [391, 636, 501, 715]
ID: 3, Bounding Box: [226, 304, 349, 562]

0: 384x640 3 persons, 78.1ms
Speed: 15.6ms preprocess, 78.1ms infer

ID: 5, Bounding Box: [349, 585, 480, 715]
ID: 1, Bounding Box: [294, 445, 406, 703]
ID: 3, Bounding Box: [148, 131, 270, 394]

0: 384x640 5 persons, 62.4ms
Speed: 0.0ms preprocess, 62.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 5, Bounding Box: [338, 579, 475, 715]
ID: 1, Bounding Box: [292, 433, 403, 686]
ID: 3, Bounding Box: [146, 127, 269, 392]

0: 384x640 5 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 5, Bounding Box: [333, 573, 473, 715]
ID: 1, Bounding Box: [288, 427, 401, 678]
ID: 3, Bounding Box: [144, 121, 266, 385]

0: 384x640 5 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 6, Bounding Box: [527, 676, 590, 719]
ID: 7, Bounding Box: [387, 0, 474, 179]
ID: 5, Bounding Box: [328, 567, 470, 715]
ID: 1, Bounding Box: [285, 419, 396, 665]
ID: 3, Bounding Box: [141, 115, 264, 380]

0: 384x640 5 persons, 62.9ms
Spe

0: 384x640 7 persons, 64.5ms
Speed: 0.0ms preprocess, 64.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [372, 11, 462, 207]
ID: 6, Bounding Box: [454, 537, 543, 713]
ID: 7, Bounding Box: [448, 19, 526, 255]
ID: 5, Bounding Box: [283, 425, 413, 706]
ID: 1, Bounding Box: [232, 285, 332, 496]
ID: 3, Bounding Box: [103, 7, 223, 281]

0: 384x640 6 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [375, 18, 462, 211]
ID: 6, Bounding Box: [453, 531, 541, 713]
ID: 7, Bounding Box: [450, 25, 529, 261]
ID: 5, Bounding Box: [276, 418, 409, 701]
ID: 1, Bounding Box: [230, 279, 328, 489]
ID: 3, Bounding Box: [100, 2, 221, 275]

0: 384x640 6 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [377, 22, 464, 214]
ID: 6, Bounding Box: [451, 525, 540, 713]
ID: 7, Bounding Box: [452, 29, 530, 264]


ID: 9, Bounding Box: [428, 107, 510, 293]
ID: 6, Bounding Box: [396, 403, 484, 704]
ID: 7, Bounding Box: [502, 108, 581, 340]
ID: 5, Bounding Box: [237, 300, 360, 580]
ID: 1, Bounding Box: [180, 158, 273, 365]

0: 384x640 5 persons, 62.5ms
Speed: 0.0ms preprocess, 62.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [431, 110, 513, 296]
ID: 6, Bounding Box: [393, 398, 482, 700]
ID: 7, Bounding Box: [504, 113, 583, 344]
ID: 5, Bounding Box: [234, 294, 356, 574]
ID: 1, Bounding Box: [177, 154, 270, 360]

0: 384x640 5 persons, 72.0ms
Speed: 0.0ms preprocess, 72.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [432, 115, 515, 301]
ID: 6, Bounding Box: [391, 390, 480, 692]
ID: 7, Bounding Box: [507, 117, 585, 348]
ID: 5, Bounding Box: [233, 288, 354, 569]
ID: 1, Bounding Box: [174, 148, 269, 355]

0: 384x640 5 persons, 62.4ms
Speed: 0.0ms preprocess, 62.4ms inference, 0.0ms postprocess per image at shape (1, 3,

0: 384x640 5 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [499, 219, 579, 405]
ID: 6, Bounding Box: [319, 268, 414, 569]
ID: 7, Bounding Box: [572, 222, 651, 449]
ID: 5, Bounding Box: [153, 167, 291, 448]
ID: 1, Bounding Box: [123, 39, 217, 239]

0: 384x640 5 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [501, 225, 581, 410]
ID: 6, Bounding Box: [312, 264, 411, 564]
ID: 7, Bounding Box: [573, 226, 652, 453]
ID: 5, Bounding Box: [154, 174, 292, 448]
ID: 1, Bounding Box: [121, 34, 216, 234]

0: 384x640 5 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [503, 229, 583, 414]
ID: 6, Bounding Box: [303, 259, 407, 559]
ID: 7, Bounding Box: [576, 230, 654, 455]
ID: 5, Bounding Box: [147, 164, 288, 438]
ID: 1, Bounding Box: [119, 28, 213

0: 384x640 4 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [563, 339, 643, 527]
ID: 6, Bounding Box: [205, 149, 335, 441]
ID: 7, Bounding Box: [635, 338, 712, 567]
ID: 5, Bounding Box: [110, 37, 230, 309]

0: 384x640 4 persons, 53.4ms
Speed: 0.0ms preprocess, 53.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [566, 343, 645, 531]
ID: 6, Bounding Box: [202, 143, 332, 433]
ID: 7, Bounding Box: [640, 343, 716, 572]
ID: 5, Bounding Box: [110, 31, 227, 301]

0: 384x640 4 persons, 62.5ms
Speed: 0.0ms preprocess, 62.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [569, 349, 648, 537]
ID: 6, Bounding Box: [200, 140, 331, 428]
ID: 7, Bounding Box: [641, 348, 718, 576]
ID: 5, Bounding Box: [108, 25, 225, 291]

0: 384x640 4 persons, 62.5ms
Speed: 0.0ms preprocess, 62.5ms inference, 0.0ms postprocess per image at shape (1, 3, 38

Speed: 0.0ms preprocess, 62.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [646, 489, 727, 679]
ID: 6, Bounding Box: [174, -3, 294, 293]
ID: 7, Bounding Box: [718, 480, 801, 715]

0: 384x640 3 persons, 69.5ms
Speed: 0.0ms preprocess, 69.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [649, 500, 732, 697]
ID: 6, Bounding Box: [166, -4, 290, 291]
ID: 7, Bounding Box: [720, 485, 803, 719]

0: 384x640 3 persons, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [653, 509, 735, 709]
ID: 6, Bounding Box: [155, -4, 284, 288]
ID: 7, Bounding Box: [723, 489, 806, 722]

0: 384x640 3 persons, 66.0ms
Speed: 0.0ms preprocess, 66.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
ID: 9, Bounding Box: [656, 513, 738, 714]
ID: 6, Bounding Box: [147, -5, 281, 286]
ID: 7, Bounding Box: [726, 495, 808, 724]

0: 384x640 3 persons,

Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.9ms
Speed: 0.0ms preprocess, 62.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 66.0ms
Speed: 0.0ms preprocess, 66.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
