In [4]:
import cv2
import pandas as pd
import numpy as np
from ultralytics import YOLO
from tracker import *

# People counter: detects persons with YOLOv8, tracks them across frames and
# counts how many cross two adjacent polygons in each direction. The annotated
# frames are written to 'output2.avi'.

# Initialize YOLO model
model = YOLO('yolov8s.pt')

# Size every frame is resized to; the VideoWriter must use the same size,
# otherwise the written frames do not match the declared output dimensions.
FRAME_SIZE = (1020, 500)

# Label drawn next to a counted track. Only detections whose class name
# contains 'person' are handed to the tracker, so every track is a person.
PERSON_LABEL = 'person'

# Define areas of interest (polygon vertices in resized-frame coordinates).
# A track seen in area2 and then in area1 counts as entering;
# a track seen in area1 and then in area2 counts as exiting.
area1 = [(312, 388), (289, 390), (474, 469), (497, 462)]
area2 = [(279, 392), (250, 397), (423, 477), (454, 469)]

# Polygons converted once to the array form cv2.pointPolygonTest expects,
# instead of rebuilding them for every test on every frame.
_area1_contour = np.array(area1, np.int32)
_area2_contour = np.array(area2, np.int32)

# Initialize variables and objects
people_entering = {}   # track id -> last anchor point seen inside area2
entering = set()       # ids counted as entering
people_exiting = {}    # track id -> last anchor point seen inside area1
exiting = set()        # ids counted as exiting
tracker = Tracker()

# save label of coco datasets (one class name per line, indexed by class id)
with open("coco.txt", "r") as my_file:
    data = my_file.read()
class_list = data.split("\n")


def _in_area(contour, point):
    """Return True when ``point`` lies inside or on the edge of ``contour``."""
    return cv2.pointPolygonTest(contour, point, False) >= 0


def _annotate_crossing(frame, box, track_id, box_color, id_offset):
    """Draw the box, anchor dot, class label and track id of a counted person.

    ``box`` is ``(x3, y3, x4, y4)``; ``id_offset`` is the horizontal shift (in
    pixels) of the id text relative to the left edge of the box.
    """
    x3, y3, x4, y4 = box
    cv2.rectangle(frame, (x3, y3), (x4, y4), box_color, 2)
    cv2.circle(frame, (x4, y4), 4, (255, 0, 255), -1)
    cv2.putText(frame, PERSON_LABEL, (x3, y3 - 10),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
    cv2.putText(frame, str(track_id), (x3 + id_offset, y3 - 10),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 255), 1)


# Open video file
cap = cv2.VideoCapture('peoplecount1.mp4')
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output video.
    raise IOError("Could not open video file 'peoplecount1.mp4'")

# Define codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output2.avi', fourcc, 20.0, FRAME_SIZE)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, FRAME_SIZE)

        # Predict using YOLO model
        results = model.predict(frame)
        # Move the detections to host memory first so this also works when
        # the model runs on a GPU. Columns 0-3 are read as the box corners and
        # column 5 as the class index.
        detections = results[0].boxes.data.cpu().numpy()

        # Keep only the boxes whose class name contains 'person'.
        person_boxes = []
        for det in detections:
            class_name = class_list[int(det[5])]
            if 'person' in class_name:
                person_boxes.append(
                    [int(det[0]), int(det[1]), int(det[2]), int(det[3])])

        # NOTE(review): Tracker.update is defined in the local `tracker`
        # module; it is assumed to yield (x3, y3, x4, y4, id) per track.
        bbox_id = tracker.update(person_boxes)

        for x3, y3, x4, y4, track_id in bbox_id:
            box = (x3, y3, x4, y4)
            # The (x4, y4) corner of the box is the point tested against
            # both polygons.
            anchor = (x4, y4)
            in_area1 = _in_area(_area1_contour, anchor)
            in_area2 = _in_area(_area2_contour, anchor)

            # Entering: first seen in area2, then reaches area1.
            if in_area2:
                people_entering[track_id] = anchor
                cv2.rectangle(frame, (x3, y3), (x4, y4), (0, 0, 255), 2)
            if track_id in people_entering and in_area1:
                _annotate_crossing(frame, box, track_id, (0, 255, 0), 65)
                entering.add(track_id)

            # Exiting: first seen in area1, then reaches area2.
            if in_area1:
                people_exiting[track_id] = anchor
                cv2.rectangle(frame, (x3, y3), (x4, y4), (0, 255, 0), 2)
            if track_id in people_exiting and in_area2:
                _annotate_crossing(frame, box, track_id, (0, 0, 255), 55)
                exiting.add(track_id)

        # Draw areas of interest on the frame (debug aid, disabled)
        #cv2.polylines(frame, [_area1_contour], True, (255, 0, 0), 2)
        #cv2.putText(frame, str('1'), (504, 471), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        #cv2.polylines(frame, [_area2_contour], True, (255, 0, 0), 2)
        #cv2.putText(frame, str('2'), (466, 485), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

        # Count entering and exiting people
        cv2.putText(frame, 'No entering = ' + str(len(entering)), (20, 44),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, 'No exiting = ' + str(len(exiting)), (20, 82),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)

        # Write the frame to the output video
        out.write(frame)

        #cv2.imshow("RGB", frame)

        # Esc (key code 27) stops processing early; this only has an effect
        # while a preview window (imshow above) is enabled.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release resources even if processing fails part-way, so the output
    # file is finalized and the capture handle is freed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()



0: 320x640 1 car, 164.6ms
Speed: 3.0ms preprocess, 164.6ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 car, 173.5ms
Speed: 2.0ms preprocess, 173.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 car, 164.6ms
Speed: 2.0ms preprocess, 164.6ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 car, 161.6ms
Speed: 2.0ms preprocess, 161.6ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 car, 168.5ms
Speed: 2.0ms preprocess, 168.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 car, 171.5ms
Speed: 3.0ms preprocess, 171.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 car, 155.6ms
Speed: 2.0ms preprocess, 155.6ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 car, 142.6ms
Speed: 2.0ms preprocess, 142.6ms inference, 2.0ms postpro

Speed: 3.0ms preprocess, 191.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 183.5ms
Speed: 3.0ms preprocess, 183.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 165.6ms
Speed: 2.0ms preprocess, 165.6ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 185.5ms
Speed: 2.0ms preprocess, 185.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 1 car, 173.5ms
Speed: 2.0ms preprocess, 173.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 168.6ms
Speed: 3.0ms preprocess, 168.6ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 178.5ms
Speed: 2.0ms preprocess, 178.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 166.6ms
Speed: 2.0ms preprocess, 166.6ms inference, 3.0ms postprocess per image at shape (1, 3, 32


0: 320x640 1 person, 196.5ms
Speed: 3.0ms preprocess, 196.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 176.5ms
Speed: 2.0ms preprocess, 176.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 171.5ms
Speed: 3.0ms preprocess, 171.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 176.5ms
Speed: 3.0ms preprocess, 176.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 184.5ms
Speed: 2.0ms preprocess, 184.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 178.5ms
Speed: 3.0ms preprocess, 178.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 171.5ms
Speed: 3.0ms preprocess, 171.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 192.5ms
Speed: 3.0ms preprocess, 192.5ms inference, 2.0ms postprocess per image at

0: 320x640 1 person, 178.5ms
Speed: 4.0ms preprocess, 178.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 201.5ms
Speed: 2.0ms preprocess, 201.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 191.5ms
Speed: 2.0ms preprocess, 191.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 177.5ms
Speed: 3.0ms preprocess, 177.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 193.5ms
Speed: 3.0ms preprocess, 193.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 persons, 167.6ms
Speed: 2.0ms preprocess, 167.6ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 185.5ms
Speed: 2.0ms preprocess, 185.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 175.5ms
Speed: 3.0ms preprocess, 175.5ms inference, 2.0ms postprocess per image a


0: 320x640 1 person, 184.5ms
Speed: 2.0ms preprocess, 184.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 166.6ms
Speed: 3.0ms preprocess, 166.6ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 195.5ms
Speed: 3.0ms preprocess, 195.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 190.5ms
Speed: 2.0ms preprocess, 190.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 188.5ms
Speed: 3.0ms preprocess, 188.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 192.5ms
Speed: 2.0ms preprocess, 192.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 187.5ms
Speed: 2.0ms preprocess, 187.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 173.5ms
Speed: 3.0ms preprocess, 173.5ms inference, 2.0ms postprocess per image at


0: 320x640 2 persons, 2 cars, 2 handbags, 197.5ms
Speed: 3.0ms preprocess, 197.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 2 cars, 2 handbags, 166.6ms
Speed: 2.0ms preprocess, 166.6ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 2 cars, 3 handbags, 181.5ms
Speed: 2.0ms preprocess, 181.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 2 cars, 2 handbags, 152.6ms
Speed: 2.0ms preprocess, 152.6ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 3 cars, 1 handbag, 183.5ms
Speed: 3.0ms preprocess, 183.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 3 cars, 3 handbags, 194.5ms
Speed: 3.0ms preprocess, 194.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 3 cars, 2 handbags, 182.5ms
Speed: 3.0ms preprocess, 182.5ms inference, 2.0ms postproces


0: 320x640 2 persons, 1 handbag, 197.5ms
Speed: 3.0ms preprocess, 197.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 handbag, 1 refrigerator, 185.5ms
Speed: 2.0ms preprocess, 185.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 handbag, 1 refrigerator, 190.5ms
Speed: 2.0ms preprocess, 190.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 handbag, 1 refrigerator, 214.4ms
Speed: 3.0ms preprocess, 214.4ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 handbag, 180.5ms
Speed: 2.0ms preprocess, 180.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 handbag, 185.5ms
Speed: 3.0ms preprocess, 185.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 1 handbag, 177.5ms
Speed: 2.0ms preprocess, 177.5ms inference, 2.0ms postprocess per imag


0: 320x640 1 person, 174.5ms
Speed: 2.0ms preprocess, 174.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 178.5ms
Speed: 3.0ms preprocess, 178.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 194.5ms
Speed: 2.0ms preprocess, 194.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 175.5ms
Speed: 3.0ms preprocess, 175.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 202.5ms
Speed: 2.0ms preprocess, 202.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 163.6ms
Speed: 3.0ms preprocess, 163.6ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 190.5ms
Speed: 4.0ms preprocess, 190.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 206.4ms
Speed: 3.0ms preprocess, 206.4ms inference, 3.0ms postprocess per image at

Speed: 4.0ms preprocess, 223.4ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 214.4ms
Speed: 4.0ms preprocess, 214.4ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 191.5ms
Speed: 2.0ms preprocess, 191.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 200.5ms
Speed: 3.0ms preprocess, 200.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 185.5ms
Speed: 3.0ms preprocess, 185.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 (no detections), 192.5ms
Speed: 2.0ms preprocess, 192.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 192.5ms
Speed: 2.0ms preprocess, 192.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 179.5ms
Speed: 3.0ms preprocess, 179.5ms inference, 1.0ms postprocess per ima


0: 320x640 2 persons, 208.4ms
Speed: 3.0ms preprocess, 208.4ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 218.4ms
Speed: 3.0ms preprocess, 218.4ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 209.4ms
Speed: 3.0ms preprocess, 209.4ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 194.5ms
Speed: 3.0ms preprocess, 194.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 211.4ms
Speed: 3.0ms preprocess, 211.4ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 198.5ms
Speed: 2.0ms preprocess, 198.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 197.5ms
Speed: 3.0ms preprocess, 197.5ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 2 persons, 200.5ms
Speed: 3.0ms preprocess, 200.5ms inference, 2.0ms postprocess per 