This notebook counts restaurant customers using a log produced by a tracking algorithm from https://github.com/PaddlePaddle/PaddleDetection.
ByteTrack was used for tracking and YOLOX for detection.

In [3]:
import pandas as pd
import numpy as np

In [4]:
# tl = top left
# br = bottom right

column_names = ["frame", "person_id", "x_tl", "y_tl", "x_delta_br", "y_delta_br", "confidence", "dunno_1", "dunno_2", "dunno_3"]

In [5]:
track = pd.read_csv('bytetrack_yolox.txt', names=column_names, header=None)

In [6]:
# Show the loaded tracking log (one row per detected person per frame).
track

Unnamed: 0,frame,person_id,x_tl,y_tl,x_delta_br,y_delta_br,confidence,dunno_1,dunno_2,dunno_3
0,1,1,870.683960,275.762390,407.534363,468.123047,0.962106,-1,-1,-1
1,1,2,886.169189,145.110535,288.502869,234.870178,0.903475,-1,-1,-1
2,1,3,1190.857422,197.621826,184.442017,450.123352,0.893056,-1,-1,-1
3,1,4,1290.299072,117.527039,135.926147,398.794189,0.890587,-1,-1,-1
4,1,5,1387.999756,-37.773735,62.756226,136.049362,0.844230,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...
4087,1575,73,1479.102173,-52.429428,86.653473,174.813492,0.877379,-1,-1,-1
4088,1576,72,1557.762939,-54.580208,71.471069,173.036179,0.776701,-1,-1,-1
4089,1576,73,1479.798584,-49.639069,84.337959,171.187317,0.874249,-1,-1,-1
4090,1577,72,1557.138306,-56.410378,70.762894,169.741623,0.806881,-1,-1,-1


In [7]:
def get_center(x, x_delta, y, y_delta):
    """Return the geometric center of a bounding box as an ``(x, y)`` tuple.

    Args:
        x: x coordinate of the box's top-left corner.
        x_delta: horizontal offset from the top-left to the bottom-right corner.
        y: y coordinate of the box's top-left corner.
        y_delta: vertical offset from the top-left to the bottom-right corner.

    Returns:
        Tuple ``(x_center, y_center)``: the midpoint between the top-left
        corner and the bottom-right corner.
    """
    # Midpoint of (x, x + x_delta), written as (2x + delta) / 2.
    center_x = (2 * x + x_delta) / 2
    center_y = (2 * y + y_delta) / 2
    return center_x, center_y

def is_in_entrance(x, y, x_thre=350, y_thre=695):
    """Tell whether a point lies inside the restaurant-entrance region.

    The entrance is modelled as the image area with ``x < x_thre`` and
    ``y < y_thre``, where both coordinates are counted from the top-left
    corner of the image. Both comparisons are strict, so points exactly on a
    threshold are outside.

    Args:
        x: x coordinate(s) of the point; a scalar, numpy array or pandas Series.
        y: y coordinate(s) of the point; same kind as ``x``.
        x_thre: x threshold of the entrance region.
        y_thre: y threshold of the entrance region
            (695 or 750 for the example video).

    Returns:
        A boolean (or an element-wise boolean array/Series) that is true
        where the point is inside the entrance region.
    """
    # Use `&` rather than `*` to combine the two conditions: `True * True`
    # is the int 1 for plain Python bools, whereas `&` keeps a real boolean
    # and still works element-wise on numpy arrays and pandas Series.
    return (x < x_thre) & (y < y_thre)

def check_entrance(row):
    """Locate a bounding box's center and test it against the entrance region.

    Args:
        row: one detection record exposing the attributes ``x_tl``,
            ``x_delta_br``, ``y_tl`` and ``y_delta_br``.

    Returns:
        Tuple ``(x_center, y_center, is_in)`` where ``is_in`` tells whether the
        box center is within the restaurant entrance boundaries in this frame
        (using the default thresholds of ``is_in_entrance``).
    """
    center = get_center(row.x_tl, row.x_delta_br, row.y_tl, row.y_delta_br)
    return (*center, is_in_entrance(*center))

In [8]:
# Add the bounding-box center and the entrance flag for every detection.
# The helpers are applied to whole columns at once (vectorized) instead of
# calling them row by row through DataFrame.apply(axis=1); the arithmetic and
# comparisons are element-wise, so the resulting columns hold the same values.
x_center, y_center = get_center(track.x_tl, track.x_delta_br, track.y_tl, track.y_delta_br)
track["x_center"] = x_center
track["y_center"] = y_center
# Boolean column: True where the box center is inside the entrance region.
track["is_in_entrance"] = is_in_entrance(x_center, y_center)
track

Unnamed: 0,frame,person_id,x_tl,y_tl,x_delta_br,y_delta_br,confidence,dunno_1,dunno_2,dunno_3,x_center,y_center,is_in_entrance
0,1,1,870.683960,275.762390,407.534363,468.123047,0.962106,-1,-1,-1,1074.451141,509.823914,False
1,1,2,886.169189,145.110535,288.502869,234.870178,0.903475,-1,-1,-1,1030.420624,262.545624,False
2,1,3,1190.857422,197.621826,184.442017,450.123352,0.893056,-1,-1,-1,1283.078430,422.683502,False
3,1,4,1290.299072,117.527039,135.926147,398.794189,0.890587,-1,-1,-1,1358.262146,316.924133,False
4,1,5,1387.999756,-37.773735,62.756226,136.049362,0.844230,-1,-1,-1,1419.377869,30.250946,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4087,1575,73,1479.102173,-52.429428,86.653473,174.813492,0.877379,-1,-1,-1,1522.428909,34.977318,False
4088,1576,72,1557.762939,-54.580208,71.471069,173.036179,0.776701,-1,-1,-1,1593.498474,31.937881,False
4089,1576,73,1479.798584,-49.639069,84.337959,171.187317,0.874249,-1,-1,-1,1521.967564,35.954590,False
4090,1577,72,1557.138306,-56.410378,70.762894,169.741623,0.806881,-1,-1,-1,1592.519753,28.460434,False


In [9]:
# Show only the detections whose box center is inside the entrance region.
track.loc[track["is_in_entrance"]]

Unnamed: 0,frame,person_id,x_tl,y_tl,x_delta_br,y_delta_br,confidence,dunno_1,dunno_2,dunno_3,x_center,y_center,is_in_entrance
711,103,1,172.751572,540.023193,263.659210,297.309418,0.703114,-1,-1,-1,304.581177,688.677902,True
1822,300,26,91.398499,524.037354,252.410370,299.574890,0.815125,-1,-1,-1,217.603683,673.824799,True
1826,301,26,89.093002,520.319275,251.975922,305.620117,0.631808,-1,-1,-1,215.080963,673.129333,True
1830,302,26,95.167252,518.743103,276.967255,339.670502,0.615939,-1,-1,-1,233.650879,688.578354,True
1862,311,36,135.177872,525.844543,161.865662,213.671509,0.636877,-1,-1,-1,216.110703,632.680298,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3811,1123,66,179.842346,511.941742,186.546951,269.396057,0.562354,-1,-1,-1,273.115822,646.639771,True
3813,1124,66,175.193665,511.479492,186.173904,263.621246,0.411187,-1,-1,-1,268.280617,643.290115,True
3839,1150,63,143.867706,552.847656,205.260864,278.579956,0.441341,-1,-1,-1,246.498138,692.137634,True
3840,1151,63,142.700958,547.165649,204.777054,279.135590,0.294423,-1,-1,-1,245.089485,686.733444,True


In [19]:
# Number of frames each tracked id spent inside the entrance region, most frequent first.
print("How many frames different ids of people were inside the restaurant entrance box:")
print(
    track.loc[track.is_in_entrance, "person_id"]
    .value_counts()
)

How many frames different ids of people were inside the restaurant entrance box:
66    63
2     52
38    47
39    42
68    31
52    29
36     9
61     8
37     4
26     3
63     3
1      1
53     1
Name: person_id, dtype: int64


In [21]:
# Count the distinct tracked ids overall ...
print("How many unique ids of people were detected:")
print(track["person_id"].nunique())
# ... and the distinct ids seen inside the entrance region in at least one frame.
print("How many of the unique ids of people were located inside the restaurant entrance box at any point of time:")
print(track.loc[track.is_in_entrance, "person_id"].nunique())

How many unique ids of people were detected:
50
How many of the unique ids of people were located inside the restaurant entrance box at any point of time:
13


In [22]:
# Sorted array of the ids that were inside the entrance region in at least one frame.
# np.unique returns the distinct values already sorted in ascending order.
potential_customers = np.unique(track.loc[track.is_in_entrance, "person_id"].to_numpy())
potential_customers

array([ 1,  2, 26, 36, 37, 38, 39, 52, 53, 61, 63, 66, 68])

In [23]:
# Every detection (not only those in the entrance) of each potential customer,
# ordered by person and then chronologically by frame, with a fresh 0..n-1 index.
potential_customers_movement = (
    track.loc[track.person_id.isin(potential_customers)]
    .sort_values(by=["person_id", "frame"])
    .reset_index(drop=True)
    .copy()
)
potential_customers_movement

Unnamed: 0,frame,person_id,x_tl,y_tl,x_delta_br,y_delta_br,confidence,dunno_1,dunno_2,dunno_3,x_center,y_center,is_in_entrance
0,1,1,870.683960,275.762390,407.534363,468.123047,0.962106,-1,-1,-1,1074.451141,509.823914,False
1,2,1,864.841553,273.901398,408.415710,468.909851,0.964903,-1,-1,-1,1069.049408,508.356323,False
2,3,1,855.345581,280.301086,404.767059,463.824585,0.962187,-1,-1,-1,1057.729111,512.213379,False
3,4,1,846.579834,282.714722,404.640381,462.345947,0.969472,-1,-1,-1,1048.900024,513.887695,False
4,5,1,836.721008,287.722595,402.943726,457.867523,0.958062,-1,-1,-1,1038.192871,516.656357,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1844,1105,68,157.204193,425.179077,226.829041,327.154510,0.721427,-1,-1,-1,270.618713,588.756332,True
1845,1106,68,158.492279,423.089844,226.072311,326.725952,0.682495,-1,-1,-1,271.528435,586.452820,True
1846,1107,68,157.299957,416.615479,228.684158,332.185059,0.475774,-1,-1,-1,271.642036,582.708008,True
1847,1108,68,157.500198,411.152710,233.711884,340.760010,0.413025,-1,-1,-1,274.356140,581.532715,True


In [24]:
# Classify each potential customer by where their track starts and ends:
#   - first detection outside the entrance region -> entering
#   - first detection inside, last detection outside -> exiting
#   - first and last detection both inside -> not counted
# NOTE(review): a track that starts outside is counted as entering even if it
# does not end inside the entrance region - confirm this is the intended heuristic.
entering_customers = []
exiting_customers = []

for customer in potential_customers:
    # Entrance flags of this person; rows are in frame order because
    # potential_customers_movement was sorted by person_id and frame.
    entrance_flags = potential_customers_movement.loc[
        potential_customers_movement.person_id == customer, "is_in_entrance"
    ]
    first_frame_in = entrance_flags.iloc[0]
    last_frame_in = entrance_flags.iloc[-1]

    if not first_frame_in:
        entering_customers.append(customer)
    elif not last_frame_in:
        exiting_customers.append(customer)

In [25]:
# Report how many tracked ids were classified as leaving and as entering.
for label, customers in (
    ("Leaving customers:", exiting_customers),
    ("Entering customers:", entering_customers),
):
    print(label, len(customers))

Leaving customers: 0
Entering customers: 9
