In [1]:
import os
import random

import cv2

from ultralytics import YOLO

from tracker import Tracker

In [284]:
source = './assets/im1.jpg'

In [285]:
def boxAndLineOverlap(x_mid_point, y_mid_point, line_coordinates):
    x1_line, y1_line, x2_line, y2_line = line_coordinates #Unpacking

    if (x_mid_point >= x1_line and x_mid_point <= x2_line+5) and\
        (y_mid_point >= y1_line and y_mid_point <= y2_line+5):
        return True
    return False
def boxAndAreaOverlap(x_mid_point, y_mid_point, line_coordinates):
    x1_line, y1_line, x2_line, y2_line = line_coordinates #Unpacking

    if (x_mid_point >= x1_line and x_mid_point <= x2_line) and\
        (y_mid_point >= y1_line and y_mid_point <= y2_line):
        return True
    return False

In [286]:
video_path = os.path.join(os.getcwd(), 'assets', 'people.mp4')

cap = cv2.VideoCapture(video_path)


video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
x1_area = int(0.10 * video_width)
y1_area = int(0.10 * video_height)
x2_area = int(0.90 * video_width)
y2_area = int(0.90 * video_height)

detections = []
def defun(cap):

    model = YOLO('yolov8n.pt')
    
    ret, frame = cap.read()
    results = model(frame)
    
    detections = []
    for result in results:
        for r in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = r
            x1 = int(x1)
            y1 = int(y1)
            x2 = int(x2)
            y2 = int(y2)
            score = float(score)
            class_id = int(class_id)

            if score > 0.5 and class_id == 0:
                detections.append([x1, y1, x2, y2, score, class_id])
    return ret, frame, detections
# for i in range(5):
#     defun()

defun(cap)


video_path = os.path.join(os.getcwd(), 'assets', 'people.mp4')
video_out_path = os.path.join(os.getcwd(), 'assets', 'people_out.mp4')

cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()

cap_out = cv2.VideoWriter(video_out_path, cv2.VideoWriter_fourcc(*'mp4v'), cap.get(cv2.CAP_PROP_FPS), (frame.shape[1], frame.shape[0]))

model = YOLO('yolov8n.pt')

tracker = Tracker()

colors = [(random.randint(0,255), random.randint(0,255), random.randint(0,255)) for j in range(100)]
while ret:

    results = model(frame)
    for result in results:
        detections = []
        for r in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = r
            x1 = int(x1)
            y1 = int(y1)
            x2 = int(x2)
            y2 = int(y2)
            score = float(score)
            class_id = int(class_id)

            if score > 0.5:
                detections.append([x1, y1, x2, y2, score, class_id])


    tracker.update(frame, detections)
    
    for track in tracker.tracks:
        bbox = track.bbox
        x1, y1, x2, y2 = bbox
        track_id = track.track_id
        # class_id = track.class_id
        # score = track.score

        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), colors[track_id%len(colors)], 2)
        cv2.putText(frame, f'{track_id}', (int(x1), int(y1)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[track_id%len(colors)], 2)
    
    # cv2.imshow('frame', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    # cap_out.write(frame)
    ret, frame = cap.read()

cap.release()
cv2.destroyAllWindows()

In [321]:
from deep_sort.deep_sort.tracker import Tracker as DeepSortTracker
from deep_sort.tools import generate_detections as gdet
from deep_sort.deep_sort import nn_matching
from deep_sort.deep_sort.detection import Detection
import numpy as np


class Tracker:
    tracker = None
    encoder = None
    tracks = None
    raw_tracks = None

    def __init__(self):
        max_cosine_distance = 0.4
        nn_budget = None

        encoder_model_filename = 'model_data/mars-small128.pb'

        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = DeepSortTracker(metric)
        self.encoder = gdet.create_box_encoder(encoder_model_filename, batch_size=1)

    def update(self, frame, detections):

        bboxes = np.asarray([d[:-2] for d in detections])
        bboxes[:, 2:] = bboxes[:, 2:] - bboxes[:, 0:2]
#         Yolo_confidence_scores = [d[-2] for d in detections]
        scores = [d[-2] for d in detections]
        
        features = self.encoder(frame, bboxes)

        dets = []
        for bbox_id, bbox in enumerate(bboxes):
            dets.append(Detection(bbox, scores[bbox_id], features[bbox_id]))
            

        self.tracker.predict()
        self.tracker.update(dets)
        self.update_tracks()

    def update_tracks(self):
        tracks = []
#         raw_tracks = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            id = track.track_id

#             tracks.append(Track(id, bbox,0))
#             raw_tracks.append(track)
            tracks.append(track)

        self.tracks = tracks
#         self.raw_tracks = raw_tracks


# class Track:
#     track_id = None
#     bbox = None
#     class_id = None
# #     score = None

#     def __init__(self, id, bbox, class_id):
#         self.track_id = id
#         self.bbox = bbox
#         self.class_id = class_id

from deep_sort.deep_sort.tracker import Tracker as DeepSortTracker
from deep_sort.tools import generate_detections as gdet
from deep_sort.deep_sort import nn_matching
from deep_sort.deep_sort.detection import Detection
import numpy as np

max_cosine_distance = 0.4
nn_budget = None
encoder_model_filename = 'model_data/mars-small128.pb'

frame, detections = defun(cap)

np.asarray([d[:-2] for d in detections])

bboxes = np.asarray([d[:-2] for d in detections])
print(bboxes)
bboxes[:, 2:] = bboxes[:, 2:] - bboxes[:, 0:2]
bboxes

max_cosine_distance = 0.4
nn_budget = None

encoder_model_filename = 'model_data/mars-small128.pb'

metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = DeepSortTracker(metric)
encoder = gdet.create_box_encoder(encoder_model_filename, batch_size=1)

scores = [d[-2] for d in detections]

features = encoder(frame, bboxes)

dets = []
for bbox_id, bbox in enumerate(bboxes):
    dets.append(Detection(bbox, scores[bbox_id], features[bbox_id]))


tracker.predict()
tracker.update(dets)

for bbox_id, bbox in enumerate(bboxes):
    print(bbox_id)

classDict = [d[-1] for d in detections]

tracks = []
for track in tracker.tracks:
    if not track.is_confirmed() or track.time_since_update > 1:
        continue
    bbox = track.to_tlbr()

    id = track.track_id

    tracks.append(Track(id, bbox,0))

tracks = tracks

for i in tracks:
    print(i)

tracker

In [322]:
tracker = Tracker()

In [324]:
for i in range(3):
    ret, frame, detections = defun(cap)
    tracker.update(frame, detections)


0: 384x640 33 persons, 1 handbag, 114.4ms
Speed: 1.7ms preprocess, 114.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 33 persons, 1 dog, 128.1ms
Speed: 1.7ms preprocess, 128.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 33 persons, 122.6ms
Speed: 1.5ms preprocess, 122.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


In [338]:
ret, frame, detections = defun(cap)


0: 384x640 30 persons, 1 dog, 168.1ms
Speed: 1.6ms preprocess, 168.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


In [326]:
tracker.update(frame, detections)

In [327]:
for track in tracker.tracks:
    print(track.to_tlbr())

[     654.15      637.39      706.68         720]
[     1072.6      205.24      1116.4      308.77]
[     55.196      632.89      116.57      719.77]
[     803.73      471.21      855.16      590.87]
[     358.59      350.47      411.91      461.69]
[     35.722         407      98.009      537.72]
[     1174.9      192.54      1232.2      300.82]
[     603.38      272.17      648.69      376.38]
[     706.39      290.44       752.6      395.72]
[     668.81      421.86       708.1       528.7]
[        780      148.12      815.55      234.17]
[     531.41          28      558.64      112.05]
[     459.83      522.25      520.95      667.18]
[      746.3      148.66      777.67      231.21]
[     456.14      154.73      494.79       259.8]
[      371.5      267.68      413.17      358.02]
[     918.64          20       944.6         110]
[     611.24      57.829      640.81      145.93]
[     478.29      102.37      508.84      191.33]
[     149.61           0      201.05          96]


In [339]:
detections

[[1071, 202, 1115, 308, 0.824573278427124, 0],
 [39, 413, 101, 544, 0.8109283447265625, 0],
 [458, 516, 526, 647, 0.787870466709137, 0],
 [704, 287, 756, 396, 0.7685851454734802, 0],
 [645, 644, 719, 720, 0.7617627382278442, 0],
 [1179, 198, 1230, 308, 0.7565044164657593, 0],
 [46, 642, 128, 720, 0.7553132176399231, 0],
 [360, 356, 410, 466, 0.7356264591217041, 0],
 [602, 277, 649, 383, 0.730269730091095, 0],
 [801, 464, 860, 586, 0.7194649577140808, 0],
 [457, 151, 493, 250, 0.7012616395950317, 0],
 [781, 144, 817, 228, 0.6636248826980591, 0],
 [672, 421, 707, 529, 0.6387720108032227, 0],
 [613, 61, 640, 151, 0.6219330430030823, 0],
 [746, 152, 781, 234, 0.6144081354141235, 0],
 [477, 100, 508, 185, 0.6012713313102722, 0],
 [919, 20, 945, 110, 0.5962958931922913, 0],
 [370, 271, 412, 364, 0.5945473909378052, 0],
 [532, 27, 558, 112, 0.5792855620384216, 0],
 [145, 0, 209, 96, 0.5168732404708862, 0],
 [573, 29, 600, 116, 0.5127284526824951, 0]]

In [340]:
bboxes = np.asarray([d[:-2] for d in detections])


In [347]:
bboxes

array([[1071,  202, 1115,  308],
       [  39,  413,  101,  544],
       [ 458,  516,  526,  647],
       [ 704,  287,  756,  396],
       [ 645,  644,  719,  720],
       [1179,  198, 1230,  308],
       [  46,  642,  128,  720],
       [ 360,  356,  410,  466],
       [ 602,  277,  649,  383],
       [ 801,  464,  860,  586],
       [ 457,  151,  493,  250],
       [ 781,  144,  817,  228],
       [ 672,  421,  707,  529],
       [ 613,   61,  640,  151],
       [ 746,  152,  781,  234],
       [ 477,  100,  508,  185],
       [ 919,   20,  945,  110],
       [ 370,  271,  412,  364],
       [ 532,   27,  558,  112],
       [ 145,    0,  209,   96],
       [ 573,   29,  600,  116]])

In [350]:
bboxes[:, 2:] - bboxes[:, 0:2]

array([[ 44, 106],
       [ 62, 131],
       [ 68, 131],
       [ 52, 109],
       [ 74,  76],
       [ 51, 110],
       [ 82,  78],
       [ 50, 110],
       [ 47, 106],
       [ 59, 122],
       [ 36,  99],
       [ 36,  84],
       [ 35, 108],
       [ 27,  90],
       [ 35,  82],
       [ 31,  85],
       [ 26,  90],
       [ 42,  93],
       [ 26,  85],
       [ 64,  96],
       [ 27,  87]])

In [343]:
bboxes[:, 2:] = bboxes[:, 2:] - bboxes[:, 0:2]

array([[1115,  308],
       [ 101,  544],
       [ 526,  647],
       [ 756,  396],
       [ 719,  720],
       [1230,  308],
       [ 128,  720],
       [ 410,  466],
       [ 649,  383],
       [ 860,  586],
       [ 493,  250],
       [ 817,  228],
       [ 707,  529],
       [ 640,  151],
       [ 781,  234],
       [ 508,  185],
       [ 945,  110],
       [ 412,  364],
       [ 558,  112],
       [ 209,   96],
       [ 600,  116]])

In [293]:
tracker.tracks

[<deep_sort.deep_sort.track.Track at 0x292c3f290>,
 <deep_sort.deep_sort.track.Track at 0x28fc2c890>,
 <deep_sort.deep_sort.track.Track at 0x28fc2ed10>,
 <deep_sort.deep_sort.track.Track at 0x28fc2f450>,
 <deep_sort.deep_sort.track.Track at 0x28fc2f510>,
 <deep_sort.deep_sort.track.Track at 0x28fc2f5d0>,
 <deep_sort.deep_sort.track.Track at 0x28fc2f1d0>,
 <deep_sort.deep_sort.track.Track at 0x28fc2f390>,
 <deep_sort.deep_sort.track.Track at 0x28fc2c350>,
 <deep_sort.deep_sort.track.Track at 0x28fc2ecd0>,
 <deep_sort.deep_sort.track.Track at 0x28fc2cf50>,
 <deep_sort.deep_sort.track.Track at 0x28fc50f10>,
 <deep_sort.deep_sort.track.Track at 0x28fc514d0>,
 <deep_sort.deep_sort.track.Track at 0x28fc50910>,
 <deep_sort.deep_sort.track.Track at 0x28fc53c90>,
 <deep_sort.deep_sort.track.Track at 0x28fc53ad0>,
 <deep_sort.deep_sort.track.Track at 0x28fc53d10>,
 <deep_sort.deep_sort.track.Track at 0x28fc53410>,
 <deep_sort.deep_sort.track.Track at 0x28fc53790>,
 <deep_sort.deep_sort.track.Tra

In [294]:
type(tracker.tracks[0].mean)

numpy.ndarray

In [295]:
x, y, a, h, vx, vy, va, vh = tracker.tracks[0].mean

In [296]:
x, y = tracker.tracks[0].mean[:2]

In [297]:
x,y

(1005.7838186569628, 376.0567438971888)

In [298]:
tracker.tracks[0].to_tlbr()

array([      978.6,      320.12,        1033,         432])

In [299]:
np.array((1,2,3,4))

array([1, 2, 3, 4])

In [300]:
for track in tracker.tracks:
    print(track in tracker.tracks)

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [301]:

class Frame:
    """
    This class is used to store the state of the frame
    State 0 means that the frame is not processed
    State 1 means that the frame is processed
    if the frame is not processed there wont be any tracks and only boxes will be present 
    and we will store the detections in place of tracks


    if any object goes undetected in current frame then 
    the last tracks_status of the object can give us the number of frames required for object to be considered as lost out of area
    once the object is lost we no more need to track it and count it
    but if the object is not lost it should be kept in counting if it comes back in tracking and is within area.

    """
    # track_id = None
    frame_state = None
    tracks = None
    tracks_state = None
    # boxes = None

    def __init__(self, frame_state = 0, prev_frame = None):
        # if prev_frame is None:
        self.frame_state = frame_state
        self.tracks = []
        # if self.frame_state == 1:
        self.tracks_state = dict()
        # else:
        #     self.frame_state = prev_frame.frame_state
        #     self.tracks = prev_frame.tracks
        #     tracks_state = prev_frame.tracks_state
        # self.boxes = []
    
    def append(self, track):
        self.tracks.append(track)
    
    def update(self, tracks):
        untracked_tracks = list(set(self.tracks) - set(tracks))
        
        for track in tracks:
            if track not in self.tracks:
                self.append(track)
                # self.update_state(track.track_id, 0)
            state = self.calculate_state(track)
            self.update_state(track.track_id, state)
            
        for track in untracked_tracks:
            print(self.tracks_state[track.track_id][1])
            self.tracks_state[track.track_id][1] -= 1
            print(self.tracks_state[track.track_id][1])
            
            if self.tracks_state[track.track_id][1] <= 0:
                self.tracks.remove(track)


    def calculate_state(self, track):
        """
        returns : state tuple haing flowstate and n_frame
            (flow_state,n_frame)
        """
        flow_state = None

        x_mid, y_mid = track.mean[:2]
        if boxAndLineOverlap(x_mid, y_mid, (x1_area, y1_area, x2_area, y2_area)):
            flow_state = 1
        else:
            flow_state = 0
        return np.array([flow_state,1])

    def update_state(self, track_id, state):
        """
        state will contain:
        0: object is out of area
        1: object is in area
        2: object is inflowing
        3: object is outflowing

        n_frame : frames for which the object will remain inside area at current velocity
        will be calculated at each frame

        if frame state is zero the n_frame will not be calculated as there is no velocity of the object yet
        // [X] in such case box will be used to keep track of the object
        we are not tracking the objects that have not been tracked yet... Let them be in the frame uncounted
        """
        # if self.tracks_state is not None:
        self.tracks_state[track_id] = state

    def get_count(self):
        return len(self.tracks)


In [302]:
prev_frame = Frame()

In [303]:
prev_frame.tracks

[]

In [304]:
a = np.array([2,1])

In [305]:
a[1]

1

In [306]:
a[1]-=1

In [307]:
a

array([2, 0])

In [308]:
# cur_frame = Frame()

In [309]:
cur_frame.tracks

[<deep_sort.deep_sort.track.Track at 0x293e7a510>,
 <deep_sort.deep_sort.track.Track at 0x28fcb8090>,
 <deep_sort.deep_sort.track.Track at 0x293e75490>,
 <deep_sort.deep_sort.track.Track at 0x28fd05710>,
 <deep_sort.deep_sort.track.Track at 0x292ed4e50>,
 <deep_sort.deep_sort.track.Track at 0x28de5b110>,
 <deep_sort.deep_sort.track.Track at 0x292cd4150>,
 <deep_sort.deep_sort.track.Track at 0x290b68e50>,
 <deep_sort.deep_sort.track.Track at 0x28fdaccd0>,
 <deep_sort.deep_sort.track.Track at 0x293e792d0>,
 <deep_sort.deep_sort.track.Track at 0x2939abed0>,
 <deep_sort.deep_sort.track.Track at 0x28feafe90>,
 <deep_sort.deep_sort.track.Track at 0x28ea8fb10>,
 <deep_sort.deep_sort.track.Track at 0x293d670d0>,
 <deep_sort.deep_sort.track.Track at 0x290ad0dd0>,
 <deep_sort.deep_sort.track.Track at 0x293c9c350>,
 <deep_sort.deep_sort.track.Track at 0x293ec4550>,
 <deep_sort.deep_sort.track.Track at 0x292e8cd10>,
 <deep_sort.deep_sort.track.Track at 0x292c8d0d0>,
 <deep_sort.deep_sort.track.Tra

In [310]:
cur_frame.tracks_state

{1: array([0, 0]),
 2: array([0, 0]),
 3: array([0, 0]),
 4: array([0, 0]),
 5: array([0, 0]),
 6: array([0, 2]),
 7: array([0, 0]),
 8: array([0, 0]),
 9: array([0, 0]),
 10: array([0, 0]),
 11: array([0, 0]),
 12: array([0, 0]),
 13: array([0, 0]),
 14: array([0, 0]),
 15: array([1, 0]),
 16: array([0, 0]),
 17: array([1, 2]),
 19: array([0, 0]),
 21: array([0, 0]),
 23: array([0, 0]),
 24: array([0, 0]),
 26: array([0, 0]),
 27: array([0, 0]),
 29: array([0, 0]),
 31: array([0, 0]),
 35: array([0, 0]),
 36: array([0, 0]),
 39: array([0, 0]),
 41: array([0, 0]),
 43: array([0, 0]),
 44: array([0, 0]),
 45: array([1, 0]),
 48: array([1, 0]),
 51: array([1, 2]),
 52: array([0, 0]),
 53: array([0, 2]),
 56: array([0, 0]),
 57: array([0, 2]),
 58: array([0, 0]),
 60: array([0, 0]),
 61: array([1, 2]),
 63: array([0, 2]),
 67: array([0, 0]),
 69: array([0, 1]),
 70: array([0, 0]),
 71: array([0, 0]),
 73: array([0, 0]),
 75: array([0, 2]),
 76: array([1, 0]),
 77: array([0, 0]),
 79: arra

In [311]:
cur_frame.frame_state

0

In [312]:
cur_frame.update(tracker.tracks)

In [313]:
cur_frame.tracks

[<deep_sort.deep_sort.track.Track at 0x293e7a510>,
 <deep_sort.deep_sort.track.Track at 0x28fcb8090>,
 <deep_sort.deep_sort.track.Track at 0x293e75490>,
 <deep_sort.deep_sort.track.Track at 0x28fd05710>,
 <deep_sort.deep_sort.track.Track at 0x292ed4e50>,
 <deep_sort.deep_sort.track.Track at 0x28de5b110>,
 <deep_sort.deep_sort.track.Track at 0x292cd4150>,
 <deep_sort.deep_sort.track.Track at 0x28fdaccd0>,
 <deep_sort.deep_sort.track.Track at 0x293e792d0>,
 <deep_sort.deep_sort.track.Track at 0x2939abed0>,
 <deep_sort.deep_sort.track.Track at 0x28feafe90>,
 <deep_sort.deep_sort.track.Track at 0x28ea8fb10>,
 <deep_sort.deep_sort.track.Track at 0x293d670d0>,
 <deep_sort.deep_sort.track.Track at 0x290ad0dd0>,
 <deep_sort.deep_sort.track.Track at 0x293c9c350>,
 <deep_sort.deep_sort.track.Track at 0x293ec4550>,
 <deep_sort.deep_sort.track.Track at 0x292e8cd10>,
 <deep_sort.deep_sort.track.Track at 0x292c8d0d0>,
 <deep_sort.deep_sort.track.Track at 0x28fc20f10>,
 <deep_sort.deep_sort.track.Tra

In [314]:
cur_frame.tracks_state

{1: array([1, 2]),
 2: array([1, 2]),
 3: array([1, 2]),
 4: array([0, 2]),
 5: array([1, 2]),
 6: array([0, 2]),
 7: array([1, 2]),
 8: array([1, 2]),
 9: array([1, 2]),
 10: array([0, 2]),
 11: array([0, 2]),
 12: array([1, 2]),
 13: array([0, 2]),
 14: array([0, 2]),
 15: array([1, 2]),
 16: array([1, 2]),
 17: array([1, 2]),
 19: array([1, 2]),
 21: array([0, 2]),
 23: array([1, 2]),
 24: array([0, 0]),
 26: array([0, 0]),
 27: array([0, 0]),
 29: array([0, 0]),
 31: array([0, 0]),
 35: array([0, 0]),
 36: array([0, 0]),
 39: array([0, 0]),
 41: array([0, 0]),
 43: array([0, 0]),
 44: array([0, 0]),
 45: array([1, 0]),
 48: array([1, 0]),
 51: array([1, 1]),
 52: array([0, 0]),
 53: array([0, 1]),
 56: array([0, 0]),
 57: array([0, 1]),
 58: array([0, 0]),
 60: array([0, 0]),
 61: array([1, 1]),
 63: array([0, 1]),
 67: array([0, 0]),
 69: array([0, 0]),
 70: array([0, 0]),
 71: array([0, 0]),
 73: array([0, 0]),
 75: array([0, 1]),
 76: array([1, 0]),
 77: array([0, 0]),
 79: arra

In [315]:
cur_frame.frame_state

0

In [316]:
import copy

prev_frame = copy.deepcopy(cur_frame)

In [317]:
prev_frame.tracks_state

{1: array([1, 2]),
 2: array([1, 2]),
 3: array([1, 2]),
 4: array([0, 2]),
 5: array([1, 2]),
 6: array([0, 2]),
 7: array([1, 2]),
 8: array([1, 2]),
 9: array([1, 2]),
 10: array([0, 2]),
 11: array([0, 2]),
 12: array([1, 2]),
 13: array([0, 2]),
 14: array([0, 2]),
 15: array([1, 2]),
 16: array([1, 2]),
 17: array([1, 2]),
 19: array([1, 2]),
 21: array([0, 2]),
 23: array([1, 2]),
 24: array([0, 0]),
 26: array([0, 0]),
 27: array([0, 0]),
 29: array([0, 0]),
 31: array([0, 0]),
 35: array([0, 0]),
 36: array([0, 0]),
 39: array([0, 0]),
 41: array([0, 0]),
 43: array([0, 0]),
 44: array([0, 0]),
 45: array([1, 0]),
 48: array([1, 0]),
 51: array([1, 1]),
 52: array([0, 0]),
 53: array([0, 1]),
 56: array([0, 0]),
 57: array([0, 1]),
 58: array([0, 0]),
 60: array([0, 0]),
 61: array([1, 1]),
 63: array([0, 1]),
 67: array([0, 0]),
 69: array([0, 0]),
 70: array([0, 0]),
 71: array([0, 0]),
 73: array([0, 0]),
 75: array([0, 1]),
 76: array([1, 0]),
 77: array([0, 0]),
 79: arra

In [318]:
ret, frame, detections = defun(cap)
tracker.update(frame, detections)
cur_frame.update(tracker.tracks)
prev_frame = copy.deepcopy(cur_frame)


0: 384x640 34 persons, 5 birds, 135.7ms
Speed: 1.4ms preprocess, 135.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


In [319]:
print(len(cur_frame.tracks_state),list(set(cur_frame.tracks_state) - set(prev_frame.tracks_state)),list(set(prev_frame.tracks_state) - set(cur_frame.tracks_state)))

68 [] []


In [320]:
while ret and len(list(set(prev_frame.tracks_state) - set(cur_frame.tracks_state)))==0:
    prev_frame = copy.deepcopy(cur_frame)
    ret, frame, detections = defun(cap)
    tracker.update(frame, detections)
    cur_frame.update(tracker.tracks)
    print(len(cur_frame.tracks_state),list(set(cur_frame.tracks_state) - set(prev_frame.tracks_state)),list(set(prev_frame.tracks_state) - set(cur_frame.tracks_state)))


0: 384x640 36 persons, 5 birds, 186.7ms
Speed: 1.5ms preprocess, 186.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 4 birds, 111.4ms
Speed: 1.8ms preprocess, 111.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 35 persons, 2 birds, 122.3ms
Speed: 1.5ms preprocess, 122.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 34 persons, 2 birds, 115.8ms
Speed: 1.4ms preprocess, 115.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 2 birds, 110.2ms
Speed: 1.5ms preprocess, 110.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 33 persons, 2 birds, 166.3ms
Speed: 1.6ms preprocess, 166.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 2 birds, 117.9ms
Speed: 1.5ms preprocess, 117.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 32 persons, 3 birds, 119.3ms
Speed: 1.4ms preprocess, 119.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 33 persons, 4 birds, 122.5ms
Speed: 1.5ms preprocess, 122.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 33 persons, 4 birds, 122.4ms


68 [] []


Speed: 1.4ms preprocess, 122.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



68 [] []


0: 384x640 34 persons, 4 birds, 165.1ms
Speed: 1.4ms preprocess, 165.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 3 birds, 118.9ms
Speed: 1.6ms preprocess, 118.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 34 persons, 2 birds, 119.1ms
Speed: 1.4ms preprocess, 119.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 32 persons, 2 birds, 114.8ms
Speed: 1.4ms preprocess, 114.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 33 persons, 2 birds, 108.9ms
Speed: 1.5ms preprocess, 108.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 31 persons, 2 birds, 1 dog, 127.4ms
Speed: 1.5ms preprocess, 127.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 33 persons, 1 bird, 1 dog, 273.6ms
Speed: 1.4ms preprocess, 273.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)



68 [] []


0: 384x640 32 persons, 1 bird, 1 dog, 132.7ms
Speed: 1.6ms preprocess, 132.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 33 persons, 1 bird, 1 dog, 115.8ms
Speed: 1.5ms preprocess, 115.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 34 persons, 1 bird, 1 dog, 124.3ms
Speed: 1.3ms preprocess, 124.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 34 persons, 1 bird, 1 dog, 118.2ms
Speed: 1.4ms preprocess, 118.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 35 persons, 1 bird, 116.4ms
Speed: 1.4ms preprocess, 116.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 1 bird, 118.6ms
Speed: 1.5ms preprocess, 118.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 34 persons, 1 bird, 121.0ms
Speed: 1.4ms preprocess, 121.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 34 persons, 1 bird, 171.7ms
Speed: 1.4ms preprocess, 171.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 2 birds, 105.0ms
Speed: 1.4ms preprocess, 105.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 31 persons, 2 birds, 119.6ms
Speed: 1.3ms preprocess, 119.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 31 persons, 3 birds, 176.4ms
Speed: 1.5ms preprocess, 176.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 33 persons, 1 bird, 116.5ms
Speed: 1.4ms preprocess, 116.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 31 persons, 2 birds, 122.0ms
Speed: 1.5ms preprocess, 122.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 33 persons, 1 bird, 117.4ms
Speed: 1.4ms preprocess, 117.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 33 persons, 1 bird, 116.4ms
Speed: 1.4ms preprocess, 116.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 34 persons, 2 birds, 160.2ms
Speed: 1.5ms preprocess, 160.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 2 birds, 116.3ms
Speed: 1.4ms preprocess, 116.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 35 persons, 2 birds, 111.9ms
Speed: 1.3ms preprocess, 111.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 32 persons, 2 birds, 114.2ms
Speed: 1.5ms preprocess, 114.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 33 persons, 2 birds, 124.8ms
Speed: 1.4ms preprocess, 124.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 35 persons, 2 birds, 112.1ms
Speed: 1.5ms preprocess, 112.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 35 persons, 2 birds, 124.1ms
Speed: 1.4ms preprocess, 124.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 35 persons, 2 birds, 116.6ms
Speed: 1.4ms preprocess, 116.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 2 birds, 110.8ms
Speed: 1.3ms preprocess, 110.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 36 persons, 1 bird, 126.4ms
Speed: 1.6ms preprocess, 126.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 1 bird, 117.5ms
Speed: 1.4ms preprocess, 117.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 2 birds, 1 dog, 109.2ms
Speed: 1.4ms preprocess, 109.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 1 bird, 1 dog, 117.4ms
Speed: 1.3ms preprocess, 117.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 1 bird, 1 dog, 1 cow, 120.0ms
Speed: 1.3ms preprocess, 120.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 1 bird, 1 dog, 123.9ms
Speed: 1.4ms preprocess, 123.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 1 bird, 1 dog, 112.2ms
Speed: 1.4ms preprocess, 112.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 2 birds, 1 dog, 103.7ms
Speed: 1.4ms preprocess, 103.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 1 bird, 1 dog, 118.4ms
Speed: 1.3ms preprocess, 118.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 1 bird, 118.3ms
Speed: 1.3ms preprocess, 118.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 38 persons, 1 bird, 174.2ms
Speed: 1.3ms preprocess, 174.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 39 persons, 1 bird, 1 horse, 128.2ms
Speed: 1.4ms preprocess, 128.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 1 bird, 116.4ms
Speed: 1.4ms preprocess, 116.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 1 bird, 116.7ms
Speed: 1.5ms preprocess, 116.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 1 bird, 115.1ms
Speed: 1.4ms preprocess, 115.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 41 persons, 1 bird, 113.2ms
Speed: 1.3ms preprocess, 113.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 116.3ms
Speed: 1.3ms preprocess, 116.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 119.0ms
Speed: 1.6ms preprocess, 119.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 1 bird, 102.4ms
Speed: 1.3ms preprocess, 102.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 1 bird, 124.5ms
Speed: 1.3ms preprocess, 124.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 1 bird, 121.3ms
Speed: 1.3ms preprocess, 121.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 39 persons, 1 bird, 1 dog, 147.5ms
Speed: 1.3ms preprocess, 147.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 1 bird, 1 dog, 114.8ms
Speed: 1.4ms preprocess, 114.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 1 bird, 1 dog, 109.3ms
Speed: 1.5ms preprocess, 109.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 1 bird, 1 dog, 128.4ms
Speed: 1.4ms preprocess, 128.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 1 bird, 117.8ms
Speed: 1.3ms preprocess, 117.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []
68 [] []



0: 384x640 39 persons, 1 bird, 121.0ms
Speed: 2.0ms preprocess, 121.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 42 persons, 110.4ms
Speed: 1.4ms preprocess, 110.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 41 persons, 125.5ms
Speed: 1.5ms preprocess, 125.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 40 persons, 1 bird, 110.6ms
Speed: 1.5ms preprocess, 110.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 42 persons, 1 bird, 113.8ms
Speed: 1.4ms preprocess, 113.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 3 birds, 117.1ms
Speed: 1.3ms preprocess, 117.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 38 persons, 3 birds, 124.8ms
Speed: 1.4ms preprocess, 124.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 3 birds, 126.8ms
Speed: 1.4ms preprocess, 126.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 3 birds, 116.2ms
Speed: 1.3ms preprocess, 116.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 3 birds, 113.1ms
Speed: 1.4ms preprocess, 113.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 39 persons, 1 bird, 126.0ms
Speed: 1.5ms preprocess, 126.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



68 [] []


0: 384x640 41 persons, 1 bird, 219.8ms
Speed: 1.8ms preprocess, 219.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 40 persons, 2 birds, 119.6ms
Speed: 1.4ms preprocess, 119.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 1 horse, 115.7ms
Speed: 1.4ms preprocess, 115.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 115.5ms
Speed: 1.6ms preprocess, 115.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 36 persons, 123.7ms
Speed: 1.5ms preprocess, 123.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 125.4ms
Speed: 1.6ms preprocess, 125.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 120.4ms
Speed: 1.5ms preprocess, 120.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 41 persons, 114.0ms
Speed: 1.4ms preprocess, 114.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 39 persons, 118.4ms
Speed: 1.6ms preprocess, 118.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 40 persons, 127.7ms
Speed: 1.4ms preprocess, 127.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)



68 [] []


0: 384x640 38 persons, 1 backpack, 126.9ms
Speed: 1.5ms preprocess, 126.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



68 [] []


0: 384x640 36 persons, 169.3ms
Speed: 1.5ms preprocess, 169.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 119.6ms
Speed: 1.3ms preprocess, 119.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 120.5ms
Speed: 1.6ms preprocess, 120.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 36 persons, 1 dog, 120.4ms
Speed: 1.5ms preprocess, 120.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 116.2ms
Speed: 1.4ms preprocess, 116.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 122.9ms
Speed: 1.3ms preprocess, 122.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 37 persons, 1 backpack, 119.0ms
Speed: 1.3ms preprocess, 119.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 36 persons, 2 backpacks, 134.7ms
Speed: 1.4ms preprocess, 134.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 1 bird, 1 backpack, 110.3ms
Speed: 1.5ms preprocess, 110.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 35 persons, 1 bird, 124.7ms
Speed: 1.4ms preprocess, 124.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 1 bird, 113.7ms
Speed: 1.5ms preprocess, 113.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 35 persons, 110.0ms
Speed: 1.5ms preprocess, 110.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []



0: 384x640 35 persons, 1 bird, 117.1ms
Speed: 1.6ms preprocess, 117.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []





68 [] []


0: 384x640 35 persons, 181.4ms
Speed: 1.5ms preprocess, 181.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



68 [] []


0: 384x640 34 persons, 140.4ms
Speed: 1.8ms preprocess, 140.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 33 persons, 1 bird, 112.8ms
Speed: 1.5ms preprocess, 112.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


68 [] []


KeyboardInterrupt: 

[]

In [170]:
prev_frame = copy.deepcopy(cur_frame)

In [176]:
prev_frame.tracks_state

{1: array([1, 2]),
 2: array([1, 2]),
 3: array([1, 2]),
 4: array([0, 2]),
 5: array([1, 2]),
 6: array([0, 2]),
 7: array([1, 2]),
 8: array([1, 2]),
 9: array([1, 2]),
 10: array([0, 2]),
 11: array([0, 2]),
 12: array([1, 2]),
 13: array([0, 2]),
 14: array([0, 2]),
 15: array([1, 2]),
 16: array([1, 2]),
 17: array([1, 2]),
 19: array([1, 2]),
 21: array([0, 2]),
 23: array([1, 2]),
 24: array([1, 2])}

In [177]:
cur_frame.tracks_state

{1: array([1, 2]),
 2: array([1, 2]),
 3: array([1, 2]),
 4: array([0, 2]),
 5: array([1, 2]),
 6: array([0, 2]),
 7: array([1, 2]),
 8: array([1, 2]),
 9: array([1, 2]),
 10: array([0, 2]),
 11: array([0, 2]),
 12: array([1, 2]),
 13: array([0, 2]),
 14: array([0, 2]),
 15: array([1, 2]),
 16: array([1, 2]),
 17: array([1, 2]),
 19: array([1, 2]),
 21: array([0, 2]),
 23: array([1, 2]),
 24: array([1, 2]),
 26: array([0, 2]),
 27: array([0, 2])}