In [15]:
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from time import sleep
import torch
from deep_sort_realtime.deep_sort.track import Track
from deep_sort_realtime.deepsort_tracker import DeepSort
from torchvision.utils import draw_bounding_boxes
from IPython.display import clear_output
from torchvision.models.detection import (
    FasterRCNN_MobileNet_V3_Large_FPN_Weights,
    fasterrcnn_mobilenet_v3_large_fpn)

In [2]:
# Step 1: Initialize the detector with the best available pretrained weights.
# Detections scoring below BOX_SCORE_THRESH are discarded by the model itself.
BOX_SCORE_THRESH = 0.9
weights = FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
model = fasterrcnn_mobilenet_v3_large_fpn(weights=weights, box_score_thresh=BOX_SCORE_THRESH)
model.eval()  # switch to inference mode

# Step 2: Initialize the inference transforms that belong to these weights
preprocess = weights.transforms()

# Step 3: Open the video source.
# Alternative (network camera stream):
# cam = cv2.VideoCapture('http://takemotopiano.aa1.netvolante.jp:8190/nphMotionJpeg?Resolution=640x480&Quality=Standard&Framerate=30')
VIDEO_SOURCE = '/Users/babi/Downloads/video2.mp4'
cam = cv2.VideoCapture(VIDEO_SOURCE)

# cv2.VideoCapture does not raise when the source cannot be opened; without this
# check the failure only shows up later as a confusing error in the frame loop.
if not cam.isOpened():
    raise IOError(f"Could not open video source: {VIDEO_SOURCE}")

In [3]:
class TrackedObject:
    """Book-keeping record for one object followed by ObjectTracking.

    Attributes (all taken from the constructor arguments):
        id: identifier handed out by the tracker.
        frame_indexed: frame counter value at which the object was created.
        bounding_box: the most recent box matched to this object.
        last_frame_update: frame counter value of the most recent match.
    """

    def __init__(self, id, frame_index, bounding_box):
        self.id = id
        self.frame_indexed = frame_index
        self.bounding_box = bounding_box
        self.last_frame_update = frame_index


class ObjectTracking:
    """Minimal IoU-based tracker.

    Every incoming box is matched to the already-tracked object it overlaps
    most (IoU strictly above IOU_MATCH_THRESHOLD); unmatched boxes start a new
    TrackedObject. Boxes are indexable sequences (list, ndarray, tensor) laid
    out as (xmin, ymin, xmax, ymax).

    Args:
        line: stored on the instance; no method of this class reads it yet.
        frame_before_drop_track: an object that has gone more than this many
            frames without a match is removed at the end of track().
            None disables the removal.
    """

    # A detection must overlap a tracked box by more than this to be a match.
    IOU_MATCH_THRESHOLD = 0.5

    def __init__(self, line, frame_before_drop_track):
        self.objects = []
        self.frame = 0
        self.person_count = 0
        self.line = line
        self.frame_before_drop_track = frame_before_drop_track

    @staticmethod
    def _iou(bounding_box_a, bounding_box_b):
        """Return the intersection-over-union of two (xmin, ymin, xmax, ymax) boxes.

        The result is a plain float in [0, 1]; boxes whose union has no area
        yield 0.0 instead of dividing by zero.
        """
        ax1, ay1, ax2, ay2 = (float(bounding_box_a[i]) for i in range(4))
        bx1, by1, bx2, by2 = (float(bounding_box_b[i]) for i in range(4))

        overlap_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
        overlap_h = max(0.0, min(ay2, by2) - max(ay1, by1))
        intersection = overlap_w * overlap_h

        area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
        area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
        union = area_a + area_b - intersection

        return intersection / union if union > 0 else 0.0

    @staticmethod
    def _distance(centroid_a, centroid_b):
        """Return the Manhattan (L1) distance between two (x, y) points."""
        return np.abs(centroid_a[0] - centroid_b[0]) + np.abs(centroid_a[1] - centroid_b[1])

    def add_object(self, bounding_box):
        """Match bounding_box to a tracked object, or start tracking a new one.

        A match refreshes the object's box and last_frame_update; otherwise a
        new TrackedObject is appended with id == person_count, and
        person_count is incremented.
        """
        best_match = None
        # Starting at the threshold means only overlaps strictly above it can win.
        best_iou = self.IOU_MATCH_THRESHOLD
        for tracked_object in self.objects:
            iou = self._iou(bounding_box, tracked_object.bounding_box)
            if iou > best_iou:
                best_iou = iou
                best_match = tracked_object

        if best_match is None:
            self.objects.append(TrackedObject(self.person_count, self.frame, bounding_box))
            self.person_count += 1
        else:
            best_match.bounding_box = bounding_box
            best_match.last_frame_update = self.frame

    def _drop_stale_objects(self):
        """Forget objects unmatched for more than frame_before_drop_track frames."""
        if self.frame_before_drop_track is None:
            return
        self.objects = [
            tracked_object
            for tracked_object in self.objects
            if self.frame - tracked_object.last_frame_update <= self.frame_before_drop_track
        ]

    def track(self, bounding_boxes):
        """Process the detections of one frame, then advance the frame counter.

        None entries in bounding_boxes are skipped.
        """
        for bounding_box in bounding_boxes:
            if bounding_box is None:
                continue
            self.add_object(bounding_box)
        self._drop_stale_objects()
        self.frame += 1



In [4]:
# newImg = torch.permute(img, [1, 2, 0]).numpy().astype(np.uint8).copy()

# h, w, _ = newImg.shape
# xmin, ymin, xmax, ymax = prediction["boxes"][0]
# xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
# plt.imshow(cv2.rectangle(newImg, [xmin, ymin], [xmax, ymax], (255, 255, 0), 5))

# tracker = ObjectTracking(line=[0, 400, 1280, 400], frame_before_drop_track=5)

In [4]:
# Tracker instance intended for the tracker.update_tracks(...) call in the
# detection loop cell (that call is currently commented out there).
# NOTE(review): max_age presumably caps how many consecutive frames a track may
# go unmatched before it is deleted — confirm against the deep_sort_realtime docs.
tracker = DeepSort(max_age=10)

In [5]:
def convertToLTWH(bounding_box):
    """Re-express a corner-style box as (left, top, width, height).

    bounding_box is indexed as [0]=left, [1]=top, [2]=right, [3]=bottom.
    The result is a new 4-element torch.Tensor.
    """
    left, top = bounding_box[0], bounding_box[1]
    extent = [bounding_box[2] - left, bounding_box[3] - top]
    return torch.Tensor([left, top, *extent])

In [19]:
# Detection loop: read frames, run the detector, draw every detection and show
# the annotated frame together with the achieved frames-per-second.
number_person = 0
human_set = set()  # only consulted by the disabled tracking code further down
# Look the "person" class index up by name instead of hard-coding it.
PERSON_LABEL = weights.meta["categories"].index("person")

try:
    while True:
        initial_time = time.time()
        check, frame = cam.read()
        if not check:
            # read() signals failure through its first return value (frame is
            # then None), e.g. at the end of the video or when the capture is
            # not open; indexing the frame would raise a TypeError.
            print("No frame could be read (end of video or capture not open); stopping.")
            break

        # OpenCV delivers BGR, HxWxC. Reorder to RGB and to CxHxW for
        # torchvision, building the uint8 tensor straight from the array.
        img = torch.from_numpy(frame[:, :, [2, 1, 0]]).permute(2, 0, 1).to(torch.uint8)
        batch = preprocess(img).unsqueeze(0)

        # Only the forward pass needs gradient tracking switched off.
        with torch.no_grad():
            prediction = model(batch)[0]

        # Person detections as (ltwh box, confidence, class) tuples — the order
        # DeepSort.update_tracks takes. Non-person detections are left out
        # rather than kept as None placeholders.
        only_human = [
            (convertToLTWH(det_box).numpy(), det_score.item(), det_label.item())
            for det_box, det_label, det_score in zip(
                prediction["boxes"], prediction["labels"], prediction["scores"]
            )
            if det_label == PERSON_LABEL
        ]

        labels = [weights.meta["categories"][i] for i in prediction["labels"].tolist()]
        box = draw_bounding_boxes(
            img,
            boxes=prediction["boxes"],
            labels=labels,
            colors="red",
            width=4,
            font_size=30,
        )

        # --- Tracking / counting (work in progress, disabled) ---
        # NOTE(review): ObjectTracking._findCentroid is not defined in the
        # visible part of this notebook; it is needed before enabling this.
        # bgr_frame = torch.permute(batch[0], [1, 2, 0]).numpy()[..., [2, 1, 0]]
        # bgr_frame = (bgr_frame * 255).astype(np.uint8)
        # tracks = tracker.update_tracks(only_human, frame=bgr_frame)
        # print(tracks)
        #
        # for track in tracks:
        #     track: Track
        #     if not track.is_confirmed():
        #         continue
        #     track_id = track.track_id
        #     ltrb = track.to_ltrb()
        #
        #     centroid = ObjectTracking._findCentroid(ltrb)
        #
        #     if centroid[0] > 0 and centroid[1] > 300 and track_id in human_set:
        #         number_person += 1
        #     # print(f'ID: {track_id}')
        #     # print(f'{ltrb}')
        #
        # print(f'NPerson: {number_person}')

        # `box` is RGB, which is what matplotlib expects, so no channel swap
        # here. (For cv2.imshow the channels would have to be reversed to BGR:
        # cv2.imshow("video", torch.permute(box, [1, 2, 0]).numpy()[..., [2, 1, 0]]))
        clear_output(wait=True)
        plt.imshow(box.permute(1, 2, 0).numpy())
        plt.show()

        # Frames per second for this iteration; the floor avoids a division by
        # zero should the clock not advance between the two readings.
        elapsed = max(time.time() - initial_time, 1e-9)
        print(f"{round(1 / elapsed, 1)}")
finally:
    # Always give the capture back, including when the cell is interrupted.
    # Re-run the cell that creates `cam` before running this loop again.
    cam.release()

KeyboardInterrupt: 