In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import cv2
import ultralytics
from ultralytics import YOLO
import random
import colorsys

In [2]:
from detection import Detection
from image_encode import Extractor
from iou_matching import iou, iou_cost
from kalman_filter import KalmanFilter
from linear_sum_assignment import gate_cost_matrix, matching_cascade, min_cost_matching
from nearest_neighbor_matching import NearestNeighborDistanceMetric
from track import Track
from tracker import Tracker

In [3]:
NUM_CLASS = {
    0: 'person'}
key_list = list(NUM_CLASS.keys()) 
val_list = list(NUM_CLASS.values()) 


In [4]:
def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b
    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2,x4) - x3

def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1[0], box1[1]], [box2[0], box2[1]])
    intersect_h = _interval_overlap([box1[2], box1[3]], [box2[2], box2[3]])
    intersect = intersect_w * intersect_h
    union = (box1[1]-box1[0])*(box1[3]-box1[2]) + (box2[1]-box2[0])*(box2[3]-box2[2]) - intersect
    return float(intersect) / union +0.01

def do_nms(boxes, nms_thresh):
    for i, box in enumerate(boxes):
        if box is None:
            continue
        for j in range(i + 1, len(boxes)):
            if boxes[j] is None:
                continue
            if bbox_iou(box, boxes[j]) >= nms_thresh:
                boxes[j] = None
    return boxes


In [5]:
def crop_image(image, bboxes):
    encoder = Extractor("ckpt.t7", use_cuda=True)
    im_crops = []
    for box in bboxes:
        x1,y1,x2,y2 = box
        x1,y1,x2,y2 = int(box[0] *640), int(box[1] * 640), int(box[2] * 640) , int(box[3] * 640)
        img = image[x1:x2, y1:y2]
        im_crops.append(img)
    if im_crops:
        features = encoder(im_crops)
    else:
        features = np.array([])
    return features


In [6]:
from draw import draw_bbox

max_cosine_distance = 0.4
nn_budget = None
# model = YOLO('test_temp/runs/detect/yolov8s_custom/weights/best.pt')
model = YOLO('yolov8n.pt')

def object_tracking():
    metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    cap = cv2.VideoCapture(0)  
    
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter('output_video.mp4', fourcc, fps =20, frameSize = (640, 640))
    while True:
        ret, frame = cap.read()

        # frame = cv2.resize(frame, (640,640))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # detect
        boxes, labels, scores = [], [], []
        yhat = model.predict(source= frame)
        bboxes = yhat[0].boxes
        array_xyxy, array_conf = bboxes.xyxyn, bboxes.conf
        for i in range(array_xyxy.shape[0]):
            xmin, ymin , xmax, ymax = array_xyxy[i].cpu().numpy().tolist()
            # xmin, ymin , xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
            boxes.append((xmin, ymin , xmax, ymax))
            scores.append(array_conf[i].cpu().numpy())
            labels.append("person")
        boxes = do_nms(boxes, 0.3)
        boxes = [box for box in boxes if box is not None]
        boxes = np.array(boxes) 
        names = np.array(labels)
        scores = np.array(scores)
        features = np.array(crop_image(frame, boxes))
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features)]

        tracker.predict()
        tracker.update(detections)
        
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            print("____________________________________________________________________")
            bbox = track.to_xyxy() 
            class_name = track.get_class() 
            tracking_id = track.track_id
            index = key_list[val_list.index(class_name)] 
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])
        image = draw_bbox(frame, tracked_bboxes, CLASSES=NUM_CLASS, show_label= True, show_confidence=False, tracking=True)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        out.write(image)

        cv2.imshow("predict", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()
object_tracking()


0: 480x640 1 person, 187.0ms
Speed: 2.9ms preprocess, 187.0ms inference, 183.9ms postprocess per image at shape (1, 3, 480, 640)
Loading weights from ckpt.t7... Done!

0: 480x640 1 person, 16.0ms
Speed: 0.0ms preprocess, 16.0ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)
Loading weights from ckpt.t7... Done!

0: 480x640 1 person, 13.7ms
Speed: 0.0ms preprocess, 13.7ms inference, 17.1ms postprocess per image at shape (1, 3, 480, 640)
Loading weights from ckpt.t7... Done!
____________________________________________________________________
 1

0: 480x640 1 person, 24.0ms
Speed: 0.0ms preprocess, 24.0ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)
Loading weights from ckpt.t7... Done!
____________________________________________________________________
 1

0: 480x640 1 person, 20.0ms
Speed: 0.0ms preprocess, 20.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)
Loading weights from ckpt.t7... Done!
_______________________________

error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


: 