In [1]:
import importlib
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import cv2
import torch
import numpy as np

from PIL import Image
from pathlib import Path
from torchvision import models, transforms
from torchvision.ops import nms
from deep_sort_realtime.deep_sort.track import Track
from deep_sort_realtime.deepsort_tracker import DeepSort

In [2]:
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(device)
elif torch.backends.mps.is_available():
    device = torch.device('mps)')
    print(device)
elif importlib.util.find_spec("torch_directml") is not None:
    import torch_directml
    device = torch_directml.device()
    print(torch_directml.device_name(0))
else:
    device = torch.device('cpu')
    print(device)

Radeon RX Vega 


In [3]:
def create_video_writer(video_cap, output_filename):

    # grab the width, height, and fps of the frames in the video stream.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_cap.get(cv2.CAP_PROP_FPS))

    # initialize the FourCC and a video writer object
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = cv2.VideoWriter(output_filename, fourcc, fps,
                             (frame_width, frame_height))

    return writer

In [4]:
model = models.detection.fasterrcnn_resnet50_fpn_v2(weights=models.detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT).to(device)
model = model.eval()

In [5]:
deepsort = DeepSort(max_age=30)

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
video_path = Path("./video/macv-obj-tracking-video.mp4") 

  self.model.load_state_dict(torch.load(model_wts_path))


In [None]:
cap = cv2.VideoCapture(video_path)
frame_count = 0
object_times = {}
score_threshold = 0.85
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    # Preprocess the frame and detect objects using Faster R-CNN
    pil_img = Image.fromarray(frame)
    img_tensor = transform(pil_img).unsqueeze(0).to(device)

    with torch.no_grad():
        detections = model(img_tensor)
    # Get bounding boxes, classes, and scores (only keep detections with score > 0.5)
    boxes = detections[0]['boxes'].cpu().numpy()
    labels = detections[0]['labels'].cpu().numpy()
    scores = detections[0]['scores'].cpu().numpy()

    valid_boxes = boxes[scores > score_threshold]
    valid_scores = scores[scores > score_threshold]
    valid_cls_ids = labels[scores > score_threshold]
    valid_boxes_tensor = torch.tensor(valid_boxes, dtype=torch.float32)
    valid_scores_tensor = torch.tensor(valid_scores, dtype=torch.float32)
    valid_cls_ids_tensor = torch.tensor(valid_cls_ids, dtype=torch.int64)

    nms_indices = nms(valid_boxes_tensor, valid_scores_tensor, iou_threshold=0.4)
    final_boxes = valid_boxes_tensor[nms_indices].cpu().numpy()
    final_scores = valid_scores_tensor[nms_indices].cpu().numpy()
    final_cls_ids = valid_cls_ids_tensor[nms_indices].cpu().numpy()

    # Prepare detections for DeepSORT (format: x1, y1, width, height, score)
    detections_deepsort = []
    for box, score, label in zip(final_boxes, final_scores, final_cls_ids):
        x1, y1, x2, y2 = box
        width = x2 - x1
        height = y2 - y1
        detections_deepsort.append([[x1, y1, width, height], score, label])
    # Update DeepSORT tracker with the current frame's detections
    trackers: list[Track] = deepsort.update_tracks(detections_deepsort, frame=frame)

    # Draw bounding boxes and tracking IDs
    for track in trackers:
        if not track.is_confirmed():
            continue
        det_cls = track.det_class
        track_id = track.track_id
        if track_id not in object_times:
            object_times[track_id] = {"entry_frame": frame_count, "exit_frame": None}
        object_times[track_id]["exit_frame"] = frame_count  # Update exit frame every frame the object is tracked
        x1, y1, w, h = track.to_tlbr()  # DeepSORT returns bounding box as (x1, y1, x2, y2)
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id} Cls = {det_cls}", (int(x1), int(y1)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("Object Tracking", frame)

    # Break loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


: 

In [None]:
# Print the duration for each object (frame range)
for obj_id, times in object_times.items():
    entry_frame = times["entry_frame"]
    exit_frame = times["exit_frame"]
    duration = exit_frame - entry_frame
    print(f"Object ID: {obj_id} appeared from frame {entry_frame} to frame {exit_frame} for {duration} frames.")

Object ID: 1 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 2 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 3 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 4 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 5 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 6 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 7 appeared from frame 3 to frame 37 for 34 frames.
Object ID: 9 appeared from frame 3 to frame 49 for 46 frames.
Object ID: 10 appeared from frame 3 to frame 90 for 87 frames.
Object ID: 12 appeared from frame 4 to frame 61 for 57 frames.
Object ID: 15 appeared from frame 15 to frame 47 for 32 frames.
Object ID: 16 appeared from frame 15 to frame 57 for 42 frames.
Object ID: 20 appeared from frame 27 to frame 90 for 63 frames.
Object ID: 25 appeared from frame 36 to frame 72 for 36 frames.
Object ID: 26 appeared from frame 36 to frame 66 for 30 frames.
Object ID: 40 appeared from frame 51 to frame 90 for 39 fr

In [None]:
X = torch.rand(3,640,640, device=device)
print(X.shape)
y = model(X.unsqueeze(0))

torch.Size([3, 640, 640])
