In [10]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import cv2
from ultralytics import YOLO

## Check for GPU availability

In [3]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.get_device_name())

True
NVIDIA RTX A4000


In [4]:
!nvcc --version
print("\npytorch version:", torch.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Mar_28_02:18:24_PDT_2024
Cuda compilation tools, release 12.4, V12.4.131
Build cuda_12.4.r12.4/compiler.34097967_0

pytorch version: 2.3.0


## Initialize device to GPU/CPU usage

In [5]:
device = None
if(torch.cuda.is_available()):
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("using", device, "device")

using cuda device


## Initialize the YOLOv9 model

In [6]:
# set to desired gpu number
# torch.cuda.set_device(0)

# specify use of gpu in model
model = YOLO('yolov9e-seg.pt')

# use gpu for model
model.to(device)
torch.cuda.synchronize()

## Initialize video source

In [7]:
# Define video source
vid_path = "/home/servicer/Documents/ouput_video1.mp4"
vid_cap = cv2.VideoCapture(vid_path)
vid_fps = vid_cap.get(cv2.CAP_PROP_FPS)

# Check if the video was openened correctly
if not vid_cap.isOpened():
    print("Error opening video file.")
else:
    print("Video opened successfully!")
    print("Video Frame Rate:", str(vid_fps) + " fps")

Video opened successfully!
Video Frame Rate: 24.95034939315925 fps


## Define YOLO classes

In [8]:
PERSON = 0
BIKE = 1
CAR = 2
MOTORCYCLE = 3
BUS = 5

## Process video source

In [None]:
ped_times = {}

while vid_cap.isOpened():
	success, frame = vid_cap.read()
	if not success:
		break

	# Perform detection and tracking
	results = model.track(frame, persist=True, conf=0.2, classes=[PERSON, BIKE], iou=0.5, show=False, tracker="bytetrack.yaml")

	# Print frame
	frame_num = int(vid_cap.get(cv2.CAP_PROP_POS_FRAMES))

	# Get annotated frame and display it
	annotated_frame = results[0].plot()
	cv2.imshow('YOLOv9 Tracking', annotated_frame)
	for i, r in enumerate(results):
		for index, box in enumerate(r.boxes):
			# id of object
			print(box.id)
			print(box.cls)
			if(box.id and box.cls[0] != None):
				# print(box.id)
				# print(box.cls)
				tracker_id = int(box.id)
				tracker_cls = int(box.cls[0])
				print("class:", tracker_cls)
				print("id", tracker_id)

				obj_idx = "class_" + str(tracker_cls) + "_idx_" + str(tracker_id)
				if(not(obj_idx in ped_times)):
					ped_times[obj_idx] = frame_num / vid_fps
    
    
	print("frame:", frame_num)

torch.cuda.synchronize()
print(ped_times)

	# # Press 'q' to exit
	# if cv2.waitKey(1) & 0xFF == ord('q'):
	# 	break


0: 384x640 (no detections), 21.1ms
Speed: 2.0ms preprocess, 21.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 1

0: 384x640 (no detections), 24.6ms
Speed: 3.5ms preprocess, 24.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 2

0: 384x640 (no detections), 21.0ms
Speed: 2.7ms preprocess, 21.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 3

0: 384x640 (no detections), 21.8ms
Speed: 2.5ms preprocess, 21.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 4

0: 384x640 (no detections), 22.4ms
Speed: 2.7ms preprocess, 22.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 5



QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the object's thread (0x93ae0a0).
Cannot move to target thread (0x3f7bff80)

QObject::moveToThread: Current thread (0x3f7bff80) is not the 

0: 384x640 (no detections), 22.3ms
Speed: 3.5ms preprocess, 22.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 6

0: 384x640 (no detections), 22.1ms
Speed: 2.3ms preprocess, 22.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 7

0: 384x640 (no detections), 21.4ms
Speed: 2.3ms preprocess, 21.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 8

0: 384x640 (no detections), 21.2ms
Speed: 3.1ms preprocess, 21.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 9

0: 384x640 (no detections), 20.9ms
Speed: 2.1ms preprocess, 20.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 10

0: 384x640 (no detections), 20.9ms
Speed: 2.3ms preprocess, 20.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 11

0: 384x640 (no detections), 20.2ms
Speed: 2.3ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 12

0: 

None
tensor([1.], device='cuda:0')
None
tensor([0.], device='cuda:0')
frame: 61

0: 384x640 1 person, 1 bicycle, 22.1ms
Speed: 2.2ms preprocess, 22.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
None
tensor([1.], device='cuda:0')
None
tensor([0.], device='cuda:0')
frame: 62

0: 384x640 1 person, 22.1ms
Speed: 2.3ms preprocess, 22.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
None
tensor([0.], device='cuda:0')
frame: 63

0: 384x640 1 person, 1 bicycle, 22.0ms
Speed: 2.2ms preprocess, 22.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
None
tensor([1.], device='cuda:0')
None
tensor([0.], device='cuda:0')
frame: 64

0: 384x640 1 bicycle, 22.3ms
Speed: 2.3ms preprocess, 22.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([2.])
tensor([1.])
class: 1
id 2
frame: 65

0: 384x640 1 person, 1 bicycle, 22.0ms
Speed: 2.5ms preprocess, 22.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
t

tensor([3.])
tensor([0.])
class: 0
id 3
frame: 105

0: 384x640 1 person, 25.2ms
Speed: 2.1ms preprocess, 25.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([0.])
class: 0
id 3
frame: 106

0: 384x640 1 person, 23.2ms
Speed: 2.1ms preprocess, 23.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([0.])
class: 0
id 3
frame: 107

0: 384x640 1 person, 23.7ms
Speed: 2.0ms preprocess, 23.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([0.])
class: 0
id 3
frame: 108

0: 384x640 1 person, 23.2ms
Speed: 2.1ms preprocess, 23.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([0.])
class: 0
id 3
frame: 109

0: 384x640 1 person, 23.7ms
Speed: 2.0ms preprocess, 23.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([0.])
class: 0
id 3
frame: 110

0: 384x640 (no detections), 22.8ms
Speed: 3.7ms preprocess, 22.

0: 384x640 1 person, 1 bicycle, 23.1ms
Speed: 2.5ms preprocess, 23.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 151

0: 384x640 1 person, 1 bicycle, 23.0ms
Speed: 2.0ms preprocess, 23.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 152

0: 384x640 1 person, 1 bicycle, 22.5ms
Speed: 2.1ms preprocess, 22.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 153

0: 384x640 1 person, 1 bicycle, 22.0ms
Speed: 2.0ms preprocess, 22.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 154

0: 384x640 1 person, 1 bicycle, 22.6ms
Speed: 2.0ms preprocess, 22.6ms inference, 1.6ms 


0: 384x640 1 person, 1 bicycle, 24.5ms
Speed: 2.1ms preprocess, 24.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 187

0: 384x640 1 person, 1 bicycle, 22.1ms
Speed: 2.1ms preprocess, 22.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 188

0: 384x640 1 person, 1 bicycle, 22.8ms
Speed: 2.0ms preprocess, 22.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 189

0: 384x640 1 person, 1 bicycle, 22.3ms
Speed: 2.0ms preprocess, 22.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
tensor([3.])
tensor([1.])
class: 1
id 3
tensor([5.])
tensor([0.])
class: 0
id 5
frame: 190

0: 384x640 1 person, 1 bicycle, 22.4ms
Speed: 2.1ms preprocess, 22.4ms inference, 1.6ms

Speed: 2.1ms preprocess, 19.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
frame: 230

0: 384x640 (no detections), 19.4ms
Speed: 2.2ms preprocess, 19.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 231

0: 384x640 (no detections), 18.9ms
Speed: 2.0ms preprocess, 18.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 232

0: 384x640 (no detections), 19.4ms
Speed: 2.5ms preprocess, 19.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
frame: 233

0: 384x640 (no detections), 20.5ms
Speed: 2.3ms preprocess, 20.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 234

0: 384x640 (no detections), 19.6ms
Speed: 2.2ms preprocess, 19.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 235

0: 384x640 (no detections), 19.7ms
Speed: 2.2ms preprocess, 19.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 236

0: 384x640 (no detections),

Speed: 2.4ms preprocess, 18.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 287

0: 384x640 (no detections), 18.8ms
Speed: 2.4ms preprocess, 18.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 288

0: 384x640 (no detections), 18.9ms
Speed: 2.3ms preprocess, 18.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 289

0: 384x640 (no detections), 19.0ms
Speed: 2.2ms preprocess, 19.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)
frame: 290

0: 384x640 (no detections), 18.7ms
Speed: 2.4ms preprocess, 18.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
frame: 291

0: 384x640 (no detections), 18.9ms
Speed: 2.3ms preprocess, 18.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 292

0: 384x640 (no detections), 18.9ms
Speed: 2.7ms preprocess, 18.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 293

0: 384x640 (no detections),

Speed: 2.5ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 344

0: 384x640 (no detections), 19.8ms
Speed: 2.0ms preprocess, 19.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
frame: 345

0: 384x640 (no detections), 20.2ms
Speed: 2.0ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 346

0: 384x640 (no detections), 20.1ms
Speed: 2.2ms preprocess, 20.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 347

0: 384x640 (no detections), 19.6ms
Speed: 2.6ms preprocess, 19.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 348

0: 384x640 (no detections), 19.6ms
Speed: 2.1ms preprocess, 19.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
frame: 349

0: 384x640 (no detections), 19.6ms
Speed: 2.1ms preprocess, 19.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 350

0: 384x640 (no detections),

Speed: 2.3ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 401

0: 384x640 (no detections), 19.5ms
Speed: 2.7ms preprocess, 19.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 402

0: 384x640 (no detections), 20.0ms
Speed: 2.8ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 403

0: 384x640 (no detections), 19.4ms
Speed: 2.8ms preprocess, 19.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 404

0: 384x640 (no detections), 19.5ms
Speed: 2.7ms preprocess, 19.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 405

0: 384x640 (no detections), 19.7ms
Speed: 3.2ms preprocess, 19.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 406

0: 384x640 (no detections), 19.1ms
Speed: 2.9ms preprocess, 19.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
frame: 407

0: 384x640 (no detections),

## Process and Display video source

In [10]:
# Define video source
vid_path = "/home/servicer/Documents/ouput_video1.mp4"
vid_cap = cv2.VideoCapture(vid_path)
vid_fps = vid_cap.get(cv2.CAP_PROP_FPS)

# Check if the video was openened correctly
if not vid_cap.isOpened():
    print("Error opening video file.")
else:
    print("Video opened successfully!")
    print("Video Frame Rate:", str(vid_fps) + " fps")

# Specify use of gpu in model
model = YOLO('yolov9e-seg.pt')

# Use gpu for model
model.to(device)
torch.cuda.synchronize()

# Read and process the video
while vid_cap.isOpened():

    # Get the next captured frame
    success, frame = vid_cap.read()
    if not success:
        break

    # Perform detection and tracking on frame
    results = model.track(
        frame,
        persist=True,
        conf=0.2,
        iou=0.5,
        show=False,
        tracker="bytetrack.yaml",
        classes=[PERSON]
    )

    # Get annotated frame and display it
    annotated_frame = results[0].plot()
    cv2.imshow('YOLOv9 Tracking', annotated_frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
torch.cuda.synchronize()
vid_cap.release()
cv2.destroyAllWindows()

Video opened successfully!
Video Frame Rate: 24.95034939315925 fps

0: 384x640 (no detections), 21.4ms
Speed: 1.9ms preprocess, 21.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.0ms
Speed: 2.6ms preprocess, 25.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.2ms preprocess, 20.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.5ms preprocess, 20.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.5ms preprocess, 20.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.8ms preprocess, 20.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.4ms
Speed: 2.4ms preprocess, 20.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0


0: 384x640 1 person, 23.8ms
Speed: 2.5ms preprocess, 23.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.5ms
Speed: 2.6ms preprocess, 23.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.3ms
Speed: 2.4ms preprocess, 23.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.9ms
Speed: 2.2ms preprocess, 23.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.8ms
Speed: 2.2ms preprocess, 23.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 24.0ms
Speed: 2.1ms preprocess, 24.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 26.9ms
Speed: 2.1ms preprocess, 26.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 24.0ms
Speed: 2.2ms preprocess, 24.0ms inference, 1.6ms postprocess per image at shape (1, 3, 38


0: 384x640 1 person, 25.2ms
Speed: 2.0ms preprocess, 25.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 24.9ms
Speed: 2.0ms preprocess, 24.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 29.4ms
Speed: 2.2ms preprocess, 29.4ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 24.0ms
Speed: 2.2ms preprocess, 24.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.6ms
Speed: 2.3ms preprocess, 23.6ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.4ms
Speed: 2.4ms preprocess, 23.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 30.7ms
Speed: 2.2ms preprocess, 30.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 24.3ms
Speed: 2.3ms preprocess, 24.3ms inference, 1.6ms postprocess per image at shape (1, 3, 38


0: 384x640 1 person, 25.8ms
Speed: 2.0ms preprocess, 25.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 30.7ms
Speed: 4.2ms preprocess, 30.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 24.6ms
Speed: 2.2ms preprocess, 24.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 25.3ms
Speed: 2.0ms preprocess, 25.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.9ms
Speed: 2.0ms preprocess, 23.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.9ms
Speed: 2.1ms preprocess, 23.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.9ms
Speed: 2.1ms preprocess, 23.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 23.8ms
Speed: 2.0ms preprocess, 23.8ms inference, 1.6ms postprocess per image at shape (1, 3, 38


0: 384x640 (no detections), 19.4ms
Speed: 2.3ms preprocess, 19.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.4ms
Speed: 2.3ms preprocess, 19.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 2.4ms preprocess, 19.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.7ms
Speed: 2.4ms preprocess, 19.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.7ms
Speed: 2.2ms preprocess, 19.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.5ms
Speed: 2.3ms preprocess, 19.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.3ms
Speed: 2.6ms preprocess, 19.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 2.1ms preprocess, 19.6ms i


0: 384x640 (no detections), 19.7ms
Speed: 2.2ms preprocess, 19.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.5ms preprocess, 20.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.3ms preprocess, 19.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.1ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.2ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.6ms
Speed: 2.3ms preprocess, 20.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.4ms
Speed: 2.3ms preprocess, 20.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.1ms preprocess, 20.2ms i


0: 384x640 (no detections), 21.2ms
Speed: 2.3ms preprocess, 21.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.6ms
Speed: 2.4ms preprocess, 20.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.0ms
Speed: 2.2ms preprocess, 21.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.5ms
Speed: 2.0ms preprocess, 20.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.2ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.6ms preprocess, 20.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.4ms
Speed: 2.4ms preprocess, 20.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.3ms preprocess, 20.1ms i


0: 384x640 (no detections), 20.2ms
Speed: 2.4ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.6ms
Speed: 2.3ms preprocess, 20.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.5ms
Speed: 2.4ms preprocess, 20.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.3ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.5ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.3ms preprocess, 20.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.5ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.8ms
Speed: 2.2ms preprocess, 19.8ms i


0: 384x640 (no detections), 20.3ms
Speed: 2.2ms preprocess, 20.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.2ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.3ms preprocess, 20.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.1ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.8ms
Speed: 2.1ms preprocess, 19.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.3ms preprocess, 20.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.6ms
Speed: 2.1ms preprocess, 20.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.2ms preprocess, 19.9ms i


0: 384x640 (no detections), 19.7ms
Speed: 2.4ms preprocess, 19.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.4ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 2.4ms preprocess, 19.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.3ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.7ms
Speed: 2.2ms preprocess, 19.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.5ms
Speed: 2.5ms preprocess, 19.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.7ms
Speed: 2.4ms preprocess, 19.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.5ms preprocess, 20.2ms i


0: 384x640 (no detections), 19.6ms
Speed: 2.6ms preprocess, 19.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.4ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.4ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.4ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.4ms
Speed: 2.4ms preprocess, 19.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.5ms
Speed: 2.5ms preprocess, 19.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 2.4ms preprocess, 19.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.5ms
Speed: 2.1ms preprocess, 19.5ms i


0: 384x640 (no detections), 19.5ms
Speed: 2.3ms preprocess, 19.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 27.7ms
Speed: 3.6ms preprocess, 27.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.5ms
Speed: 2.3ms preprocess, 19.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.7ms
Speed: 2.4ms preprocess, 19.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 2.2ms preprocess, 19.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.8ms
Speed: 2.4ms preprocess, 19.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.8ms
Speed: 2.4ms preprocess, 19.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 2.5ms preprocess, 19.6ms i


0: 384x640 (no detections), 20.1ms
Speed: 2.1ms preprocess, 20.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.4ms
Speed: 2.2ms preprocess, 20.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.3ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.1ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.6ms
Speed: 2.2ms preprocess, 20.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.2ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.4ms
Speed: 2.3ms preprocess, 21.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.6ms preprocess, 20.3ms i


0: 384x640 (no detections), 19.9ms
Speed: 2.2ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.1ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.7ms
Speed: 2.1ms preprocess, 20.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.3ms preprocess, 20.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.3ms preprocess, 20.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.0ms
Speed: 2.3ms preprocess, 20.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.8ms
Speed: 2.3ms preprocess, 19.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.1ms
Speed: 2.1ms preprocess, 20.1ms i


0: 384x640 (no detections), 20.5ms
Speed: 2.5ms preprocess, 20.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.1ms preprocess, 20.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.3ms
Speed: 2.2ms preprocess, 20.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.9ms
Speed: 2.5ms preprocess, 19.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.0ms
Speed: 3.4ms preprocess, 21.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.4ms
Speed: 2.4ms preprocess, 20.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.5ms
Speed: 2.9ms preprocess, 20.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 20.2ms
Speed: 2.4ms preprocess, 20.2ms i

KeyboardInterrupt: 

## Process and Display video source

In [None]:
# Define video source
vid_path = "./sources/2024-05-10T08-45-36.mp4"
vid_cap = cv2.VideoCapture(vid_path)
vid_fps = vid_cap.get(cv2.CAP_PROP_FPS)

# Check if the video was openened correctly
if not vid_cap.isOpened():
    print("Error opening video file.")
else:
    print("Video opened successfully!")
    print("Video Frame Rate:", str(vid_fps) + " fps")

# Specify use of gpu in model
model = YOLO('yolov9e-seg.pt')

# Use gpu for model
model.to(device)
torch.cuda.synchronize()

# Read and process the video
while vid_cap.isOpened():

    # Get the next captured frame
    success, frame = vid_cap.read()
    if not success:
        break

    # Perform detection and tracking on frame
    results = model.track(
        frame,
        persist=True,
        conf=0.2,
        iou=0.5,
        show=False,
        tracker="bytetrack.yaml",
        classes=[PERSON]
    )

    # Get annotated frame and display it
    annotated_frame = results[0].plot()
    cv2.imshow('YOLOv9 Tracking', annotated_frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
torch.cuda.synchronize()
vid_cap.release()
cv2.destroyAllWindows()

In [None]:
torch.cuda.synchronize()
vid_cap.release()
cv2.destroyAllWindows()