In [2]:
!pip install ultralytics numpy opencv-python

Collecting ultralytics
  Using cached ultralytics-8.3.61-py3-none-any.whl.metadata (35 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Using cached matplotlib-3.10.0-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)
Collecting pandas>=1.1.4 (from ultralytics)
  Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting seaborn>=0.11.0 (from ultralytics)
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Using cached ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Using cached ultralytics-8.3.61-py3-none-any.whl (906 kB)
Using cached matplotlib-3.10.0-cp312-cp312-win_amd64.whl (8.0 MB)
Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl (11.5 MB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl (1.6 M

In [1]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        self.capture = cv2.VideoCapture(video_path)
        self.fps, *self.shape = map(
            lambda prop: int(self.capture.get(prop)),
            [
                cv2.CAP_PROP_FPS,
                cv2.CAP_PROP_FRAME_WIDTH,
                cv2.CAP_PROP_FRAME_HEIGHT,
                cv2.CAP_PROP_FRAME_COUNT,
            ]
        )
        self.capture.release()
    
    def frame_gen(self):
        self.capture = cv2.VideoCapture(self.path)

        while self.capture.isOpened() and cv2.waitKey(1) == -1:
            read_successfully, main_frame = self.capture.read()

            if read_successfully:
                yield main_frame

        self.capture.release()
    
    def __iter__(self):
        return self.frame_gen()

def main():
    data_path = "data"
    video_name = "cars-highway.mp4"
    video_path = os.path.join(data_path, video_name)

    video = Video(video_path)
    print(f"{video.name = }")
    print(f"{video.shape = }")
    print(f"{video.fps = }")

    models_path = "pretrained_models"
    model_name = "yolo11n.pt"
    yolo_path = os.path.join(models_path, model_name)

    yolo = YOLO(yolo_path, verbose=False)

    x1_roi, x2_roi = 90, 280
    y1_roi, y2_roi = 170, 290
    roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

    obj_positions = {}
    obj_velocities = {}
    detection_lifetime_frames = 5
   
    for i, frame in enumerate(video):
        results = yolo.track(frame[roi_mask], persist=True, classes=[2, 7], conf=0.6, iou=0.5)

        for obj in results[0].boxes:
            try:
                id = int(obj.id.item())
                x, *_, y = map(int, obj.xyxy[0].numpy()) # (x, y) is bottom-left corner of the object
                obj_pos_frame = (x, y, i)

                if id not in obj_positions.keys():
                    obj_positions[id] = [obj_pos_frame]
                    obj_velocities[id] = None
                else:
                    obj_positions[id].append(obj_pos_frame)
                    xpx_diff = obj_positions[id][-1][0] - obj_positions[id][-2][0]
                    ypx_diff = obj_positions[id][-1][1] - obj_positions[id][-2][1]
                    frame_diff = obj_positions[id][-1][-1] - obj_positions[id][-2][-1]

                    vx = xpx_diff/frame_diff
                    vy = ypx_diff/frame_diff

                    if id not in obj_velocities.keys() or obj_velocities[id] is None:
                        obj_velocities[id] = ypx_diff/frame_diff
                    else:
                        obj_velocities[id] = (obj_velocities[id] + vy) / 2 

                    speed = obj_velocities[id] * video.fps

                    cv2.putText(frame[roi_mask], f"{-speed:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)
            

                cv2.circle(frame[roi_mask], (x, y), 3, (0, 0, 255), -1)

            except AttributeError:
                print("No objects detected, resuming...")
                continue

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(obj_positions)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

        if obj_velocities:
            filtered = [value for value in obj_velocities.values() if value is not None] # Remove None's
            avg_speed = np.mean(filtered)
            cv2.putText(frame, f"Avg speed: {-avg_speed*video.fps:.2f}px/s", (x1_roi-5, y1_roi-5), 0, 0.5, (0, 255, 0), 1)

        cv2.putText(frame, f"Traffic: {-avg_speed*video.fps<15 and vehicle_count>3}", (20, 20), 0, 0.5, (255, 0, 0), 1)

        cv2.imshow("YOLO", frame)

        for id in list(obj_positions.keys()):
            if i - obj_positions[id][-1][-1] > detection_lifetime_frames:
                del obj_positions[id]
                del obj_velocities[id]
    
    cv2.destroyAllWindows()

    return



main()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25
[31m[1mrequirements:[0m Ultralytics requirement ['lap>=0.5.12'] not found, attempting AutoUpdate...
Collecting lap>=0.5.12
  Downloading lap-0.5.12-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Downloading lap-0.5.12-cp312-cp312-win_amd64.whl (1.5 MB)
   ---------------------------------------- 1.5/1.5 MB 39.2 MB/s eta 0:00:00
Installing collected packages: lap
Successfully installed lap-0.5.12

[31m[1mrequirements:[0m AutoUpdate success  5.2s, installed 1 package: ['lap>=0.5.12']
[31m[1mrequirements:[0m  [1mRestart runtime or rerun command for updates to take effect[0m


0: 416x640 2 cars, 1 truck, 129.5ms
Speed: 15.7ms preprocess, 129.5ms inference, 4.5ms postprocess per image at shape (1, 3, 416, 640)



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


0: 416x640 2 cars, 1 truck, 104.5ms
Speed: 4.5ms preprocess, 104.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 cars, 1 truck, 101.5ms
Speed: 3.0ms preprocess, 101.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 car, 1 truck, 92.5ms
Speed: 2.5ms preprocess, 92.5ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 cars, 1 truck, 89.3ms
Speed: 2.0ms preprocess, 89.3ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 cars, 1 truck, 96.8ms
Speed: 1.5ms preprocess, 96.8ms inference, 1.5ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 car, 1 truck, 87.4ms
Speed: 4.0ms preprocess, 87.4ms inference, 2.5ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 car, 1 truck, 86.8ms
Speed: 2.0ms preprocess, 86.8ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 car, 1 truck, 83.6ms
Speed: 2.0ms preprocess, 83.6ms in