### Clase Video

In [65]:
import os
import cv2

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

# Play the clip; the Video iterator also stops when a key is pressed.
try:
    for frame in video:
        cv2.imshow('Video', frame)
finally:
    # Always free the capture and close the window, even if playback is
    # interrupted by an error or a kernel interrupt.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### Modelo YOLO

In [66]:
import os

from ultralytics import YOLO

# Location of the pretrained weights, relative to the working directory.
models_path, filename = "pretrained_models", "yolo11n.pt"
input_path = os.path.join(models_path, filename)

model = YOLO(input_path)

# Keep only the label values of `model.names`, in the mapping's own order.
labels = [label for label in model.names.values()]
n = len(labels)

print(f"Model: {input_path}")
print(f"Labels({n=}): {labels}")

Model: pretrained_models\yolo11n.pt
Labels(n=80): ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


### YOLO aplicado a imagen

In [67]:
import os

import cv2
from ultralytics import YOLO

data_path = "data"
filename = "bus.jpg"
input_path = os.path.join(data_path, filename)

frame = cv2.imread(input_path)
if frame is None:
    # cv2.imread reports an unreadable or missing file by returning None.
    raise FileNotFoundError(f"Could not read image: {input_path}")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

try:
    results = yolo.track(frame, show=True)
    boxes = results[0].boxes

    # Dump the first detection in full to show which attributes a box has.
    if len(boxes) > 0:
        obj = boxes[0]
        print(obj, end="\n\n")

    for obj in boxes:
        # .item() extracts the value of a single-element tensor; cast to int
        # so it is a proper key for `yolo.names`.
        class_id = int(obj.cls.item())
        # .tolist() yields plain floats on any device (unlike .numpy(), which
        # fails for tensors that live on the GPU) and still allows unpacking.
        x, y, w, h = obj.xywh[0].tolist()
        print(f"Label(id={class_id}): {yolo.names[class_id]}", end="\n-> ")
        print(f"Location: {x=:.2f}, {y=:.2f}, {w=:.2f}, {h=:.2f}", end="\n")
finally:
    # Close the preview window even if inference or printing fails.
    cv2.destroyAllWindows()


0: 640x480 4 persons, 1 bus, 106.0ms
Speed: 5.0ms preprocess, 106.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)
ultralytics.engine.results.Boxes object with attributes:

cls: tensor([5.])
conf: tensor([0.9402])
data: tensor([[  3.8328, 229.3642, 796.1946, 728.4123,   1.0000,   0.9402,   5.0000]])
id: tensor([1.])
is_track: True
orig_shape: (1080, 810)
shape: torch.Size([1, 7])
xywh: tensor([[400.0137, 478.8882, 792.3618, 499.0481]])
xywhn: tensor([[0.4938, 0.4434, 0.9782, 0.4621]])
xyxy: tensor([[  3.8328, 229.3642, 796.1946, 728.4123]])
xyxyn: tensor([[0.0047, 0.2124, 0.9830, 0.6745]])

Label(id=5): bus
-> Location: x=400.01, y=478.89, w=792.36, h=499.05
Label(id=0): person
-> Location: x=740.41, y=636.77, w=138.79, h=483.88
Label(id=0): person
-> Location: x=143.35, y=651.88, w=191.90, h=504.63
Label(id=0): person
-> Location: x=283.76, y=634.56, w=121.41, h=451.75
Label(id=0): person
-> Location: x=34.45, y=714.21, w=68.86, h=316.29


### YOLO aplicado a Video

In [68]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path)

try:
    for i, frame in enumerate(video):
        # persist=True keeps the tracker state between calls, so track ids
        # carry over from one frame to the next. show=True makes ultralytics
        # display the annotated frame itself.
        results = yolo.track(frame, show=True, persist=True, verbose=False)
        if i > 25:  # demo only: stop after frames 0..26
            break
finally:
    # The loop is left early, so release the capture explicitly and close
    # the preview window even if tracking raised.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### YOLO aplicado a Video en región de interés solo para la detección (imshow customizado)

In [69]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

# Region of interest in full-frame pixel coordinates; detection runs only on
# this crop, so box coordinates below are relative to its top-left corner.
x1_roi, x2_roi = 90, 280
y1_roi, y2_roi = 170, 290
roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

try:
    for i, frame in enumerate(video):
        # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
        roi = frame[roi_mask]
        # classes 2 and 7 are 'car' and 'truck' in this model's label list.
        results = yolo.track(roi, verbose=False, persist=True, classes=[2, 7], conf=0.45, iou=0.5)
        boxes = results[0].boxes

        for obj in boxes:
            # Detections that the tracker has not assigned an id to are skipped.
            if obj.id is None:
                print("Invalid object, resuming...")
                continue

            track_id = int(obj.id.item())
            # .tolist() works on any device, unlike .numpy() on GPU tensors.
            x, y, *_ = map(int, obj.xywh[0].tolist())

            cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)
            cv2.putText(roi, f"{track_id}", (x, y-3), 0, 0.5, (0, 0, 255), 1)

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(boxes)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)
        cv2.imshow("YOLO", frame)
finally:
    # Free the capture and close the window even if processing fails midway.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### Seguimiento de posiciones

In [70]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

# Region of interest in full-frame pixel coordinates; detection runs only on
# this crop, so box coordinates below are relative to its top-left corner.
x1_roi, x2_roi = 90, 280
y1_roi, y2_roi = 170, 290
roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

# track id -> list of (x, y, frame_index) observations, oldest first.
obj_positions = {}
# A track is forgotten once it has gone unseen for more than this many frames.
detection_lifetime_frames = 5

try:
    for i, frame in enumerate(video):
        # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
        roi = frame[roi_mask]
        # classes 2 and 7 are 'car' and 'truck' in this model's label list.
        results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
        boxes = results[0].boxes

        for obj in boxes:
            # Detections that the tracker has not assigned an id to are skipped.
            if obj.id is None:
                print("Invalid object, resuming...")
                continue

            track_id = int(obj.id.item())
            # .tolist() works on any device, unlike .numpy() on GPU tensors.
            x, y, *_ = map(int, obj.xywh[0].tolist())

            obj_positions.setdefault(track_id, []).append((x, y, i))

            cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(boxes)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

        cv2.imshow("YOLO", frame)

        # Report and drop tracks whose last observation is too old. Iterate
        # over a copy of the keys because entries are deleted in the loop.
        for track_id in list(obj_positions):
            if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                print(f"{track_id}: {obj_positions[track_id]}")
                del obj_positions[track_id]

        if i == 50: # Early stop
            break
finally:
    # The loop is left early, so release the capture explicitly and close
    # the window even if processing fails midway.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25
5: [(115, 47, 0)]
2: [(166, 3, 0), (166, 3, 1), (166, 3, 2), (166, 3, 3), (167, 3, 4), (167, 2, 5), (168, 2, 6), (168, 2, 7), (168, 2, 8)]
9: [(8, 109, 27)]
1: [(183, 20, 0), (183, 20, 1), (183, 20, 2), (183, 19, 3), (183, 18, 4), (184, 18, 5), (184, 17, 6), (184, 17, 7), (184, 16, 8), (184, 15, 9), (185, 15, 10), (185, 14, 11), (185, 14, 12), (185, 13, 13), (185, 13, 14), (185, 12, 15), (185, 12, 16), (185, 12, 17), (185, 11, 18), (185, 10, 19), (185, 10, 20), (186, 9, 22), (186, 8, 23), (186, 8, 24), (187, 7, 25), (187, 6, 27), (187, 6, 28), (187, 6, 29), (187, 6, 30), (187, 6, 31)]


### Cálculo de píxeles por segundo (velocidad instantánea)

In [71]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

# Region of interest in full-frame pixel coordinates; detection runs only on
# this crop, so box coordinates below are relative to its top-left corner.
x1_roi, x2_roi = 90, 280
y1_roi, y2_roi = 170, 290
roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

# track id -> list of (x, y, frame_index) observations, oldest first.
obj_positions = {}
# A track is forgotten once it has gone unseen for more than this many frames.
detection_lifetime_frames = 5

try:
    for i, frame in enumerate(video):
        # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
        roi = frame[roi_mask]
        # classes 2 and 7 are 'car' and 'truck' in this model's label list.
        results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
        boxes = results[0].boxes

        for obj in boxes:
            # Detections that the tracker has not assigned an id to are skipped.
            if obj.id is None:
                print("Invalid object, resuming...")
                continue

            track_id = int(obj.id.item())
            # .tolist() works on any device, unlike .numpy() on GPU tensors.
            x, y, *_ = map(int, obj.xywh[0].tolist())

            history = obj_positions.setdefault(track_id, [])
            history.append((x, y, i))

            if len(history) > 1:
                (_, prev_y, prev_frame), (_, last_y, last_frame) = history[-2:]
                # Vertical speed between the two latest observations, in px/s
                # (frames / fps = seconds). Image y grows downwards, so the
                # sign is flipped. The horizontal component is omitted for
                # simplicity.
                vy = -(last_y - prev_y) / (last_frame - prev_frame) * video.fps

                cv2.putText(roi, f"{vy:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)

            cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(boxes)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)
        cv2.imshow("YOLO", frame)

        # Drop tracks whose last observation is too old. Iterate over a copy
        # of the keys because entries are deleted in the loop.
        for track_id in list(obj_positions):
            if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                del obj_positions[track_id]
finally:
    # Free the capture and close the window even if processing fails midway.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### Velocidad media en la región de interés (para cada objeto, entre su primera y su última detección)

In [75]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

# Region of interest in full-frame pixel coordinates; detection runs only on
# this crop, so box coordinates below are relative to its top-left corner.
x1_roi, x2_roi = 90, 280
y1_roi, y2_roi = 170, 290
roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

# track id -> list of (x, y, frame_index) observations, oldest first.
obj_positions = {}
# track id -> mean vertical speed in px/s between the first and the latest
# observation, or None while the track has a single observation.
obj_velocities = {}
# A track is forgotten once it has gone unseen for more than this many frames.
detection_lifetime_frames = 5

try:
    for i, frame in enumerate(video):
        # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
        roi = frame[roi_mask]
        # classes 2 and 7 are 'car' and 'truck' in this model's label list.
        results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
        boxes = results[0].boxes

        for obj in boxes:
            # Detections that the tracker has not assigned an id to are skipped.
            if obj.id is None:
                print("Invalid object, resuming...")
                continue

            track_id = int(obj.id.item())
            # .tolist() works on any device, unlike .numpy() on GPU tensors.
            x, y, *_ = map(int, obj.xywh[0].tolist())

            history = obj_positions.setdefault(track_id, [])
            history.append((x, y, i))

            if len(history) == 1:
                obj_velocities[track_id] = None
            else:
                # Mean speed = displacement since the first observation over
                # the elapsed time (frames / fps = seconds). Image y grows
                # downwards, so the sign is flipped. The horizontal component
                # is omitted for simplicity.
                _, first_y, first_frame = history[0]
                obj_velocities[track_id] = -(y - first_y) / (i - first_frame) * video.fps

                cv2.putText(roi, f"{obj_velocities[track_id]:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)

            cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(boxes)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

        filtered = [value for value in obj_velocities.values() if value is not None] # Remove None's
        # Only average when at least one track has a speed: np.mean of an
        # empty list is nan and emits a RuntimeWarning.
        if filtered:
            avg_speed = np.mean(filtered)
            cv2.putText(frame, f"Avg speed: {avg_speed:.2f}px/s", (x1_roi-5, y1_roi-5), 0, 0.5, (0, 255, 0), 1)

        cv2.imshow("YOLO", frame)

        # Drop tracks whose last observation is too old. Iterate over a copy
        # of the keys because entries are deleted in the loop.
        for track_id in list(obj_positions):
            if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                del obj_positions[track_id]
                del obj_velocities[track_id]
finally:
    # Free the capture and close the window even if processing fails midway.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### Velocidad media total (todas las detecciones) en la región de interés

In [73]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

# Region of interest in full-frame pixel coordinates; detection runs only on
# this crop, so box coordinates below are relative to its top-left corner.
x1_roi, x2_roi = 90, 280
y1_roi, y2_roi = 170, 290
roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

# track id -> list of (x, y, frame_index) observations, oldest first.
obj_positions = {}
# track id -> mean vertical speed in px/s between the first and the latest
# observation, or None while the track has a single observation.
obj_velocities = {}
# A track is forgotten once it has gone unseen for more than this many frames.
detection_lifetime_frames = 5

try:
    for i, frame in enumerate(video):
        # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
        roi = frame[roi_mask]
        # classes 2 and 7 are 'car' and 'truck' in this model's label list.
        results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
        boxes = results[0].boxes

        for obj in boxes:
            # Detections that the tracker has not assigned an id to are skipped.
            if obj.id is None:
                print("Invalid object, resuming...")
                continue

            track_id = int(obj.id.item())
            # .tolist() works on any device, unlike .numpy() on GPU tensors.
            x, y, *_ = map(int, obj.xywh[0].tolist())

            history = obj_positions.setdefault(track_id, [])
            history.append((x, y, i))

            if len(history) == 1:
                obj_velocities[track_id] = None
            else:
                # Mean speed = displacement since the first observation over
                # the elapsed time (frames / fps = seconds). Image y grows
                # downwards, so the sign is flipped. The horizontal component
                # is omitted for simplicity.
                _, first_y, first_frame = history[0]
                obj_velocities[track_id] = -(y - first_y) / (i - first_frame) * video.fps

                cv2.putText(roi, f"{obj_velocities[track_id]:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)

            cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(boxes)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

        # Overall mean across every track that already has a speed.
        filtered = [value for value in obj_velocities.values() if value is not None] # Remove None's
        if filtered:
            avg_speed = np.mean(filtered)
            cv2.putText(frame, f"Avg speed: {avg_speed:.2f}px/s", (x1_roi-5, y1_roi-5), 0, 0.5, (0, 255, 0), 1)

        cv2.imshow("YOLO", frame)

        # Drop tracks whose last observation is too old. Iterate over a copy
        # of the keys because entries are deleted in the loop.
        for track_id in list(obj_positions):
            if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                del obj_positions[track_id]
                del obj_velocities[track_id]
finally:
    # Free the capture and close the window even if processing fails midway.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### Condición existencia de tráfico

In [74]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()

data_path = "data"
video_name = "cars-highway.mp4"
video_path = os.path.join(data_path, video_name)

video = Video(video_path)
print(f"{video.name = }")
print(f"{video.shape = }")
print(f"{video.fps = }")

models_path = "pretrained_models"
model_name = "yolo11n.pt"
yolo_path = os.path.join(models_path, model_name)

yolo = YOLO(yolo_path, verbose=False)

# Region of interest in full-frame pixel coordinates; detection runs only on
# this crop, so box coordinates below are relative to its top-left corner.
x1_roi, x2_roi = 90, 280
y1_roi, y2_roi = 170, 290
roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

# track id -> list of (x, y, frame_index) observations, oldest first.
obj_positions = {}
# track id -> mean vertical speed in px/s between the first and the latest
# observation, or None while the track has a single observation.
obj_velocities = {}
# A track is forgotten once it has gone unseen for more than this many frames.
detection_lifetime_frames = 5

try:
    for i, frame in enumerate(video):
        # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
        roi = frame[roi_mask]
        # classes 2 and 7 are 'car' and 'truck' in this model's label list.
        results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
        boxes = results[0].boxes

        for obj in boxes:
            # Detections that the tracker has not assigned an id to are skipped.
            if obj.id is None:
                print("Invalid object, resuming...")
                continue

            track_id = int(obj.id.item())
            # .tolist() works on any device, unlike .numpy() on GPU tensors.
            x, y, *_ = map(int, obj.xywh[0].tolist())

            history = obj_positions.setdefault(track_id, [])
            history.append((x, y, i))

            if len(history) == 1:
                obj_velocities[track_id] = None
            else:
                # Mean speed = displacement since the first observation over
                # the elapsed time (frames / fps = seconds). Image y grows
                # downwards, so the sign is flipped. The horizontal component
                # is omitted for simplicity.
                _, first_y, first_frame = history[0]
                obj_velocities[track_id] = -(y - first_y) / (i - first_frame) * video.fps

                cv2.putText(roi, f"{obj_velocities[track_id]:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)

            cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

        cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

        vehicle_count = len(boxes)
        cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

        # Overall mean across every track that already has a speed.
        filtered = [value for value in obj_velocities.values() if value is not None] # Remove None's
        if filtered:
            avg_speed = np.mean(filtered)
            cv2.putText(frame, f"Avg speed: {avg_speed:.2f}px/s", (x1_roi-5, y1_roi-5), 0, 0.5, (0, 255, 0), 1)

            # Traffic = slow average speed together with several vehicles.
            # NOTE(review): avg_speed is signed, so any negative average also
            # satisfies `< 15` - confirm whether abs() is wanted here.
            bool_traffic = avg_speed < 15 and vehicle_count > 3
            cv2.putText(frame, f"Traffic: {bool_traffic}", (20, 20), 0, 0.5, (255, 0, 0), 1)

        cv2.imshow("YOLO", frame)

        # Drop tracks whose last observation is too old. Iterate over a copy
        # of the keys because entries are deleted in the loop.
        for track_id in list(obj_positions):
            if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                del obj_positions[track_id]
                del obj_velocities[track_id]
finally:
    # Free the capture and close the window even if processing fails midway.
    video.capture.release()
    cv2.destroyAllWindows()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


In [1]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()
    
def main():
    """Track vehicles in a region of interest and overlay speed and traffic info.

    For every frame of ``data/cars-highway.mp4`` the YOLO tracker runs on the
    region of interest only. Each tracked object is marked with a dot and,
    once it has at least two observations, with the mean of its per-step
    vertical speeds in px/s. The frame also shows the vehicle count, the
    average of the per-object means and a traffic flag, and is displayed in
    a window named "YOLO".
    """
    data_path = "data"
    video_name = "cars-highway.mp4"
    video_path = os.path.join(data_path, video_name)

    video = Video(video_path)
    print(f"{video.name = }")
    print(f"{video.shape = }")
    print(f"{video.fps = }")

    models_path = "pretrained_models"
    model_name = "yolo11n.pt"
    yolo_path = os.path.join(models_path, model_name)

    yolo = YOLO(yolo_path, verbose=False)

    # Region of interest in full-frame pixel coordinates; detection runs only
    # on this crop, so box coordinates are relative to its top-left corner.
    x1_roi, x2_roi = 90, 280
    y1_roi, y2_roi = 170, 290
    roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

    obj_positions = {}   # track id -> [(x, y, frame_index), ...], oldest first
    obj_velocities = {}  # track id -> per-step vertical speeds in px/s
    avg_velocities = {}  # track id -> mean of obj_velocities[id], or None
    detection_lifetime_frames = 5

    try:
        for i, frame in enumerate(video):
            # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
            roi = frame[roi_mask]
            results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
            boxes = results[0].boxes

            for obj in boxes:
                # Detections without a tracker-assigned id are skipped.
                if obj.id is None:
                    print("Invalid object, resuming...")
                    continue

                track_id = int(obj.id.item())
                # .tolist() works on any device, unlike .numpy() on GPU tensors.
                x, y, *_ = map(int, obj.xywh[0].tolist())

                if track_id not in obj_positions:
                    obj_positions[track_id] = [(x, y, i)]
                    obj_velocities[track_id] = []
                    avg_velocities[track_id] = None
                else:
                    _, prev_y, prev_frame = obj_positions[track_id][-1]
                    obj_positions[track_id].append((x, y, i))

                    # Speed since the previous observation (frames / fps =
                    # seconds). Image y grows downwards, so the sign is
                    # flipped. The horizontal component is omitted for
                    # simplicity.
                    vy = -(y - prev_y) / (i - prev_frame) * video.fps

                    obj_velocities[track_id].append(vy)
                    avg_velocities[track_id] = np.mean(obj_velocities[track_id])
                    # Drawn as soon as a speed exists, i.e. from the second
                    # observation of the track onwards.
                    cv2.putText(roi, f"{avg_velocities[track_id]:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)

                cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

            cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

            vehicle_count = len(boxes)
            cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

            filtered = [value for value in avg_velocities.values() if value is not None] # Remove None's
            if filtered:
                avg_speed = np.mean(filtered)
                # NOTE(review): avg_speed is signed, so any negative average
                # also satisfies `< 15` - confirm whether abs() is wanted.
                bool_traffic = avg_speed < 15 and vehicle_count > 3
                cv2.putText(frame, f"Traffic: {bool_traffic}", (20, 20), 0, 0.5, (255, 0, 0), 1)
                cv2.putText(frame, f"Avg speed: {avg_speed:.2f}px/s", (x1_roi-5, y1_roi-5), 0, 0.5, (0, 255, 0), 1)

            cv2.imshow("YOLO", frame)

            # Drop tracks whose last observation is too old. Iterate over a
            # copy of the keys because entries are deleted in the loop.
            for track_id in list(obj_positions):
                if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                    del obj_positions[track_id]
                    del obj_velocities[track_id]
                    del avg_velocities[track_id]
    finally:
        # Free the capture and close the window even if processing fails.
        video.capture.release()
        cv2.destroyAllWindows()

main()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25


### Escribir el resultado del procesamiento a un .mp4

In [4]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

class Video:
    """Thin wrapper around ``cv2.VideoCapture`` for a video file.

    Attributes set by ``__init__``:
        path: the path that was passed in.
        name, extension: base file name split with ``os.path.splitext``.
        fps: ``CAP_PROP_FPS`` truncated to ``int``.
        shape: ``[frame_width, frame_height, frame_count]`` as ints (width
            comes first and the last entry is a frame count, not channels).
        capture: the most recently created ``cv2.VideoCapture``.

    Iterating over an instance yields its frames (see ``frame_gen``).
    """

    def __init__(self, video_path):
        self.path = video_path
        self.name, self.extension = os.path.splitext(os.path.basename(self.path))
        # Open the file only long enough to read its metadata.
        self.capture = cv2.VideoCapture(video_path)
        try:
            self.fps, *self.shape = (
                int(self.capture.get(prop))
                for prop in (
                    cv2.CAP_PROP_FPS,
                    cv2.CAP_PROP_FRAME_WIDTH,
                    cv2.CAP_PROP_FRAME_HEIGHT,
                    cv2.CAP_PROP_FRAME_COUNT,
                )
            )
        finally:
            self.capture.release()

    def frame_gen(self):
        """Yield the frames of the video one by one, starting from the first.

        A new capture is opened on every call. Iteration ends when a frame
        cannot be read (end of stream), when ``cv2.waitKey(1)`` reports a key
        press, or when the capture is not open. The capture is released in
        every case, including when the consumer stops iterating early.
        """
        capture = self.capture = cv2.VideoCapture(self.path)
        try:
            while capture.isOpened() and cv2.waitKey(1) == -1:
                read_successfully, main_frame = capture.read()
                if not read_successfully:
                    # A failed read does not close the capture, so without
                    # this break the loop would spin forever at end of stream.
                    break
                yield main_frame
        finally:
            capture.release()

    def __iter__(self):
        """Return a fresh frame generator so the video can be re-iterated."""
        return self.frame_gen()
    
def main():
    """Run the traffic pipeline and also save the annotated frames to a file.

    For every frame of ``data/cars-highway.mp4`` the YOLO tracker runs on the
    region of interest only. Each tracked object is marked with a dot and,
    once it has at least two observations, with the mean of its per-step
    vertical speeds in px/s. The frame also shows the vehicle count, the
    average of the per-object means and a traffic flag. Every annotated
    frame is displayed in a window named "YOLO" and written to
    ``data/processed-cars-highway.mp4``.

    Raises:
        RuntimeError: if the output video writer cannot be opened.
    """
    data_path = "data"
    video_name = "cars-highway.mp4"
    video_path = os.path.join(data_path, video_name)

    video = Video(video_path)
    print(f"{video.name = }")
    print(f"{video.shape = }")
    print(f"{video.fps = }")

    # Save processed video into a file
    size = (video.shape[0], video.shape[1])  # (width, height)

    output_name = "processed-cars-highway.mp4"
    output_path = os.path.join(data_path, output_name)

    # An explicit codec instead of -1, whose meaning depends on the platform
    # (on Windows it opens a codec selection dialog).
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    result = cv2.VideoWriter(output_path, fourcc, video.fps, size)
    if not result.isOpened():
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    try:
        models_path = "pretrained_models"
        model_name = "yolo11n.pt"
        yolo_path = os.path.join(models_path, model_name)

        yolo = YOLO(yolo_path, verbose=False)

        # Region of interest in full-frame pixel coordinates; detection runs
        # only on this crop, so box coordinates are relative to its corner.
        x1_roi, x2_roi = 90, 280
        y1_roi, y2_roi = 170, 290
        roi_mask = slice(y1_roi, y2_roi), slice(x1_roi, x2_roi)

        obj_positions = {}   # track id -> [(x, y, frame_index), ...], oldest first
        obj_velocities = {}  # track id -> per-step vertical speeds in px/s
        avg_velocities = {}  # track id -> mean of obj_velocities[id], or None
        detection_lifetime_frames = 5

        for i, frame in enumerate(video):
            # Basic slicing gives a view, so drawing on `roi` draws on `frame`.
            roi = frame[roi_mask]
            results = yolo.track(roi, persist=True, classes=[2, 7], conf=0.45, iou=0.5, verbose=False)
            boxes = results[0].boxes

            for obj in boxes:
                # Detections without a tracker-assigned id are skipped.
                if obj.id is None:
                    print("Invalid object, resuming...")
                    continue

                track_id = int(obj.id.item())
                # .tolist() works on any device, unlike .numpy() on GPU tensors.
                x, y, *_ = map(int, obj.xywh[0].tolist())

                if track_id not in obj_positions:
                    obj_positions[track_id] = [(x, y, i)]
                    obj_velocities[track_id] = []
                    avg_velocities[track_id] = None
                else:
                    _, prev_y, prev_frame = obj_positions[track_id][-1]
                    obj_positions[track_id].append((x, y, i))

                    # Speed since the previous observation (frames / fps =
                    # seconds). Image y grows downwards, so the sign is
                    # flipped. The horizontal component is omitted for
                    # simplicity.
                    vy = -(y - prev_y) / (i - prev_frame) * video.fps

                    obj_velocities[track_id].append(vy)
                    avg_velocities[track_id] = np.mean(obj_velocities[track_id])
                    # Drawn as soon as a speed exists, i.e. from the second
                    # observation of the track onwards.
                    cv2.putText(roi, f"{avg_velocities[track_id]:.2f} px/s", (x, y+7), 0, 0.5, (0, 255, 0), 1)

                cv2.circle(roi, (x, y), 3, (0, 0, 255), -1)

            cv2.rectangle(frame, (x1_roi, y1_roi), (x2_roi, y2_roi), (0, 0, 255), 2)

            vehicle_count = len(boxes)
            cv2.putText(frame, f"Vehicle count: {vehicle_count}", (x2_roi+5, y1_roi-5), 0, 0.5, (0, 0, 255), 1)

            filtered = [value for value in avg_velocities.values() if value is not None] # Remove None's
            if filtered:
                avg_speed = np.mean(filtered)
                # NOTE(review): avg_speed is signed, so any negative average
                # also satisfies `< 15` - confirm whether abs() is wanted.
                bool_traffic = avg_speed < 15 and vehicle_count > 3
                cv2.putText(frame, f"Traffic: {bool_traffic}", (20, 20), 0, 0.5, (255, 0, 0), 1)
                cv2.putText(frame, f"Avg speed: {avg_speed:.2f}px/s", (x1_roi-5, y1_roi-5), 0, 0.5, (0, 255, 0), 1)

            cv2.imshow("YOLO", frame)
            result.write(frame)

            # Drop tracks whose last observation is too old. Iterate over a
            # copy of the keys because entries are deleted in the loop.
            for track_id in list(obj_positions):
                if i - obj_positions[track_id][-1][-1] > detection_lifetime_frames:
                    del obj_positions[track_id]
                    del obj_velocities[track_id]
                    del avg_velocities[track_id]
    finally:
        # Finalize the output file, free the capture and close the window
        # even if processing fails midway.
        result.release()
        video.capture.release()
        cv2.destroyAllWindows()

main()

video.name = 'cars-highway'
video.shape = [640, 360, 51201]
video.fps = 25
