<a href="https://colab.research.google.com/github/marcelarosalesj/cfc.demos/blob/main/pedestrian_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# YOLO V9

Identificación de elementos para detectar conflictos viales

- personas
  - peatones
  - niños
- carros
- bicicletas
- motocicletas
- etc

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

! pip install -q ultralytics

from ultralytics import YOLO, checks, hub
checks()  # checks

video_path = "ITDP_videos/cut.mp4"

Ultralytics YOLOv8.1.47 🚀 Python-3.11.0rc1 torch-2.2.2+cu121 CPU (Intel Core(TM) i5-1035G1 1.00GHz)
Setup complete ✅ (8 CPUs, 7.3 GB RAM, 132.8/181.4 GB disk)


In [2]:
#from google.colab.patches import cv2_imshow

#cv2.imshow = cv2_imshow

#Mount Google Drive
#from google.colab import drive
#drive.mount("/content/gdrive")

#Directory
#%cd /content/gdrive/MyDrive/ITDP_videos

In [3]:
videos_path = f"ITDP_videos"

import os
ffs = os.listdir(videos_path)
videos = []
for ff in ffs:
  if "mp4" in ff:
    videos.append(f"{videos_path}/{ff}")
print(videos)

['ITDP_videos/Video6.mp4', 'ITDP_videos/Video7.mp4', 'ITDP_videos/Video5.mp4', 'ITDP_videos/Video3.mp4', 'ITDP_videos/Video1.mp4', 'ITDP_videos/Video4.mp4', 'ITDP_videos/Video2.mp4', 'ITDP_videos/cut-5.mp4', 'ITDP_videos/cut.mp4']


### YOLO v9 -  track

- No detection issue: https://github.com/ultralytics/ultralytics/issues/2470

In [4]:
model = YOLO('yolov9c.pt')

result = model.track(source=video_path, show=True)





errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/300) /home/marcela/repos/cfc.demos/ITDP_videos/cut.mp4: 384x640 1 person, 10 cars, 604.0ms
video 1/1 (frame 2/300) /home/marcela/repos/cfc.demos/ITDP_videos/cut.mp4: 384x640 1 person, 10 cars, 399.0ms
video 1/1 (frame 3/300) /home/marcela/repos/cfc.demos/ITDP_videos/cut.mp4: 384x640 1 person, 10 cars, 372.8ms
video 1/1 (frame 4/300) /home/marcela/repos/cfc.demos/ITDP_videos/cut.mp4: 384x640 2 persons, 10 cars, 370.4ms
video 1/1 (frame 5/300) /home/marcela/repos/cfc.demos/ITDP_videos/cut.mp4: 384x640 2 persons, 10 cars, 333.0ms
video 1/1 (frame 6/3

### YOLO v9 - frame by frame

In [5]:
def bbox_to_point(bbox):
  return [(bbox[0, 0] + bbox[1, 0]) / 2, bbox[1, 1]]

In [6]:
from datetime import datetime

model = YOLO('yolov9c.pt')

video = cv2.VideoCapture(video_path)
frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))

entities = {}

for i_frame in range(frames):
    ret, frame = video.read()
    if not ret:
        continue
    
    cut_frame = frame # ---> No cut frame, No detections class=0
    
    # THIS ROI SELECTION IS NOT REQUIRED
    # Cut the frame - select a ROI
    # cut_frame = frame[height//3:2*height//3,:width//2,:] # ---> No detections class= not defined
    # cut_frame = frame[height//3:2*height//3,:width//2,:] # ---> No detections class=0
    # cut_frame =  frame[:2*height//3,:width//2,:] # ---> No detections class=0
    
    # Track all objects in the frame
    #results = model.track(source=cut_frame, persist=True, classes=0, show=True)
    results = model.track(source=cut_frame, persist=True, show=True)
    names = results[0].names
    boxes = results[0].boxes
    if not boxes.is_track:
        continue
    
    # Iterate over all detected objects
    for i, id in enumerate(boxes.id.int().tolist()):
        if id not in entities:
            entities[id] = dict(classes=[], bboxes=[], coordinates=[], timestamps=[])
    _class = names[boxes.cls[i].int().tolist()]
    bbox = boxes.xyxy[i].numpy().astype(float).reshape((2, 2))
    # Get the coordinates of the bbox
    coord = bbox_to_point(bbox)
    # TODO: Fix weirdness with homography and coordinates
    # Explaination: The homography is saving the calibration info as [lat, lon] and therefore we need to adhere to the convention by swapping the coordinates.
    # bbox xyxy -> point coord [x, y] -> [lat, lon]
    coord = [1 - (coord[1] / height), coord[0] / width]
    entities[id]['classes'].append(_class)
    entities[id]['bboxes'].append(bbox)
    entities[id]['coordinates'].append(coord)



0: 384x640 1 person, 10 cars, 438.3ms
Speed: 3.8ms preprocess, 438.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 423.8ms
Speed: 4.4ms preprocess, 423.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 432.6ms
Speed: 4.5ms preprocess, 432.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 10 cars, 370.1ms
Speed: 5.0ms preprocess, 370.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 10 cars, 449.8ms
Speed: 4.4ms preprocess, 449.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 10 cars, 367.8ms
Speed: 3.6ms preprocess, 367.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 10 cars, 340.7ms
Speed: 4.5ms preprocess, 340.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 10 cars, 448.8