In [1]:
import torch
import cv2
import numpy as np
import sys
import os

# Asumiendo que YOLOv7 está clonado en el directorio actual
sys.path.append('./yolov7')
from models.experimental import attempt_load
from utils.general import check_img_size, non_max_suppression, scale_coords
from utils.torch_utils import select_device
from utils.datasets import letterbox

In [2]:
from ultralytics import YOLO
import cv2
import numpy as np

def count_people_cars_and_bikes(video_path, output_path='output.mp4'):
    try:
        model = YOLO('yolov8n.pt')
        cap = cv2.VideoCapture(video_path)
        
        if not cap.isOpened():
            print(f"Error: No se puede abrir el video en {video_path}")
            return None

        # Configurar el writer para guardar el video
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        people_count = 0
        bike_count = 0
        car_count = 0

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            results = model(frame)
            
            for r in results:
                boxes = r.boxes
                for box in boxes:
                    cls = int(box.cls[0])
                    conf = float(box.conf[0])
                    
                    if conf > 0.3:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])
                        
                        if cls == 0:
                            color = (0, 255, 0)
                            label = f"Persona {conf:.2f}"
                            people_count += 1
                        elif cls == 1:
                            color = (255, 0, 0)
                            label = f"Bicicleta {conf:.2f}"
                            bike_count += 1
                        elif cls == 2:
                            color = (0, 0, 255)
                            label = f"Auto {conf:.2f}"
                            car_count += 1
                        
                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame, label, (x1, y1-10), 
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Añadir contadores al frame
            cv2.putText(frame, f"Personas: {people_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Bicicletas: {bike_count}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.putText(frame, f"Autos: {car_count}", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Guardar frame
            out.write(frame)

    except Exception as e:
        print(f"Error durante el procesamiento: {e}")
        return None
    finally:
        cap.release()
        out.release()

    print(f"Video guardado en: {output_path}")
    return [people_count, bike_count, car_count]

# Uso
video_path = "2p.mp4"
output_path = "output.mp4"
counts = count_people_cars_and_bikes(video_path, output_path)
if counts:
    print(f"\nResumen final:")
    print(f"Personas detectadas: {counts[0]}")
    print(f"Bicicletas detectadas: {counts[1]}")
    print(f"Autos detectados: {counts[2]}")


0: 384x640 2 persons, 231.4ms
Speed: 11.1ms preprocess, 231.4ms inference, 15.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 93.8ms
Speed: 5.5ms preprocess, 93.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 79.6ms
Speed: 2.8ms preprocess, 79.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 79.6ms
Speed: 5.1ms preprocess, 79.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 82.5ms
Speed: 2.9ms preprocess, 82.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 89.6ms
Speed: 2.1ms preprocess, 89.6ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 95.1ms
Speed: 6.5ms preprocess, 95.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 76.6ms
Speed: 2.4ms preprocess, 76.6ms inference, 1.1ms postprocess per image at sha