In [None]:
# Instalar las dependencias necesarias
!pip install ultralytics
!pip install opencv-python-headless
!pip install ipywidgets

import base64
import json
import sys
import time
import traceback

import cv2
import numpy as np
from google.colab.output import eval_js
from IPython.display import display, Javascript, Image
from ultralytics import YOLO

# Function that sets up webcam video capture in Colab
def video_stream() -> None:
    """Display the JavaScript that implements webcam streaming in the page.

    The displayed script declares these functions:

    * ``createDom()`` - on first use builds a bordered container holding a
      status label, a ``<video>`` element fed by
      ``navigator.mediaDevices.getUserMedia`` (``facingMode: "environment"``),
      an absolutely positioned ``<img>`` overlay and a red instruction line.
      It also creates a 640x480 capture canvas and starts the
      ``requestAnimationFrame`` loop. Later calls return the existing stream.
    * ``onAnimationFrame()`` - reschedules itself while ``shutdown`` is not
      set. When a capture is pending it resolves it with a JPEG data URL of
      the current video frame (quality 0.8), or with an empty string if
      ``shutdown`` is set.
    * ``stream_frame(label, imgData)`` - if ``shutdown`` is already set,
      removes the DOM and returns an empty string. Otherwise it shows
      ``imgData`` (when non-empty) in the overlay, assigns ``label`` to the
      status element's ``innerHTML``, waits for the next captured frame and
      returns an object with the keys ``create``, ``show``, ``capture``
      (elapsed milliseconds) and ``img``.
    * ``removeDom()`` - stops the first video track and removes the elements.

    Clicking the video, the overlay image or the instruction text sets the
    ``shutdown`` flag.

    Returns:
        None. The function only calls ``display`` and has no return statement.
    """
    js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      labelElement.innerHTML = label;

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

    # Hand the script to the notebook front end.
    display(js)

# Display the JavaScript helpers in the notebook output.
# NOTE: video_stream() has no return statement, so js_result is always None.
js_result = video_stream()
# Fixed 5-second pause intended to give the JavaScript time to load before it
# is first called; nothing here verifies that loading actually finished.
time.sleep(5)

def video_frame(label, bbox):
    """Send an overlay to the page and fetch the next webcam frame.

    Evaluates the page's ``stream_frame(label, imgData)`` JavaScript function
    through ``eval_js`` and returns its reply.

    Args:
        label: Status text for the page. The page assigns it to the status
            element's ``innerHTML``, so any markup in it is rendered as HTML.
        bbox: Data URL of the image to draw over the video, or ``""`` to leave
            the overlay as it is.

    Returns:
        The reply dict of ``stream_frame`` (keys ``create``, ``show``,
        ``capture`` and ``img``). When the page reports that it shut the
        stream down (an empty-string reply) the result is ``{'img': ''}``,
        the same "no image" shape the page uses when the stop happens while a
        capture is pending. ``None`` is returned when the call fails or the
        reply is some other string.
    """
    try:
        # JSON string literals are valid JavaScript string literals, so
        # quotes, backslashes and newlines in the arguments can no longer
        # break out of (or inject code into) the evaluated expression.
        call = 'stream_frame({}, {})'.format(
            json.dumps(str(label)), json.dumps(str(bbox)))
        data = eval_js(call)
    except Exception as e:  # boundary with the browser bridge: report, don't crash
        print(f"Error in video_frame: {str(e)}")
        return None

    if isinstance(data, str):
        if data == '':
            # stream_frame returns '' after tearing the DOM down on a stop
            # request; surface that as an ended stream, not as an error.
            return {'img': ''}
        print(f"Unexpected string returned: {data}")
        return None
    return data

# Load the YOLO model from the 'yolov10n.pt' weights file
model = YOLO('yolov10n.pt')  # Load YOLOv10 nano

# Run the detector on a single frame
def detect_objects(frame):
    """Call the module-level ``model`` on *frame* and return its output unchanged."""
    return model(frame)

# Helper: decide whether a video_frame reply ends the detection loop
def _stream_should_stop(reply):
    """Return True (after printing the reason) when *reply* ends the stream.

    The loop must stop when the reply is ``None``, is not a dict carrying an
    ``'img'`` key, or carries an empty ``'img'``. Retrying in these cases
    either spins forever on a broken bridge or makes the page rebuild its DOM
    and re-open the camera after the user asked to stop.
    """
    if reply is None:
        print("Error: No se pudo obtener el frame de video.")
        return True
    if not isinstance(reply, dict) or 'img' not in reply:
        print(f"Error: Frame inesperado: {reply}")
        return True
    if reply['img'] == '':
        print("Transmisión de video terminada.")
        return True
    return False


# Helper: turn a JPEG data URL into an OpenCV image
def _decode_frame(data_url):
    """Decode a ``data:...;base64,<payload>`` URL into an image array.

    Raises:
        ValueError: if there is no payload after the comma, the payload is not
            valid base64, or OpenCV cannot decode the bytes as an image.
    """
    _, comma, payload = data_url.partition(',')
    if not comma:
        raise ValueError("la URL de datos no contiene una carga base64")
    raw = base64.b64decode(payload)
    img = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    # imdecode signals failure by returning None instead of raising.
    if img is None:
        raise ValueError("cv2.imdecode no pudo decodificar la imagen")
    return img


# Main function for real-time detection
def real_time_detection():
    """Capture webcam frames, run detection and show the annotated result.

    Each iteration requests a frame with ``video_frame``, decodes it, runs
    ``detect_objects`` on it, draws the detections with ``results[0].plot()``
    and sends the annotated JPEG back to the page as the overlay.

    The loop ends when a reply reports that the stream stopped (empty
    ``'img'``) or when a frame cannot be obtained. Replies to the initial
    call and to the overlay-display call are checked too, because a stop that
    arrives during them stops the page's animation-frame loop, and a further
    capture request would then never be answered.

    A frame that fails to decode or encode is skipped. Any other exception is
    printed with its traceback and ends the function.

    Returns:
        None.
    """
    try:
        frame = video_frame("Cargando...", "")
        if frame is None:
            print("Error: No se pudo inicializar el frame de video.")
            return
        if _stream_should_stop(frame):
            return

        while True:
            frame = video_frame("Detectando...", "")
            if _stream_should_stop(frame):
                break

            # Decode the base64 image; a single bad frame is skipped on
            # purpose rather than ending the stream.
            try:
                img = _decode_frame(frame['img'])
            except Exception as e:
                print(f"Error al decodificar la imagen: {str(e)}")
                continue

            # Run detection and draw the results on the frame
            results = detect_objects(img)
            img_with_boxes = results[0].plot()

            # Encode the annotated image as a base64 data URL
            ok, buffer = cv2.imencode('.jpg', img_with_boxes)
            if not ok:
                print("Error al codificar la imagen con detecciones.")
                continue
            img_base64 = base64.b64encode(buffer).decode('utf-8')
            img_data = f"data:image/jpeg;base64,{img_base64}"

            # Show the annotated image; this reply can also carry the stop
            # signal, so it must not be discarded.
            shown = video_frame("Detecciones realizadas", img_data)
            if _stream_should_stop(shown):
                break

            time.sleep(0.05)  # Short pause so the system is not overloaded

    except Exception as e:
        print(f"Error en real_time_detection: {str(e)}")
        print(traceback.format_exc())

# Run real-time detection
real_time_detection()



<IPython.core.display.Javascript object>


0: 480x640 1 person, 204.4ms
Speed: 6.7ms preprocess, 204.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 191.2ms
Speed: 5.8ms preprocess, 191.2ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 207.8ms
Speed: 5.4ms preprocess, 207.8ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 192.5ms
Speed: 5.7ms preprocess, 192.5ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 184.9ms
Speed: 5.0ms preprocess, 184.9ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 292.6ms
Speed: 4.7ms preprocess, 292.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 180.9ms
Speed: 5.5ms preprocess, 180.9ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 188.2ms
Speed: 5.3ms preprocess, 188.2ms inference, 0.3ms postprocess per image at