# Detección de rostro y puntos faciales desde webcam
Por un tema de instalación y permisos, habrá que reiniciar la sesión y volver a ejecutar el cuaderno 3 veces.

Para quienes, como yo, estén investigando el tema, recomiendo este artículo:

https://www-assemblyai-com.translate.goog/blog/mediapipe-for-dummies?_x_tr_sl=en&_x_tr_tl=es&_x_tr_hl=es&_x_tr_pto=tc

Para la captura del video saqué las funciones de este cuaderno (es recomendable):

https://colab.research.google.com/drive/1QnC7lV7oVFk5OZCm75fqbLAfD9qBy9bw?usp=sharing

## Instalación de librerías

In [None]:
!pip install numpy<2.1.0 --upgrade
!pip install mediapipe opencv-python

/bin/bash: line 1: 2.1.0: No such file or directory


## Importo las librerías

In [None]:
# Importo las librerias
import cv2
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import PIL
import io
import html
import time
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

## Importo y configuro los modelos a utilizar

In [None]:
BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
VisionRunningMode = mp.tasks.vision.RunningMode


# Descargo los modelos de deteccion
!wget -q -O detector.tflite -q https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite
base_options_rostro = python.BaseOptions(model_asset_path='detector.tflite')
options_rostro = vision.FaceDetectorOptions(
    base_options=base_options_rostro,
    running_mode=VisionRunningMode.IMAGE,
    )
detector_rostro = FaceDetector.create_from_options(options_rostro)

detector_puntos = mp.solutions.face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5)


## Código que permite trabajar en el navegador para importar un frame y volver a exportarlo al navegador

In [None]:
# Decode the frame sent by the browser into an OpenCV image
def js_to_image(js_reply):
    """
    Turn the string returned by the browser into an OpenCV image.

    Params:
            js_reply: comma-separated string whose second field is the
                      base64-encoded image captured from the webcam
    Returns:
            img: OpenCV BGR image
    """
    # Everything between the first and second comma is the base64 payload.
    encoded_frame = js_reply.split(',')[1]
    raw_buffer = np.frombuffer(b64decode(encoded_frame), dtype=np.uint8)
    # IMREAD_COLOR is the named constant for flag value 1.
    return cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)

# Encode an RGBA overlay array as a PNG data URL to be drawn over the video stream
def bbox_to_bytes(bbox_array):
    """
    Serialize an RGBA pixel array to a base64 PNG data URL.

    Params:
            bbox_array: Numpy array (pixels) interpreted as RGBA, containing
                        the overlay to draw on the video stream.
    Returns:
            bytes: 'data:image/png;base64,...' string holding the PNG image
    """
    # `import PIL` alone does not load the PIL.Image submodule; import it
    # explicitly so this function does not depend on another library having
    # imported it first.
    import PIL.Image

    with io.BytesIO() as png_buffer:
        PIL.Image.fromarray(bbox_array, 'RGBA').save(png_buffer, format='png')
        encoded_png = b64encode(png_buffer.getvalue()).decode('utf-8')

    return 'data:image/png;base64,{}'.format(encoded_png)

## Función principal que activa el video capturado por la webcam

In [None]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """Display the JavaScript that drives the webcam stream in the cell output.

  The script defines `stream_frame(label, imgData)`, which on each call:
    * builds the DOM (status label, <video>, overlay <img>, instruction text)
      the first time it is needed and requests the camera via getUserMedia;
    * writes `label` into the status element and shows `imgData` as an image
      positioned over the video;
    * resolves with {'create', 'show', 'capture', 'img'}, where 'img' is a
      640x480 JPEG data URL of the current video frame.
  Clicking the video, the overlay or the instruction text sets a shutdown
  flag; the next `stream_frame` call then tears the DOM down, stops the
  camera track and returns ''.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Estado:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'Para finalizar, haga click en el video</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)

def video_frame(label, bbox):
    """Call the browser-side `stream_frame` with a label and an overlay image.

    Params:
            label: text/HTML passed as the first argument of `stream_frame`.
            bbox: overlay image data URL passed as the second argument, or ''.
    Returns:
            Whatever `eval_js` returns for the `stream_frame(...)` call.
    """
    import json

    # json.dumps produces a correctly quoted and escaped JavaScript string
    # literal, so quotes, backslashes or newlines in the arguments can no
    # longer break (or inject code into) the evaluated expression.
    js_call = 'stream_frame({}, {})'.format(
        json.dumps(str(label)), json.dumps(str(bbox)))
    return eval_js(js_call)

## Función para dibujar los landmarks en una imagen

In [None]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

# Module-level shortcuts to the drawing helpers and the face-mesh solution.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils

def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with the face-mesh tesselation drawn on it.

    Args:
        rgb_image: Image array to annotate; it is copied, never modified.
        detection_result: Object exposing ``multi_face_landmarks``; each entry
            must expose ``landmark`` items with ``x``, ``y`` and ``z``.

    Returns:
        The annotated copy. When ``multi_face_landmarks`` is None or empty
        (no face found) an unmodified copy is returned instead of raising.
    """
    annotated_image = np.copy(rgb_image)

    # No detection: the field is None, and iterating it would raise TypeError.
    if not detection_result.multi_face_landmarks:
        return annotated_image

    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    tesselation_style = (
        mp.solutions.drawing_styles.get_default_face_mesh_tesselation_style())

    # Loop through the detected faces to visualize.
    for face_landmarks in detection_result.multi_face_landmarks:
        # Rebuild the landmarks as a NormalizedLandmarkList carrying x/y/z only.
        face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        face_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(
                x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in face_landmarks.landmark
        ])

        solutions.drawing_utils.draw_landmarks(
            image=annotated_image,
            landmark_list=face_landmarks_proto,
            connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=drawing_spec,
            connection_drawing_spec=tesselation_style)

    return annotated_image


## Streaming con detección de rostro y puntos

In [None]:
# Start streaming video from the webcam
video_stream()
# Status text shown next to the video
label_html = 'capturando...<br> Haga click para detener'
# Overlay data URL sent with every request; empty until a frame is processed
bbox = ''
# Number of frames processed so far
count = 0
while True:
    js_reply = video_frame(label_html, bbox)
    if not js_reply:
        # An empty reply means the user clicked to stop the stream.
        break

    # Convert the JS response to an OpenCV image and then to RGB
    img = js_to_image(js_reply["img"])
    imagen_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w, _ = imagen_rgb.shape

    # Black canvas sized like the frame; everything drawn on it becomes the overlay
    box_rgb = np.zeros((h, w, 3), dtype=np.uint8)

    # Face detection: draw one rectangle per detected face
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=imagen_rgb)
    rostros = detector_rostro.detect(mp_image)
    for detection in rostros.detections:
        detection_bbox = detection.bounding_box
        start_point = detection_bbox.origin_x, detection_bbox.origin_y
        end_point = (detection_bbox.origin_x + detection_bbox.width,
                     detection_bbox.origin_y + detection_bbox.height)
        cv2.rectangle(box_rgb, start_point, end_point, (255, 0, 0), 1)

    # Face-mesh landmarks, detected on the RGB frame
    puntos = detector_puntos.process(imagen_rgb)
    # multi_face_landmarks is None when no face is found; skip drawing then
    if puntos.multi_face_landmarks:
        box_rgb = draw_landmarks_on_image(box_rgb, puntos)

    # Opaque wherever something was drawn, transparent elsewhere
    box_alfa = np.where(box_rgb.max(axis=2) > 0, 255, 0).astype(np.uint8)
    bbox_array = np.dstack((box_rgb, box_alfa))

    # Encode the overlay so the next request displays it over the video
    bbox = bbox_to_bytes(bbox_array)
    count += 1

<IPython.core.display.Javascript object>

## Segmentación de figura humana
El modelo Pose se utiliza para detectar personas

In [None]:
# Segmentation test: Pose model configured to also output a segmentation mask
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True,
)

def segmenta_imagen(imagen_rgb, puntos, fondo, tightness=0.3):
    """Composite the segmented foreground of ``imagen_rgb`` over ``fondo``.

    Args:
        imagen_rgb: Source image array of shape (H, W, C).
        puntos: Object exposing ``segmentation_mask``: an (H, W) array of
            per-pixel values compared against ``tightness``, or None.
        fondo: Background array broadcastable to ``imagen_rgb``'s shape.
        tightness: Threshold on the mask; pixels whose mask value is strictly
            greater are taken from ``imagen_rgb``, all others from ``fondo``.
            A greater value gives a tighter segmentation.

    Returns:
        A new array with the composite. When ``segmentation_mask`` is None
        (nothing was segmented) the background alone is returned.
    """
    mask = puntos.segmentation_mask
    if mask is None:
        # Without a mask every pixel counts as background.
        return np.broadcast_to(np.asarray(fondo), imagen_rgb.shape).copy()

    # Trailing axis lets the (H, W) mask broadcast across the colour channels.
    keep_foreground = np.asarray(mask)[..., np.newaxis] > tightness
    return np.where(keep_foreground, imagen_rgb, fondo)



In [None]:
# Elijo un fondo
!wget -O fondo.jpg https://wallpapers.com/images/featured/imagenes-de-espacio-exterior-4qy5d3cn2v1v9fpm.jpg
fondo = cv2.imread('fondo.jpg')
fondo = cv2.resize(fondo,(640,480),interpolation=None)

# start streaming video from webcam
video_stream()
label_html = 'capturando...<br> Haga click para detener'
bbox = ''
count = 0


while True:
    js_reply = video_frame(label_html, bbox)
    if not js_reply:
        break

    if count % 1 == 0:
        img = js_to_image(js_reply["img"])
        bbox_array = np.zeros([480, 640, 4], dtype=np.uint8)
        box_alfa = np.full((480, 640), 255, dtype=np.uint8)
        imagen_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Detectar cuerpo y máscara de segmentación
        result = pose.process(imagen_rgb)

        # Preparar imagen base para dibujar
        box_rgb = cv2.merge((bbox_array[:, :, 0], bbox_array[:, :, 1], bbox_array[:, :, 2]))
        box_alfa = bbox_array[:, :, 3]

        # Aplicar desenfoque de fondo con segmentación
        box_rgb = segmenta_imagen(imagen_rgb, result, fondo)

        # Restaurar canal alfa
        bbox_array = cv2.merge((box_rgb[:, :, 0], box_rgb[:, :, 1], box_rgb[:, :, 2], box_alfa))
        bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(int) * 255

        bbox = bbox_to_bytes(bbox_array)

    count += 1


--2025-05-14 00:04:01--  https://wallpapers.com/images/featured/imagenes-de-espacio-exterior-4qy5d3cn2v1v9fpm.jpg
Resolving wallpapers.com (wallpapers.com)... 3.166.160.50, 3.166.160.88, 3.166.160.15, ...
Connecting to wallpapers.com (wallpapers.com)|3.166.160.50|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 43283 (42K) [image/jpeg]
Saving to: ‘fondo.jpg’


2025-05-14 00:04:01 (3.42 MB/s) - ‘fondo.jpg’ saved [43283/43283]



<IPython.core.display.Javascript object>

KeyboardInterrupt: 