In [4]:
import cv2  
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp


2024-11-14 15:49:35.311709: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# MediaPipe configuration class for hand detection
class detectorManos():
    """Thin wrapper around MediaPipe Hands that tracks hands and counts raised fingers.

    Typical per-frame usage::

        frame = detector.encontrarManos(frame)
        lista, bbox = detector.encontrarPosicion(frame)
        n = detector.numeroDedos()

    Args:
        mode: Forwarded to MediaPipe as ``static_image_mode``.
        maxManos: Forwarded as ``max_num_hands``.
        model_complexity: Forwarded as ``model_complexity``.
        Confdeteccion: Forwarded as ``min_detection_confidence``.
        Confsegui: Forwarded as ``min_tracking_confidence``.
    """

    def __init__(self, mode=False, maxManos = 2, model_complexity=0, Confdeteccion = 0.5, Confsegui = 0.5):
        self.mode = mode
        self.maxManos = maxManos
        self.model_complexity = model_complexity
        self.Confdeteccion = Confdeteccion
        self.Confsegui = Confsegui

        self.mpmanos = mp.solutions.hands
        # Keyword arguments: positional binding silently breaks if the
        # installed MediaPipe version orders its parameters differently.
        self.manos = self.mpmanos.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxManos,
            model_complexity=self.model_complexity,
            min_detection_confidence=self.Confdeteccion,
            min_tracking_confidence=self.Confsegui,
        )
        self.dibujo = mp.solutions.drawing_utils
        # Landmark indices of the five fingertips (thumb first).
        self.tip = [4, 8, 12, 16, 20]
        # Per-frame state, initialised so the query methods are safe to call
        # before the first frame has been processed.
        self.resultados = None
        self.lista = []

    def encontrarManos(self, frame, dibujar = False):
        """Run hand detection on a BGR frame and store the result in ``self.resultados``.

        Args:
            frame: BGR image; it is converted to RGB before being processed.
            dibujar: When true, draw every detected hand skeleton onto ``frame``.

        Returns:
            The same ``frame`` object (drawn on in place when ``dibujar`` is true).
        """
        imgcolor = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        self.resultados = self.manos.process(imgcolor)

        if dibujar and self.resultados.multi_hand_landmarks:
            for mano in self.resultados.multi_hand_landmarks:
                self.dibujo.draw_landmarks(frame, mano, self.mpmanos.HAND_CONNECTIONS)
        return frame

    def encontrarPosicion(self, frame, ManoNum = 0, dibujar = False):
        """Convert the landmarks of one detected hand to pixel coordinates.

        Args:
            frame: Image whose height/width scale the normalised landmarks.
                Only ``frame.shape[:2]`` is read, so 2-D frames work too.
            ManoNum: Index of the hand inside the last detection result.
            dibujar: When true, draw a dot per landmark and a padded
                bounding rectangle onto ``frame``.

        Returns:
            ``(lista, bbox)`` where ``lista`` is a list of ``[id, x, y]``
            entries (also stored in ``self.lista``) and ``bbox`` is the tuple
            ``(xmin, ymin, xmax, ymax)``. Both are empty lists when no
            detection has run yet, no hand was found, or ``ManoNum`` does not
            refer to a detected hand.
        """
        xlista = []
        ylista = []
        bbox = []
        self.lista = []

        resultados = getattr(self, "resultados", None)
        manos = resultados.multi_hand_landmarks if resultados is not None else None
        # Guard the index: asking for hand 1 when only one hand is visible
        # used to raise IndexError. Negative indices keep working.
        if manos and -len(manos) <= ManoNum < len(manos):
            mi_mano = manos[ManoNum]
            # Frame size is loop-invariant; slicing also tolerates frames
            # without a channel axis.
            alto, ancho = frame.shape[:2]
            for id, lm in enumerate(mi_mano.landmark):
                cx, cy = int(lm.x * ancho), int(lm.y * alto)
                xlista.append(cx)
                ylista.append(cy)
                self.lista.append([id, cx, cy])
                if dibujar:
                    cv2.circle(frame, (cx, cy), 5, (0, 0, 0), cv2.FILLED)
            if self.lista:
                xmin, xmax = min(xlista), max(xlista)
                ymin, ymax = min(ylista), max(ylista)
                bbox = xmin, ymin, xmax, ymax
                if dibujar:
                    cv2.rectangle(frame, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20), (0, 255, 0), 2)
        return self.lista, bbox

    def dedosArriba(self):
        """Return a 0/1 flag per finger (thumb first) from the last ``self.lista``.

        The thumb is compared on the x axis against the landmark just below
        its tip; the other four fingers are compared on the y axis against the
        landmark two below their tip (smaller y counts as raised).

        Returns:
            A list of five ints, or an empty list when ``self.lista`` does not
            contain every landmark the comparison needs.
        """
        lista = getattr(self, "lista", [])
        # Highest index read below is the last fingertip.
        if len(lista) <= max(self.tip):
            return []

        pulgar = self.tip[0]
        dedos = [1 if lista[pulgar][1] < lista[pulgar - 1][1] else 0]
        dedos.extend(
            1 if lista[punta][2] < lista[punta - 2][2] else 0
            for punta in self.tip[1:]
        )
        return dedos

    def numeroDedos(self):
        """Return how many fingers are raised, or ``None`` when no hand data is available.

        ``None`` (rather than 0) lets callers tell "no hand in view" apart
        from "closed fist".
        """
        dedos = self.dedosArriba()
        if dedos:
            return sum(dedos)
        return None

In [9]:
import warnings
import os


# Ignore the protobuf warning
warnings.filterwarnings("ignore", category=UserWarning, module='google.protobuf.symbol_database')

# --- Tunable constants -------------------------------------------------------
# Number of raised fingers -> glasses image to overlay.
GAFAS_POR_DEDOS = {1: 'gafas1.jpg', 2: 'gafas2.jpg'}
# The overlay is resized to the detected face box multiplied by this factor.
ESCALA_GAFAS = 1.2
# Face Mesh landmarks whose midpoint anchors the overlay, plus a fixed
# horizontal pixel shift applied to that midpoint.
# NOTE(review): 33 and 133 look like the two corners of the *same* eye, with
# the +35 px shift compensating; a resolution-independent anchor would use one
# landmark per eye. Values kept as-is to preserve the current look - confirm.
IDX_OJO_A = 33
IDX_OJO_B = 133
DESPLAZAMIENTO_X = 35
# Used when the camera driver does not report a usable frame rate.
FPS_POR_DEFECTO = 30.0
TECLA_ESC = 27

# Cache of pre-masked glasses images so each file is read and processed once
# instead of on every frame.
_gafas_cache = {}


def _cargar_gafas(ruta):
    """Load a glasses image and black out its near-white background.

    Pixels brighter than 240 (grayscale) are treated as background; the outer
    contours of everything else are filled to build the keep-mask.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``ruta``.
    """
    if ruta not in _gafas_cache:
        img = cv2.imread(ruta)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f"Could not read glasses image: {ruta}")
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, umbral = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
        contornos, _ = cv2.findContours(umbral, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        mascara = np.zeros_like(img)
        cv2.drawContours(mascara, contornos, -1, (255, 255, 255), thickness=cv2.FILLED)
        _gafas_cache[ruta] = cv2.bitwise_and(img, mascara)
    return _gafas_cache[ruta]


def _superponer(frame, overlay, centro_x, centro_y):
    """Paste the non-black pixels of ``overlay`` onto ``frame`` in place.

    The overlay is centred on (centro_x, centro_y); its top-left corner is
    clamped to the frame origin and anything that would fall past the
    right/bottom edge is cropped so ROI, mask and overlay always share a shape.
    """
    alto_f, ancho_f = frame.shape[:2]
    x0 = max(centro_x - overlay.shape[1] // 2, 0)
    y0 = max(centro_y - overlay.shape[0] // 2, 0)
    x1 = min(x0 + overlay.shape[1], ancho_f)
    y1 = min(y0 + overlay.shape[0], alto_f)
    if x1 <= x0 or y1 <= y0:
        return  # overlay lies completely outside the frame

    overlay = overlay[:y1 - y0, :x1 - x0]
    region = frame[y0:y1, x0:x1]

    gray_overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray_overlay, 1, 255, cv2.THRESH_BINARY)
    mask_inv = cv2.bitwise_not(mask)
    bg_frame = cv2.bitwise_and(region, region, mask=mask_inv)
    fg_overlay = cv2.bitwise_and(overlay, overlay, mask=mask)
    frame[y0:y1, x0:x1] = cv2.add(bg_frame, fg_overlay)


# Initialise the hand detector and the MediaPipe face solutions.
# The confidence is passed by keyword: as the first positional argument it
# would bind to `mode` instead of the detection threshold.
detector = detectorManos(Confdeteccion=0.75)
mp_face_mesh = mp.solutions.face_mesh
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.2)
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.2)

# Start the video capture
vid = cv2.VideoCapture(0)
out = None

flag = 0                 # 1 while a pair of glasses is selected
objeto_extraido = None   # currently selected, pre-masked glasses image
w = h = 0                # last known face box size in pixels

try:
    if not vid.isOpened():
        raise RuntimeError("Could not open video capture device 0")

    # Output video settings
    output_video_path = os.path.join(os.getcwd(), 'output.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = vid.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        fps = FPS_POR_DEFECTO
    out = cv2.VideoWriter(output_video_path, fourcc, fps,
                          (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))))

    while True:
        ret, frame = vid.read()
        if not ret:
            break

        frame = detector.encontrarManos(frame)
        manosInfo, cuadro = detector.encontrarPosicion(frame, dibujar=False)

        # Evaluate the gesture once per frame. None (no hand visible) leaves
        # the current selection untouched; 0 (closed fist) removes the glasses.
        num_dedos = detector.numeroDedos()
        if num_dedos in GAFAS_POR_DEDOS:
            objeto_extraido = _cargar_gafas(GAFAS_POR_DEDOS[num_dedos])
            flag = 1
        elif num_dedos == 0:
            flag = 0

        if flag == 1:
            ih, iw = frame.shape[:2]
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)
            results2 = face_detection.process(frame_rgb)

            # Refresh the face size when a detection is available; otherwise
            # the last known size is reused (0x0 until the first detection).
            if results2.detections:
                for detection in results2.detections:
                    bboxC = detection.location_data.relative_bounding_box
                    w, h = int(bboxC.width * iw), int(bboxC.height * ih)

            ancho_gafas = int(w * ESCALA_GAFAS)
            alto_gafas = int(h * ESCALA_GAFAS)
            # cv2.resize rejects empty target sizes, so skip until a face box exists.
            if results.multi_face_landmarks and ancho_gafas > 0 and alto_gafas > 0:
                overlay = cv2.resize(objeto_extraido, (ancho_gafas, alto_gafas))
                for landmarks in results.multi_face_landmarks:
                    punto_a = landmarks.landmark[IDX_OJO_A]
                    punto_b = landmarks.landmark[IDX_OJO_B]
                    center_x = (int(punto_a.x * iw) + int(punto_b.x * iw)) // 2 + DESPLAZAMIENTO_X
                    center_y = (int(punto_a.y * ih) + int(punto_b.y * ih)) // 2
                    _superponer(frame, overlay, center_x, center_y)

        # Show the frame with the glasses in place
        cv2.imshow('Frame', frame)

        # Append the frame to the output video
        out.write(frame)

        # Stop on ESC (mask to the low byte, as waitKey may set higher bits)
        if cv2.waitKey(20) & 0xFF == TECLA_ESC:
            break
finally:
    # Always release the camera/writer and close the windows, even when the
    # loop aborts with an exception, so the device is not left locked.
    vid.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()


I0000 00:00:1731599847.028705 2327715 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.7.4), renderer: Intel(R) Iris(TM) Plus Graphics 640
I0000 00:00:1731599847.062921 2327715 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.7.4), renderer: Intel(R) Iris(TM) Plus Graphics 640
I0000 00:00:1731599847.112943 2327715 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.7.4), renderer: Intel(R) Iris(TM) Plus Graphics 640
W0000 00:00:1731599847.147773 2344762 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731599847.150116 2344775 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731599847.191314 2344725 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731599847.192417 2344773