In [2]:
import cv2
import mediapipe as mp
import numpy as np
import joblib
import time
import math

# Load the trained model and the preprocessing transformers from disk.
# NOTE(review): joblib.load unpickles these files, so they should only come
# from a trusted source — confirm the provenance of modelo_rf_output/.
modelo = joblib.load("modelo_rf_output/modelo_rf.pkl")
scaler = joblib.load("modelo_rf_output/scaler.pkl")
label_encoder = joblib.load("modelo_rf_output/label_encoder.pkl")

# Initialize MediaPipe Pose and the drawing helpers used by the main loop
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Función para calcular ángulo entre tres puntos
def calculate_angle(a, b, c):
    """Return the angle at vertex ``b`` formed by points ``a`` and ``c``, in degrees.

    Args:
        a: Coordinates of the first point (sequence of numbers, e.g. ``(x, y)``).
        b: Coordinates of the vertex, same length as ``a``.
        c: Coordinates of the third point, same length as ``a``.

    Returns:
        float: Angle between the vectors ``b -> a`` and ``b -> c`` in the
        range [0, 180]. When either vector has zero length (``a == b`` or
        ``c == b``) the angle is undefined and 0.0 is returned instead of NaN.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    c = np.asarray(c, dtype=float)

    # Vectors going from the vertex to each of the other two points
    ba = a - b
    bc = c - b

    # A zero-length vector would make the division below produce NaN (and a
    # RuntimeWarning), which then breaks callers that do int(angle).
    norm_product = np.linalg.norm(ba) * np.linalg.norm(bc)
    if norm_product == 0:
        return 0.0

    # Clip guards against tiny floating-point overshoot outside [-1, 1]
    cosine_angle = np.clip(np.dot(ba, bc) / norm_product, -1.0, 1.0)

    return float(np.degrees(np.arccos(cosine_angle)))

# Función para extraer landmarks (sin visibilidad)
def extraer_landmarks(results):
    """Flatten the detected pose into ``[x0, y0, z0, x1, y1, z1, ...]``.

    Args:
        results: Object exposing ``pose_landmarks``; when that attribute is
            truthy it must expose ``landmark``, an iterable of points with
            ``x``, ``y`` and ``z`` attributes.

    Returns:
        list | None: The flat coordinate list (visibility is not included),
        or None when ``results.pose_landmarks`` is falsy.
    """
    detected = results.pose_landmarks
    if not detected:
        return None

    flat = []
    for point in detected.landmark:
        flat.extend((point.x, point.y, point.z))
    return flat

def normalize_landmarks(landmarks):
    """Translate x/y coordinates so the first landmark sits at the origin.

    The input is a flat sequence ``[x0, y0, z0, x1, y1, z1, ...]``. Every x
    and y has the first landmark's x and y subtracted; z values are copied
    unchanged.

    NOTE(review): the original docstring described landmark 0 as the central
    hip; in MediaPipe Pose index 0 is documented as the nose — confirm which
    reference point the training pipeline used.

    Args:
        landmarks: Flat sequence of coordinates, or None.

    Returns:
        list | None: The shifted coordinates, or None when ``landmarks`` is
        None or empty.
    """
    if landmarks is None or len(landmarks) == 0:
        return None

    # Reference point: x and y of the first landmark in the sequence
    origin_x = landmarks[0]
    origin_y = landmarks[1]

    shifted = []
    for start in range(0, len(landmarks), 3):
        x_value = landmarks[start] - origin_x
        y_value = landmarks[start + 1] - origin_y
        z_value = landmarks[start + 2]  # depth is passed through as-is
        shifted += [x_value, y_value, z_value]

    return shifted

# Frame history consumed by the moving-average smoothing of landmarks
landmark_history = []
history_length = 5  # maximum number of frames kept in the history

# Función para suavizar landmarks con promedio móvil
def smooth_landmarks(landmarks, history, max_length=None):
    """Smooth landmarks with a moving average over the most recent frames.

    ``landmarks`` is appended to ``history`` (which is mutated in place), the
    oldest entries are dropped until at most ``max_length`` remain, and the
    element-wise mean of what is left is returned.

    Args:
        landmarks: Flat sequence of coordinates for the current frame.
        history: List of previous frames' landmarks; updated in place.
        max_length: Maximum number of frames to keep. Defaults to the
            module-level ``history_length`` when None.

    Returns:
        numpy.ndarray with the per-coordinate mean of the retained frames, or
        ``landmarks`` unchanged when the window is non-positive (nothing is
        retained to average).
    """
    if max_length is None:
        max_length = history_length

    history.append(landmarks)

    # Trim every excess entry, not just one, so a history that is already
    # longer than the window is brought back within bounds.
    window = max(max_length, 0)
    while len(history) > window:
        history.pop(0)

    if history:
        return np.mean(history, axis=0)

    return landmarks

# Intentar diferentes métodos para abrir la cámara
def open_camera():
    """Try several ways of opening a camera and return the first that works.

    Attempts, in order: index 0 with the DirectShow backend, index 0 with the
    default backend, then indices 1 through 4 with the default backend.

    Returns:
        An opened ``cv2.VideoCapture``, or None (after printing a message)
        when no attempt succeeds.
    """
    # (device index, backend) pairs; a backend of None means "use the default"
    candidates = [(0, cv2.CAP_DSHOW), (0, None)]
    candidates += [(index, None) for index in range(1, 5)]

    for index, backend in candidates:
        if backend is None:
            cap = cv2.VideoCapture(index)
        else:
            cap = cv2.VideoCapture(index, backend)

        if cap.isOpened():
            return cap

        # Release the failed handle before moving on to the next candidate
        cap.release()

    print("No se pudo abrir ninguna cámara.")
    return None

# Start the capture; stop this cell if no camera could be opened
cap = open_camera()
if cap is None:
    print("Error: No se pudo acceder a la cámara. El programa se cerrará.")
    # exit() is the interactive-shell helper (in a Jupyter kernel it requests
    # a kernel shutdown); raising SystemExit stops execution here and reports
    # a non-zero status when this code is run as a script.
    raise SystemExit(1)

# Display settings
font = cv2.FONT_HERSHEY_SIMPLEX
confidence_threshold = 0.7  # Probability above which the prediction is drawn as high-confidence
show_angles = True  # Whether joint angles are drawn on the frame

# FPS bookkeeping
prev_time = 0
fps_history = []
fps_avg_length = 10

# Create the display window and set its size
cv2.namedWindow("Sistema de Anotación de Video", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Sistema de Anotación de Video", 1280, 720)

# Recent predictions, kept to stabilize the displayed label
prediction_history = []
prediction_history_length = 7

# Color constants, written as (B, G, R) tuples
COLOR_GREEN = (0, 255, 0)
COLOR_RED = (0, 0, 255)
COLOR_BLUE = (255, 0, 0)
COLOR_YELLOW = (0, 255, 255)
COLOR_MAGENTA = (255, 0, 255)

# Main loop: capture frames, classify the pose and draw the overlays
from collections import Counter  # imported once here instead of on every frame


def _draw_joint_angle(frame, points, first, vertex, last, offset, color):
    """Write the angle at `vertex` (formed with `first` and `last`) next to that joint."""
    p_first = points[first.value]
    p_vertex = points[vertex.value]
    p_last = points[last.value]

    angle = calculate_angle(
        (p_first.x, p_first.y),
        (p_vertex.x, p_vertex.y),
        (p_last.x, p_last.y),
    )

    # Landmarks are normalized to [0, 1]; scale to pixel coordinates
    frame_h, frame_w = frame.shape[:2]
    px = int(p_vertex.x * frame_w) + offset[0]
    py = int(p_vertex.y * frame_h) + offset[1]

    # "deg" instead of the degree sign: the Hershey fonts used by cv2.putText
    # cannot render non-ASCII characters and would draw "?" in their place.
    cv2.putText(frame, f"{int(angle)} deg", (px, py),
                font, 0.5, color, 1, cv2.LINE_AA)


def _draw_angle_overlays(frame, points):
    """Annotate both knee angles, the right elbow angle and the torso inclination."""
    lm = mp_pose.PoseLandmark

    _draw_joint_angle(frame, points, lm.RIGHT_HIP, lm.RIGHT_KNEE, lm.RIGHT_ANKLE,
                      (-15, -10), COLOR_RED)
    _draw_joint_angle(frame, points, lm.LEFT_HIP, lm.LEFT_KNEE, lm.LEFT_ANKLE,
                      (10, -10), COLOR_RED)
    _draw_joint_angle(frame, points, lm.RIGHT_SHOULDER, lm.RIGHT_ELBOW, lm.RIGHT_WRIST,
                      (-15, 0), COLOR_BLUE)

    # Torso inclination: angle of the hip-midpoint -> shoulder-midpoint line
    # with respect to the vertical
    left_shoulder = points[lm.LEFT_SHOULDER.value]
    right_shoulder = points[lm.RIGHT_SHOULDER.value]
    left_hip = points[lm.LEFT_HIP.value]
    right_hip = points[lm.RIGHT_HIP.value]

    shoulder_mid_x = (left_shoulder.x + right_shoulder.x) / 2
    shoulder_mid_y = (left_shoulder.y + right_shoulder.y) / 2
    hip_mid_x = (left_hip.x + right_hip.x) / 2
    hip_mid_y = (left_hip.y + right_hip.y) / 2

    torso_angle = abs(math.degrees(math.atan2(shoulder_mid_x - hip_mid_x,
                                              hip_mid_y - shoulder_mid_y)))

    frame_h, frame_w = frame.shape[:2]
    torso_mid_x = int((shoulder_mid_x + hip_mid_x) * frame_w / 2)
    torso_mid_y = int((shoulder_mid_y + hip_mid_y) * frame_h / 2)
    cv2.putText(frame, f"Incl: {int(torso_angle)} deg",
                (torso_mid_x + 10, torso_mid_y),
                font, 0.6, COLOR_MAGENTA, 1, cv2.LINE_AA)


try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Error al leer el frame. Saliendo...")
            break

        # Compute FPS; guard against a zero time delta on coarse clocks
        current_time = time.time()
        elapsed = current_time - prev_time
        fps = 1 / elapsed if prev_time > 0 and elapsed > 0 else 0
        prev_time = current_time
        fps_history.append(fps)
        if len(fps_history) > fps_avg_length:
            fps_history.pop(0)
        avg_fps = sum(fps_history) / len(fps_history)

        # Run MediaPipe on an RGB copy of the frame
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)

        if results.pose_landmarks:
            # Draw the detected skeleton on the frame
            mp_drawing.draw_landmarks(
                frame,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
            )

            landmarks = extraer_landmarks(results)
            if landmarks:
                # Smooth over recent frames, then shift relative to the first landmark
                smoothed_landmarks = smooth_landmarks(landmarks, landmark_history)
                normalized_landmarks = normalize_landmarks(smoothed_landmarks)

                if normalized_landmarks:
                    # Build the single-sample feature matrix for the model
                    X = np.array(normalized_landmarks).reshape(1, -1)
                    X_scaled = scaler.transform(X)

                    y_pred = modelo.predict(X_scaled)
                    actividad_pred = label_encoder.inverse_transform(y_pred)[0]

                    # Keep a short history of predictions for stability
                    prediction_history.append(actividad_pred)
                    if len(prediction_history) > prediction_history_length:
                        prediction_history.pop(0)

                    # Display the most frequent recent prediction
                    most_common = Counter(prediction_history).most_common(1)
                    actividad = most_common[0][0]
                    # Share of the recent predictions that agree with the shown label
                    confianza = most_common[0][1] / len(prediction_history)

                    if hasattr(modelo, 'predict_proba'):
                        probs = modelo.predict_proba(X_scaled)[0]
                        max_prob = np.max(probs)

                        # Green above the confidence threshold, yellow otherwise
                        color = COLOR_GREEN if max_prob > confidence_threshold else COLOR_YELLOW

                        cv2.putText(frame, f"Actividad: {actividad}", (20, 40),
                                    font, 0.8, color, 2, cv2.LINE_AA)
                        cv2.putText(frame, f"Confianza: {max_prob:.2f}", (20, 80),
                                    font, 0.8, color, 2, cv2.LINE_AA)
                    else:
                        # Model exposes no probabilities: show the label only
                        cv2.putText(frame, f"Actividad: {actividad}", (20, 40),
                                    font, 0.8, COLOR_GREEN, 2, cv2.LINE_AA)

                    if show_angles:
                        try:
                            _draw_angle_overlays(frame, results.pose_landmarks.landmark)
                        except (IndexError, ValueError, OverflowError, cv2.error):
                            # Best effort: skip the angle overlay for this frame
                            # when a landmark is missing or a value cannot be
                            # converted to pixel coordinates.
                            pass

        # FPS readout
        cv2.putText(frame, f"FPS: {avg_fps:.1f}", (frame.shape[1]-120, 30),
                    font, 0.6, (255, 255, 255), 1, cv2.LINE_AA)

        # Key hints
        cv2.putText(frame, "Q: Salir | A: Mostrar/ocultar angulos",
                    (10, frame.shape[0]-20), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)

        cv2.imshow("Sistema de Anotación de Video", frame)

        # Keyboard handling (accept both cases, matching the on-screen hint)
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), ord('Q')):
            break
        elif key in (ord('a'), ord('A')):
            # Toggle the joint-angle overlay
            show_angles = not show_angles
finally:
    # Always free the camera and close the window, even if the loop raised
    # or the kernel was interrupted
    cap.release()
    cv2.destroyAllWindows()



In [12]:
# NOTE(review): this install cell appears after the cell that imports these
# packages; on a fresh kernel it presumably needs to run first. Consider
# pinning versions for reproducibility, and verify whether scikit-learn is
# also required to unpickle the model artifacts.
%pip install opencv-python mediapipe numpy joblib

Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Collecting mediapipe
  Downloading mediapipe-0.10.21-cp310-cp310-macosx_11_0_universal2.whl.metadata (9.9 kB)
Collecting absl-py (from mediapipe)
  Using cached absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting attrs>=19.1.0 (from mediapipe)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting jax (from mediapipe)
  Using cached jax-0.6.1-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.6.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (1.2 kB)
Collecting numpy
  Downloading numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl.metadata (61 kB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached opencv_contrib_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Colle